#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
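# This script downloads a Spark binary distribution matching the versions declared
# in the root pom.xml, builds the Hivemall Spark jar (with dependencies) via Maven
# when it is missing, and then launches `spark-shell` with this project's
# conf/spark-defaults.conf, forwarding any extra arguments.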
# Determine the directory this script resides in
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Preserve the calling directory
_CALLING_DIR="$(pwd)"
# Download any application given a URL
## Arg1 - Remote URL
## Arg2 - Local file name
download_app() {
  local remote_url="$1"
  local local_name="$2"
  # set up `curl` and `wget` options
  local curl_opts="--progress-bar -L"
  local wget_opts="--progress=bar:force"
  # skip the download if we already have the file; otherwise try `curl` first
  [ ! -f "${local_name}" ] && [ "$(command -v curl)" ] && \
    echo "exec: curl ${curl_opts} ${remote_url}" 1>&2 && \
    curl ${curl_opts} "${remote_url}" > "${local_name}"
  # if the file still doesn't exist, let's try `wget` and cross our fingers
  [ ! -f "${local_name}" ] && [ "$(command -v wget)" ] && \
    echo "exec: wget ${wget_opts} ${remote_url}" 1>&2 && \
    wget ${wget_opts} -O "${local_name}" "${remote_url}"
  # if both were unsuccessful, exit
  [ ! -f "${local_name}" ] && \
    echo -n "ERROR: Cannot download ${local_name} with cURL or wget; " && \
    echo "please install manually and try again." && \
    exit 2
}
# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
# already been installed
## Arg1 - URL
## Arg2 - Tarball Name
## Arg3 - Checkable Binary
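## If Arg3 is omitted or the binary does not exist yet, the tarball is downloaded
## into this script's directory, extracted there, and then deleted.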
install_app() {
  local remote_tarball="$1/$2"
  local local_tarball="${_DIR}/$2"
  local binary="${_DIR}/$3"
  if [ -z "$3" ] || [ ! -f "$binary" ]; then
    download_app "${remote_tarball}" "${local_tarball}"
    cd "${_DIR}" && tar -xzf "$2"
    rm -rf "$local_tarball"
  fi
}
# Determine the Spark version from the root pom.xml file and
# install Spark under the bin/ folder if needed.
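# The resulting SPARK_BIN path is used at the end of this script to launch the shell.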
install_spark() {
  local SPARK_VERSION=$(grep "<spark.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}')
  local HADOOP_VERSION=$(grep "<hadoop.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}' | cut -d '.' -f1-2)
  local SPARK_DIR="${_DIR}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
  local APACHE_MIRROR=${APACHE_MIRROR:-'http://d3kbcqa49mib13.cloudfront.net'}
  install_app \
    "${APACHE_MIRROR}" \
    "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
    "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/bin/spark-shell"
  SPARK_BIN="${SPARK_DIR}/bin/spark-shell"
}
# Check for a locally installed Maven and, if it is missing or older than the
# required version, install Maven under this script's directory.
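# A version string like "3.3.9" is normalized to "003003009" so versions can be
# compared numerically; a missing `mvn` normalizes to all zeros and triggers the install.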
install_mvn() {
  local MVN_VERSION="3.3.9"
  MVN_BIN="$(command -v mvn)"
  if [ "$MVN_BIN" ]; then
    local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
  fi
  # Simple version normalization, see: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers
  function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; }
  if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
    local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='}
    install_app \
      "${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \
      "apache-maven-${MVN_VERSION}-bin.tar.gz" \
      "apache-maven-${MVN_VERSION}/bin/mvn"
    MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
  fi
}
# Compile Hivemall against the Spark version declared in the root pom.xml
compile_hivemall() {
  local HIVEMALL_VERSION=$(grep "<version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}')
  local SCALA_VERSION=$(grep "<scala.binary.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}')
  local SPARK_VERSION=$(grep "<spark.binary.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}')
  HIVEMALL_BIN="${_DIR}/../target/hivemall-spark-${SPARK_VERSION}_${SCALA_VERSION}-${HIVEMALL_VERSION}-with-dependencies.jar"
  if [ ! -f "${HIVEMALL_BIN}" ]; then
    install_mvn && ${MVN_BIN} validate && ${MVN_BIN} -f "${_DIR}/../pom.xml" clean package -P"spark-${SPARK_VERSION}" -DskipTests
    if [ $? -ne 0 ]; then
      echo "Failed to compile hivemall for spark-${SPARK_VERSION}"
      exit 1
    fi
  fi
}
# Install the proper version of Spark for launching spark-shell
install_spark
# Compile hivemall for the Spark version
compile_hivemall
# Reset the current working directory
cd "${_CALLING_DIR}"
echo "Using \`spark-shell\` from path: $SPARK_BIN" 1>&2
# Last, call the `spark-shell` command as usual
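# Any arguments passed to this script (e.g. --master local[*]) are forwarded to spark-shell unchanged.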
"${SPARK_BIN}" --properties-file "${_DIR}/../conf/spark-defaults.conf" "$@"