| #!/usr/bin/env bash |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| # Determine the current working directory |
| _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" |
| # Preserve the calling directory |
| _CALLING_DIR="$(pwd)" |
| |
| # Download any application given a URL |
| ## Arg1 - Remote URL |
| ## Arg2 - Local file name |
| download_app() { |
| local remote_url="$1" |
| local local_name="$2" |
| |
| # setup `curl` and `wget` options |
| local curl_opts="--progress-bar -L" |
| local wget_opts="--progress=bar:force" |
| |
| # check if we already have the given application |
| # check if we have curl installed |
| # download application |
| [ ! -f "${local_name}" ] && [ $(command -v curl) ] && \ |
| echo "exec: curl ${curl_opts} ${remote_url}" 1>&2 && \ |
| curl ${curl_opts} "${remote_url}" > "${local_name}" |
| # if the file still doesn't exist, lets try `wget` and cross our fingers |
| [ ! -f "${local_name}" ] && [ $(command -v wget) ] && \ |
| echo "exec: wget ${wget_opts} ${remote_url}" 1>&2 && \ |
| wget ${wget_opts} -O "${local_name}" "${remote_url}" |
| # if both were unsuccessful, exit |
| [ ! -f "${local_name}" ] && \ |
| echo -n "ERROR: Cannot download $2 with cURL or wget; " && \ |
| echo "please install manually and try again." && \ |
| exit 2 |
| } |
| |
| # Installs any application tarball given a URL, the expected tarball name, |
| # and, optionally, a checkable binary path to determine if the binary has |
| # already been installed |
| ## Arg1 - URL |
| ## Arg2 - Tarball Name |
| ## Arg3 - Checkable Binary |
| install_app() { |
| local remote_tarball="$1/$2" |
| local local_tarball="${_DIR}/$2" |
| local binary="${_DIR}/$3" |
| |
| if [ -z "$3" -o ! -f "$binary" ]; then |
| download_app "${remote_tarball}" "${local_tarball}" |
| cd "${_DIR}" && tar -xzf "$2" |
| rm -rf "$local_tarball" |
| fi |
| } |
| |
| # Determine the Spark version from the root pom.xml file and |
| # install Spark under the bin/ folder if needed. |
| install_spark() { |
| local SPARK_VERSION=`grep "<spark.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` |
| local HADOOP_VERSION=`grep "<hadoop.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}' | cut -d '.' -f1-2` |
| local SPARK_DIR="${_DIR}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" |
| local APACHE_MIRROR=${APACHE_MIRROR:-'http://d3kbcqa49mib13.cloudfront.net'} |
| |
| install_app \ |
| "${APACHE_MIRROR}" \ |
| "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \ |
| "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/bin/spark-shell" |
| |
| SPARK_BIN="${SPARK_DIR}/bin/spark-shell" |
| } |
| |
| # Determine the Maven version from the root pom.xml file and |
| # install maven under the build/ folder if needed. |
| install_mvn() { |
| local MVN_VERSION="3.3.9" |
| MVN_BIN="$(command -v mvn)" |
| if [ "$MVN_BIN" ]; then |
| local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')" |
| fi |
| # See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers |
| function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; } |
| if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then |
| local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='} |
| |
| install_app \ |
| "${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \ |
| "apache-maven-${MVN_VERSION}-bin.tar.gz" \ |
| "apache-maven-${MVN_VERSION}/bin/mvn" |
| |
| MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn" |
| fi |
| } |
| |
| # Compile hivemall for the latest Spark release |
| compile_hivemall() { |
| local HIVEMALL_VERSION=`grep "<version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` |
| local SCALA_VERSION=`grep "<scala.binary.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` |
| local SPARK_VERSION=`grep "<spark.binary.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` |
| |
| HIVEMALL_BIN="${_DIR}/../target/hivemall-spark-${SPARK_VERSION}_${SCALA_VERSION}-${HIVEMALL_VERSION}-with-dependencies.jar" |
| if [ ! -f "${HIVEMALL_BIN}" ]; then |
| install_mvn && ${MVN_BIN} validate && ${MVN_BIN} -f "${_DIR}/../pom.xml" clean package -P"spark-${SPARK_VERSION}" -DskipTests |
| if [ $? = 127 ]; then |
| echo "Failed to compile hivemall for spark-${SPARK_VERSION}" |
| exit 1 |
| fi |
| fi |
| } |
| |
| # Install the proper version of Spark for launching spark-shell |
| install_spark |
| |
| # Compile hivemall for the Spark version |
| compile_hivemall |
| |
| # Reset the current working directory |
| cd "${_CALLING_DIR}" |
| |
| echo "Using \`spark-shell\` from path: $SPARK_BIN" 1>&2 |
| |
| # Last, call the `spark-shell` command as usual |
| ${SPARK_BIN} --properties-file ${_DIR}/../conf/spark-defaults.conf "$@" |
| |