| #!/bin/bash |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
# Require exactly two arguments: the Spark version and the Hadoop version.
if [[ "$#" -ne 2 ]]; then
  # Diagnostics belong on stderr so callers capturing stdout aren't polluted.
  echo "usage) $0 [spark version] [hadoop version]" >&2
  echo "   eg) $0 1.3.1 2.6" >&2
  exit 1
fi

SPARK_VERSION="${1}"
HADOOP_VERSION="${2}"

# Trace every command (-x) and abort on the first failure (-e) from here on.
set -xe

MAX_DOWNLOAD_TIME_SEC=590
# Directory containing this script; BASH_SOURCE is preferred over $0 when sourced.
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
# Absolute path to the Zeppelin project root (parent of the script's directory).
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
| |
| ####################################### |
| # Downloads file from the givrn URL. |
| # Ties 3 times with 1s delay, 20s read and 15s connection timeouts. |
| # Globals: |
| # None |
| # Arguments: |
| # url - source URL |
| # Returns: |
| # None |
| ####################################### |
| download_with_retry() { |
| local url="$1" |
| wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "${url}" |
| if [[ "$?" -ne 0 ]]; then |
| echo "3 download attempts for ${url} failed" |
| fi |
| } |
| |
# Local cache directory for downloaded archives, and the archive base name.
SPARK_CACHE=".spark-dist"
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
# Exported so subsequent build/test steps can locate the Spark distribution.
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"

if [[ ! -d "${SPARK_HOME}" ]]; then
  mkdir -p "${SPARK_CACHE}"
  cd "${SPARK_CACHE}" || exit 1
  if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then
    pwd
    ls -la .
    echo "${SPARK_CACHE} does not have ${SPARK_ARCHIVE} downloading ..."

    # download spark from archive if not cached
    echo "${SPARK_VERSION} being downloaded from archives"
    STARTTIME="$(date +%s)"
    #timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
    download_with_retry "http://d3kbcqa49mib13.cloudfront.net/${SPARK_ARCHIVE}.tgz"
    ENDTIME="$(date +%s)"
    DOWNLOADTIME="$((ENDTIME-STARTTIME))"
    echo "Spark download took ${DOWNLOADTIME} seconds"
  fi

  # extract archive in un-cached root, clean-up on failure
  cp "${SPARK_ARCHIVE}.tgz" ..
  cd .. || exit 1
  if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then
    echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2
    rm -rf "${SPARK_ARCHIVE}"
    rm -f "${SPARK_ARCHIVE}.tgz"
    # Fail fast: without this the script previously exited 0 even though
    # SPARK_HOME was never created, masking the failure from callers.
    exit 1
  fi
fi

set +xe