make-distribution.sh - spark - Git at Google

 #!/usr/bin/env bash

 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #

 #
 # Script to create a binary distribution for easy deploys of Spark.
 # The distribution directory defaults to dist/ but can be overridden below.
 # The distribution contains fat (assembly) jars that include the Scala library,
 # so it is completely self contained.
 # It does not contain source or *.class files.
 #
 # Optional Arguments
 #      --tgz: Additionally creates spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz
 #      --hadoop VERSION: Builds against specified version of Hadoop.
 #      --with-yarn: Enables support for Hadoop YARN.
 #
 # Recommended deploy/testing procedure (standalone mode):
 # 1) Rsync / deploy the dist/ dir to one host
 # 2) cd to deploy dir; ./bin/start-master.sh
 # 3) Verify master is up by visiting web page, ie http://master-ip:8080.  Note the spark:// URL.
 # 4) ./bin/start-slave.sh 1 <<spark:// URL>>
 # 5) MASTER="spark://my-master-ip:7077" ./spark-shell
 #

 # Figure out where the Spark framework is installed.
 # FWDIR becomes the absolute path of the directory containing this script.
 # $0 and the dirname result are quoted so paths containing spaces survive, and
 # the script stops if that directory cannot be entered, because every later
 # path is derived from FWDIR.
 FWDIR="$(cd "$(dirname "$0")" && pwd)" || exit 1
 DISTDIR="$FWDIR/dist"

 # Get version from SBT
 export TERM=dumb   # Prevents color codes in SBT output
 # Keep the last line of SBT's output, take its second tab-separated field
 # (lines without a tab pass through cut unchanged), then let sed keep only the
 # leading run of letters, digits, dots and dashes.
 VERSION=$("$FWDIR/sbt/sbt" "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
 if [ -z "$VERSION" ]; then
   # Deliberately not fatal, so existing invocations keep working; an empty
   # version only affects the names and messages built from it.
   echo "Warning: could not determine the Spark version from SBT output" >&2
 fi

 # Initialize defaults
 SPARK_HADOOP_VERSION=1.0.4
 SPARK_YARN=false
 MAKE_TGZ=false

 # Parse arguments
 #
 # parse_args ARG...
 #   Updates SPARK_HADOOP_VERSION, SPARK_YARN and MAKE_TGZ from the given
 #   command-line arguments.
 #     --hadoop VERSION  sets SPARK_HADOOP_VERSION; exits with status 1 when the
 #                       value is missing, empty, or looks like another option.
 #     --with-yarn       sets SPARK_YARN=true
 #     --tgz             sets MAKE_TGZ=true
 #   Any other argument is reported on stderr and skipped.
 parse_args() {
   while (( $# > 0 )); do
     case "$1" in
       --hadoop)
         # Without this check "--hadoop" as the last argument stored an empty
         # version, and "--hadoop --tgz" swallowed the next flag as the version.
         if (( $# < 2 )) || [[ -z "$2" || "$2" == -* ]]; then
           echo "Error: --hadoop requires a version argument" >&2
           exit 1
         fi
         SPARK_HADOOP_VERSION="$2"
         shift
         ;;
       --with-yarn)
         SPARK_YARN=true
         ;;
       --tgz)
         MAKE_TGZ=true
         ;;
       *)
         echo "Warning: ignoring unrecognized argument '$1'" >&2
         ;;
     esac
     shift
   done
 }
 parse_args "$@"

 # Announce what is about to be produced: the tarball name when MAKE_TGZ is
 # "true", otherwise the plain distribution directory.
 case "$MAKE_TGZ" in
   true)
     echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
     ;;
   *)
     echo "Making distribution for Spark $VERSION in $DISTDIR..."
     ;;
 esac

 # Report the selected Hadoop version and whether YARN support is switched on.
 echo "Hadoop version set to $SPARK_HADOOP_VERSION"
 case "$SPARK_YARN" in
   true)
     echo "YARN enabled"
     ;;
   *)
     echo "YARN disabled"
     ;;
 esac

 # Build fat JAR
 # The Hadoop version and YARN switch are exported so the SBT child process
 # finds them in its environment.
 export SPARK_HADOOP_VERSION
 export SPARK_YARN
 # Abort if the build fails; carrying on would replace the existing dist/
 # directory with whatever jars an earlier build left behind.
 if ! "$FWDIR/sbt/sbt" "assembly/assembly"; then
   echo "Error: SBT assembly build failed" >&2
   exit 1
 fi

 # Make directories
 # Refuse to continue with an empty FWDIR or DISTDIR: the rm -rf and the copies
 # below would otherwise act on paths relative to the filesystem root.
 : "${FWDIR:?FWDIR must be set}" "${DISTDIR:?DISTDIR must be set}"
 rm -rf "$DISTDIR"
 mkdir -p "$DISTDIR/jars" || exit 1
 echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"

 # Copy jars
 # FWDIR is quoted so a checkout path containing spaces works; the wildcard part
 # stays unquoted so it still expands.  A missing assembly jar is fatal (cp
 # prints the reason itself).
 cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/" || exit 1

 # Copy other things
 mkdir "$DISTDIR"/conf || exit 1
 cp "$FWDIR"/conf/*.template "$DISTDIR"/conf || exit 1
 cp -r "$FWDIR/bin" "$DISTDIR" || exit 1
 cp -r "$FWDIR/python" "$DISTDIR" || exit 1
 # Top-level launcher scripts that live next to this script.
 for launcher in spark-class spark-shell spark-executor pyspark; do
   cp "$FWDIR/$launcher" "$DISTDIR" || exit 1
 done


 # make_tgz
 #   Packages $DISTDIR as spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz
 #   in the current working directory.  The archive holds one top-level
 #   directory, spark-$VERSION, staged temporarily under $FWDIR.
 #   Reads FWDIR, DISTDIR, VERSION and SPARK_HADOOP_VERSION; sets TARDIR.
 #   Exits with the failing command's status if staging or archiving fails.
 make_tgz() {
   TARDIR="${FWDIR:?FWDIR must be set}/spark-$VERSION"
   # Clear any leftover staging directory first.  If it already existed,
   # "cp -r" would nest dist/ inside it and the archive layout would be wrong.
   # Nothing extra is lost: this directory is removed after packaging anyway.
   rm -rf "$TARDIR"
   local tgz_status=0
   cp -r "$DISTDIR" "$TARDIR" &&
     tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION" ||
     tgz_status=$?
   # Always remove the staging copy, even when packaging failed.
   rm -rf "$TARDIR"
   if (( tgz_status != 0 )); then
     echo "Error: failed to create spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" >&2
     exit "$tgz_status"
   fi
 }

 if [[ "$MAKE_TGZ" == "true" ]]; then
   make_tgz
 fi
	#!/usr/bin/env bash

	#
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#

	#
	# Script to create a binary distribution for easy deploys of Spark.
	# The distribution directory defaults to dist/ but can be overridden below.
	# The distribution contains fat (assembly) jars that include the Scala library,
	# so it is completely self contained.
	# It does not contain source or *.class files.
	#
	# Optional Arguments
	# --tgz: Additionally creates spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz
	# --hadoop VERSION: Builds against specified version of Hadoop.
	# --with-yarn: Enables support for Hadoop YARN.
	#
	# Recommended deploy/testing procedure (standalone mode):
	# 1) Rsync / deploy the dist/ dir to one host
	# 2) cd to deploy dir; ./bin/start-master.sh
	# 3) Verify master is up by visiting web page, ie http://master-ip:8080. Note the spark:// URL.
	# 4) ./bin/start-slave.sh 1 <<spark:// URL>>
	# 5) MASTER="spark://my-master-ip:7077" ./spark-shell
	#

	# Figure out where the Spark framework is installed.
	# FWDIR becomes the absolute path of the directory containing this script.
	# $0 and the dirname result are quoted so paths containing spaces survive, and
	# the script stops if that directory cannot be entered, because every later
	# path is derived from FWDIR.
	FWDIR="$(cd "$(dirname "$0")" && pwd)" || exit 1
	DISTDIR="$FWDIR/dist"

	# Get version from SBT
	export TERM=dumb # Prevents color codes in SBT output
	# The stages must be joined by real pipes: a backslash-escaped "|" is handed
	# to sbt as a literal argument instead of starting a pipeline.
	# Keep the last line of SBT's output, take its second tab-separated field
	# (lines without a tab pass through cut unchanged), then let sed keep only the
	# leading run of letters, digits, dots and dashes.
	VERSION=$("$FWDIR/sbt/sbt" "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
	if [ -z "$VERSION" ]; then
	  # Deliberately not fatal, so existing invocations keep working; an empty
	  # version only affects the names and messages built from it.
	  echo "Warning: could not determine the Spark version from SBT output" >&2
	fi

	# Initialize defaults
	SPARK_HADOOP_VERSION=1.0.4
	SPARK_YARN=false
	MAKE_TGZ=false

	# Parse arguments
	#
	# parse_args ARG...
	#   Updates SPARK_HADOOP_VERSION, SPARK_YARN and MAKE_TGZ from the given
	#   command-line arguments.
	#     --hadoop VERSION  sets SPARK_HADOOP_VERSION; exits with status 1 when the
	#                       value is missing, empty, or looks like another option.
	#     --with-yarn       sets SPARK_YARN=true
	#     --tgz             sets MAKE_TGZ=true
	#   Any other argument is reported on stderr and skipped.
	parse_args() {
	  while (( $# > 0 )); do
	    case "$1" in
	      --hadoop)
	        # Without this check "--hadoop" as the last argument stored an empty
	        # version, and "--hadoop --tgz" swallowed the next flag as the version.
	        if (( $# < 2 )) || [[ -z "$2" || "$2" == -* ]]; then
	          echo "Error: --hadoop requires a version argument" >&2
	          exit 1
	        fi
	        SPARK_HADOOP_VERSION="$2"
	        shift
	        ;;
	      --with-yarn)
	        SPARK_YARN=true
	        ;;
	      --tgz)
	        MAKE_TGZ=true
	        ;;
	      *)
	        echo "Warning: ignoring unrecognized argument '$1'" >&2
	        ;;
	    esac
	    shift
	  done
	}
	parse_args "$@"

	# Announce what is about to be produced: the tarball name when MAKE_TGZ is
	# "true", otherwise the plain distribution directory.
	case "$MAKE_TGZ" in
	  true)
	    echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
	    ;;
	  *)
	    echo "Making distribution for Spark $VERSION in $DISTDIR..."
	    ;;
	esac

	# Report the selected Hadoop version and whether YARN support is switched on.
	echo "Hadoop version set to $SPARK_HADOOP_VERSION"
	case "$SPARK_YARN" in
	  true)
	    echo "YARN enabled"
	    ;;
	  *)
	    echo "YARN disabled"
	    ;;
	esac

	# Build fat JAR
	# The Hadoop version and YARN switch are exported so the SBT child process
	# finds them in its environment.
	export SPARK_HADOOP_VERSION
	export SPARK_YARN
	# Abort if the build fails; carrying on would replace the existing dist/
	# directory with whatever jars an earlier build left behind.
	if ! "$FWDIR/sbt/sbt" "assembly/assembly"; then
	  echo "Error: SBT assembly build failed" >&2
	  exit 1
	fi

	# Make directories
	# Refuse to continue with an empty FWDIR or DISTDIR: the rm -rf and the copies
	# below would otherwise act on paths relative to the filesystem root.
	: "${FWDIR:?FWDIR must be set}" "${DISTDIR:?DISTDIR must be set}"
	rm -rf "$DISTDIR"
	mkdir -p "$DISTDIR/jars" || exit 1
	echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"

	# Copy jars
	# Wildcards are required here: presumably the scala* directory and the
	# assembly jar name carry version strings, so a literal
	# scala/assemblyhadoop.jar path would not match anything (TODO confirm the
	# exact naming against the SBT build).  FWDIR is quoted so a checkout path
	# containing spaces works; the wildcard part stays unquoted so it expands.
	# A missing assembly jar is fatal (cp prints the reason itself).
	cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/" || exit 1

	# Copy other things
	mkdir "$DISTDIR"/conf || exit 1
	cp "$FWDIR"/conf/*.template "$DISTDIR"/conf || exit 1
	cp -r "$FWDIR/bin" "$DISTDIR" || exit 1
	cp -r "$FWDIR/python" "$DISTDIR" || exit 1
	# Top-level launcher scripts that live next to this script.
	for launcher in spark-class spark-shell spark-executor pyspark; do
	  cp "$FWDIR/$launcher" "$DISTDIR" || exit 1
	done


	# make_tgz
	#   Packages $DISTDIR as spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz
	#   in the current working directory.  The archive holds one top-level
	#   directory, spark-$VERSION, staged temporarily under $FWDIR.
	#   Reads FWDIR, DISTDIR, VERSION and SPARK_HADOOP_VERSION; sets TARDIR.
	#   Exits with the failing command's status if staging or archiving fails.
	make_tgz() {
	  TARDIR="${FWDIR:?FWDIR must be set}/spark-$VERSION"
	  # Clear any leftover staging directory first.  If it already existed,
	  # "cp -r" would nest dist/ inside it and the archive layout would be wrong.
	  # Nothing extra is lost: this directory is removed after packaging anyway.
	  rm -rf "$TARDIR"
	  local tgz_status=0
	  cp -r "$DISTDIR" "$TARDIR" &&
	    tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION" ||
	    tgz_status=$?
	  # Always remove the staging copy, even when packaging failed.
	  rm -rf "$TARDIR"
	  if (( tgz_status != 0 )); then
	    echo "Error: failed to create spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" >&2
	    exit "$tgz_status"
	  fi
	}

	if [[ "$MAKE_TGZ" == "true" ]]; then
	  make_tgz
	fi