#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
# script and the ExecutorRunner in standalone cluster mode.
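#
# Illustrative invocation from a caller (hypothetical path; adjust to wherever this
# script actually lives, e.g. assuming it is installed as $MAHOUT_HOME/bin/compute-classpath.sh):
#   CLASSPATH="$("$MAHOUT_HOME"/bin/compute-classpath.sh)"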

# Figure out where Spark is installed
#FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
FWDIR="$SPARK_HOME"

#. "$FWDIR"/bin/load-spark-env.sh # not executable by default in $SPARK_HOME/bin
"$MAHOUT_HOME"/bin/mahout-load-spark-env.sh

# Compute the Scala version. Note: Mahout has not been tested with Scala 2.11.
# Set SPARK_SCALA_VERSION if it is not already set.
if [ -z "$SPARK_SCALA_VERSION" ]; then
ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10"
ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11"
ASSEMBLY_DIR3="$FWDIR/assembly/target/scala-2.12"
# lazily check for all three scala versions 2.10, 2.11, 2.12. TODO: use xors later
if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" && -d "$ASSEMBLY_DIR3" ]]; then
echo -e "Presence of build for scala versions(SCALA 2.10 and SCALA 2.11 and scala 2.12) detected." 1>&2
echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.12 in spark-env.sh.' 1>&2
exit 1
fi
if [[ -d "$ASSEMBLY_DIR1" ]]; then
export SPARK_SCALA_VERSION="2.10"
elif [[ -d "$ASSEMBLY_DIR2" ]]; then
export SPARK_SCALA_VERSION="2.11"
else
export SPARK_SCALA_VERSION="2.12"
fi
fi
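
# Helper: append a non-empty path to CLASSPATH, using ':' as the separator.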
function appendToClasspath() {
  if [ -n "$1" ]; then
    if [ -n "$CLASSPATH" ]; then
      CLASSPATH="$CLASSPATH:$1"
    else
      CLASSPATH="$1"
    fi
  fi
}
appendToClasspath "$SPARK_CLASSPATH"
appendToClasspath "$SPARK_SUBMIT_CLASSPATH"

# Build up classpath
if [ -n "$SPARK_CONF_DIR" ]; then
  appendToClasspath "$SPARK_CONF_DIR"
else
  appendToClasspath "$FWDIR/conf"
fi
ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SPARK_SCALA_VERSION"
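
# Prefer the 'jar' tool from JAVA_HOME when it is set; it is used below to sanity-check the assembly jar.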
if [ -n "$JAVA_HOME" ]; then
JAR_CMD="$JAVA_HOME/bin/jar"
else
JAR_CMD="jar"
fi

# A developer option to prepend more recently compiled Spark classes
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
  echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
    "classes ahead of assembly." >&2
  # Spark classes
  appendToClasspath "$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/repl/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/mllib/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/bagel/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/graphx/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/streaming/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/sql/catalyst/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/sql/core/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/sql/hive/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/sql/hive-thriftserver/target/scala-$SPARK_SCALA_VERSION/classes"
  appendToClasspath "$FWDIR/yarn/stable/target/scala-$SPARK_SCALA_VERSION/classes"

  # Jars for shaded deps in their original form (copied here during build)
  appendToClasspath "$FWDIR/core/target/jars/*"
fi

# Use spark-assembly jar from either RELEASE or assembly directory
if [ -f "$FWDIR/RELEASE" ]; then
  assembly_folder="$FWDIR"/lib
else
  assembly_folder="$ASSEMBLY_DIR"
fi
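
# Expect exactly one spark-assembly*hadoop*.jar in the chosen folder; fail otherwise.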
num_jars=0
for f in "${assembly_folder}"/spark-assembly*hadoop*.jar; do
  if [[ ! -e "$f" ]]; then
    echo "Failed to find Spark assembly in $assembly_folder" 1>&2
    echo "You need to build Spark before running this program." 1>&2
    exit 1
  fi
  ASSEMBLY_JAR="$f"
  num_jars=$((num_jars+1))
done
if [ "$num_jars" -gt "1" ]; then
echo "Found multiple Spark assembly jars in $assembly_folder:" 1>&2
ls "${assembly_folder}"/spark-assembly*hadoop*.jar 1>&2
echo "Please remove all but one jar." 1>&2
exit 1
fi

# Only able to make this check if the 'jar' command is available
if command -v "$JAR_CMD" > /dev/null 2>&1; then
  # Verify that the Java versions used to build the jar and to run Spark are compatible
  jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
  if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
    echo "Loading Spark jar with '$JAR_CMD' failed." 1>&2
    echo "This is likely because Spark was compiled with Java 7 and run" 1>&2
    echo "with Java 6 (see SPARK-1703). Please use Java 7 to run Spark" 1>&2
    echo "or build Spark with Java 6." 1>&2
    exit 1
  fi
fi
appendToClasspath "$ASSEMBLY_JAR"

# When Hive support is needed, Datanucleus jars must be included on the classpath.
# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
if [ -f "$FWDIR/RELEASE" ]; then
  datanucleus_dir="$FWDIR"/lib
else
  datanucleus_dir="$FWDIR"/lib_managed/jars
fi
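
# Build a ':'-separated list of any datanucleus-*.jar files found under $datanucleus_dir.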
datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar$")"
datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"
if [ -n "$datanucleus_jars" ]; then
appendToClasspath "$datanucleus_jars"
fi

# Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
if [[ $SPARK_TESTING == 1 ]]; then
  appendToClasspath "$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/repl/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/mllib/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/bagel/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/graphx/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/streaming/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/sql/catalyst/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/sql/core/target/scala-$SPARK_SCALA_VERSION/test-classes"
  appendToClasspath "$FWDIR/sql/hive/target/scala-$SPARK_SCALA_VERSION/test-classes"
fi

# Add the Hadoop conf dir if given -- otherwise FileSystem.* calls, etc. will fail!
# Note: this assumes that there is either a HADOOP_CONF_DIR or a YARN_CONF_DIR which hosts
# the configuration files.
appendToClasspath "$HADOOP_CONF_DIR"
appendToClasspath "$YARN_CONF_DIR"

# To allow for distributions to append needed libraries to the classpath (e.g. when
# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
# append it to the final classpath.
appendToClasspath "$SPARK_DIST_CLASSPATH"

echo "$CLASSPATH"