#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"
source $FWDIR/bin/utils.sh
SCALA_VERSION=2.10
function usage() {
  echo "Usage: ./bin/pyspark [options]" 1>&2
  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
  exit 0
}
if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
usage
fi
# Exit if the user hasn't compiled Spark
if [ ! -f "$FWDIR/RELEASE" ]; then
  ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
  if [[ $? != 0 ]]; then
    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
    echo "You need to build Spark before running this program" 1>&2
    exit 1
  fi
fi
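# For reference (illustrative; the exact build invocation depends on how this
# source tree was obtained and which build tool is preferred), the assembly
# jar checked for above can typically be produced from the source root with:
#   sbt/sbt assembly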
. "$FWDIR"/bin/load-spark-env.sh
# Figure out which Python executable to use
if [ -z "$PYSPARK_PYTHON" ]; then
  PYSPARK_PYTHON="python"
fi
export PYSPARK_PYTHON
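# Example (illustrative; the interpreter path is only a placeholder): a user
# can point PySpark at a specific interpreter before launching the shell:
#   PYSPARK_PYTHON=/usr/bin/python2.7 ./bin/pyspark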
# Add the PySpark classes to the Python path:
export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
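# Sanity check (illustrative only, not executed here): the Py4J zip pinned
# above should ship with this release, which can be confirmed with:
#   ls "$SPARK_HOME"/python/lib/py4j-*-src.zip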
# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
# If IPython options are specified, assume user wants to run IPython
if [ -n "$IPYTHON_OPTS" ]; then
  IPYTHON=1
fi
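# Example (illustrative; "notebook" is an IPython argument, not one of this
# script's options): run the PySpark shell inside the IPython notebook:
#   IPYTHON_OPTS="notebook" ./bin/pyspark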
# Build up arguments list manually to preserve quotes and backslashes.
# We export Spark submit arguments as an environment variable because shell.py must run as a
# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"
PYSPARK_SUBMIT_ARGS=""
whitespace="[[:space:]]"
for i in "${SUBMISSION_OPTS[@]}"; do
if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
done
export PYSPARK_SUBMIT_ARGS
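# Worked example (hypothetical invocation, shown only to illustrate the
# quoting loop above): running
#   ./bin/pyspark --master local[4] --name "my app"
# leaves PYSPARK_SUBMIT_ARGS set to:
#   --master local[4] --name "my app"
# i.e. the whitespace-containing value is re-quoted so the Python side can
# re-split the options and hand them to spark-submit intact.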
# If a python file is provided, directly run spark-submit.
if [[ "$1" =~ \.py$ ]]; then
echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
  primary="$1"
  shift
  gatherSparkSubmitOpts "$@"
  exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
else
  # Only use ipython if no command line arguments were provided [SPARK-1134]
  if [[ "$IPYTHON" = "1" ]]; then
    exec ipython $IPYTHON_OPTS
  else
    if [[ -n $SPARK_TESTING ]]; then
      exec "$PYSPARK_PYTHON" -m doctest
    else
      exec "$PYSPARK_PYTHON"
    fi
  fi
fi