#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Figure out where Spark is installed
FWDIR="$(cd "$(dirname "$0")"/..; pwd)"

# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

source "$FWDIR/bin/utils.sh"

SCALA_VERSION=2.10

function usage() {
  echo "Usage: ./bin/pyspark [options]" 1>&2
  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
  exit 0
}

# Print usage if any argument is -h or --help
for arg in "$@"; do
  [[ $arg == "--help" || $arg == "-h" ]] && usage
done

# Exit if the user hasn't compiled Spark
if [ ! -f "$FWDIR/RELEASE" ]; then
  if ! ls "$FWDIR"/assembly/target/scala-"$SCALA_VERSION"/spark-assembly*hadoop*.jar >& /dev/null; then
    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
    echo "You need to build Spark before running this program" 1>&2
    exit 1
  fi
fi
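
# (Hint: at the time of writing, an assembly was typically produced with
# "sbt/sbt assembly" from the Spark root; see the project's build
# documentation for the authoritative command.)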

. "$FWDIR/bin/load-spark-env.sh"

# Figure out which Python executable to use
if [ -z "$PYSPARK_PYTHON" ]; then
  PYSPARK_PYTHON="python"
fi
export PYSPARK_PYTHON
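
# For example (hypothetical interpreter path), a specific Python can be
# chosen per invocation without editing this script:
#   PYSPARK_PYTHON=/usr/bin/python2.7 ./bin/pyspark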

# Add the PySpark classes to the Python path:
export PYTHONPATH="$SPARK_HOME/python:$PYTHONPATH"
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH"
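
# A minimal sketch (paths assumed) of reusing the same setup from a plain
# interpreter outside this launcher:
#   export SPARK_HOME=/path/to/spark
#   export PYTHONPATH="$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH"
#   python -c 'from pyspark import SparkContext; sc = SparkContext("local", "test")'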

# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
export PYTHONSTARTUP="$FWDIR/python/pyspark/shell.py"
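
# Any interactive CPython session honors PYTHONSTARTUP, so the interpreter
# exec'd below runs shell.py before the first prompt; for illustration:
#   $ ./bin/pyspark
#   >>> sc   # the SparkContext that shell.py creates is already in scope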

# If IPython options are specified, assume user wants to run IPython
if [ -n "$IPYTHON_OPTS" ]; then
  IPYTHON=1
fi
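
# A typical invocation (the IPython flags shown are illustrative):
#   IPYTHON_OPTS="notebook" ./bin/pyspark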

# Build up arguments list manually to preserve quotes and backslashes.
# We export Spark submit arguments as an environment variable because shell.py must run as a
# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"
PYSPARK_SUBMIT_ARGS=""
whitespace="[[:space:]]"
for i in "${SUBMISSION_OPTS[@]}"; do
  if [[ $i =~ \" ]]; then i=$(echo "$i" | sed 's/\"/\\\"/g'); fi
  if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
  PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
done
export PYSPARK_SUBMIT_ARGS
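
# As a worked example (hypothetical invocation):
#   ./bin/pyspark --master local[4] --name "my app"
# exports PYSPARK_SUBMIT_ARGS as:  --master local[4] --name "my app"
# so shell.py can recover the exact options even though PYTHONSTARTUP
# scripts receive no arguments of their own.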

# If a python file is provided, directly run spark-submit.
if [[ "$1" =~ \.py$ ]]; then
  echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
  echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
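  # For example, instead of:      ./bin/pyspark my_app.py --some-arg
  # run (names are placeholders): ./bin/spark-submit my_app.py --some-arg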
  primary="$1"
  shift
  gatherSparkSubmitOpts "$@"
  exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
else
  # Only use ipython if no command line arguments were provided [SPARK-1134]
  if [[ "$IPYTHON" = "1" ]]; then
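    # $IPYTHON_OPTS is deliberately left unquoted so that multiple
    # space-separated options split into separate arguments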
    exec ipython $IPYTHON_OPTS
  else
    if [[ -n $SPARK_TESTING ]]; then
      exec "$PYSPARK_PYTHON" -m doctest
    else
      exec "$PYSPARK_PYTHON"
    fi
  fi
fi