#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file is used for Binder integration to make PySpark available in
# Jupyter notebooks.
#
# Inputs:
#   - python/pyspark/version.py, read relative to the current directory.
#   - `git describe`, consulted for an exact tag on the current commit; its
#     failure is tolerated and treated as "not tagged".
# Side effects:
#   - Installs plotly, pandas and pyspark (with extras) through pip.
#   - Appends `export` lines for PYARROW_IGNORE_TIMEZONE, PATH, SPARK_HOME and
#     SPARK_VERSION to ~/.profile.
#   - Writes ~/.ipython/profile_default/startup/00-init.py.

# SPARK-45706: Should fail fast. Otherwise, the Binder image is successfully
# built, and it cannot be rebuilt.
# `-u` additionally turns a reference to an unset variable into an error
# instead of a silent empty expansion.
set -euo pipefail

#######################################
# Appends an `export NAME=VALUE` line to ~/.profile.
# VALUE is written with printf's %q so that whitespace or shell
# metacharacters in it cannot break or alter the line when the profile is
# sourced later; ordinary path and version strings are written unchanged.
# Arguments: $1 - variable name
#            $2 - value
#######################################
append_profile_export() {
  printf 'export %s=%q\n' "$1" "$2" >> ~/.profile
}

VERSION=$(python -c "exec(open('python/pyspark/version.py').read()); print(__version__)")
# `|| true` keeps a missing exact tag from aborting the script under `set -e`;
# TAG is simply left empty in that case.
TAG=$(git describe --tags --exact-match 2> /dev/null || true)

# If a commit is tagged, exactly specified version of pyspark should be installed to avoid
# a kind of accident that an old version of pyspark is installed in the live notebook environment.
# See SPARK-37170
if [[ -n "$TAG" ]]; then
  SPECIFIER="=="
else
  SPECIFIER="<="
fi

pip install plotly "pandas<2.0.0" "pyspark[sql,ml,mllib,pandas_on_spark,connect]${SPECIFIER}${VERSION}"

# Set 'PYARROW_IGNORE_TIMEZONE' to suppress warnings from PyArrow.
append_profile_export PYARROW_IGNORE_TIMEZONE 1

# Add sbin to PATH to run `start-connect-server.sh`.
# Note: the PATH of this build shell is expanded here and recorded as-is.
SPARK_HOME=$(python -c "from pyspark.find_spark_home import _find_spark_home; print(_find_spark_home())")
append_profile_export PATH "${PATH}:${SPARK_HOME}/sbin"
append_profile_export SPARK_HOME "${SPARK_HOME}"

# Add Spark version to env for running command dynamically based on Spark version.
SPARK_VERSION=$(python -c "import pyspark; print(pyspark.__version__)")
append_profile_export SPARK_VERSION "${SPARK_VERSION}"

# Suppress warnings from Spark jobs, and UI progress bar.
# The quoted 'EOF' delimiter writes the Python text literally, with no shell
# expansion. The trailing empty line is intentional; it is part of the
# generated file.
mkdir -p ~/.ipython/profile_default/startup
cat > ~/.ipython/profile_default/startup/00-init.py <<'EOF'
from pyspark.sql import SparkSession
SparkSession.builder.config('spark.ui.showConsoleProgress', 'false').getOrCreate().sparkContext.setLogLevel('FATAL')

EOF