blob: 8ff3aaf5a142206f2a20f5eec4d3ed4cb9bd0252 [file] [log] [blame]
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is used for Binder integration to install PySpark available in
# Jupyter notebook.
VERSION=$(python -c "exec(open('python/pyspark/version.py').read()); print(__version__)")
TAG=$(git describe --tags --exact-match 2>/dev/null)
# If a commit is tagged, exactly specified version of pyspark should be installed to avoid
# a kind of accident that an old version of pyspark is installed in the live notebook environment.
# See SPARK-37170
if [ -n "$TAG" ]; then
SPECIFIER="=="
else
SPECIFIER="<="
fi
if [[ ! $VERSION < "3.4.0" ]]; then
pip install plotly "pyspark[sql,ml,mllib,pandas_on_spark,connect]$SPECIFIER$VERSION"
else
pip install plotly "pyspark[sql,ml,mllib,pandas_on_spark]$SPECIFIER$VERSION"
fi
# Set 'PYARROW_IGNORE_TIMEZONE' to surpress warnings from PyArrow.
echo "export PYARROW_IGNORE_TIMEZONE=1" >> ~/.profile
# Add sbin to PATH to run `start-connect-server.sh`.
SPARK_HOME=$(python -c "from pyspark.find_spark_home import _find_spark_home; print(_find_spark_home())")
echo "export PATH=${PATH}:${SPARK_HOME}/sbin" >> ~/.profile
# Add Spark version to env for running command dynamically based on Spark version.
SPARK_VERSION=$(python -c "import pyspark; print(pyspark.__version__)")
echo "export SPARK_VERSION=${SPARK_VERSION}" >> ~/.profile
# Surpress warnings from Spark jobs, and UI progress bar.
mkdir -p ~/.ipython/profile_default/startup
echo """from pyspark.sql import SparkSession
SparkSession.builder.config('spark.ui.showConsoleProgress', 'false').getOrCreate().sparkContext.setLogLevel('FATAL')
""" > ~/.ipython/profile_default/startup/00-init.py