#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#########################
# The command line help #
#########################
# Print the command-line help to stderr and exit.
# Arguments: $1 - optional exit status (defaults to 1, preserving the old
#                 behavior for error paths; pass 0 for an explicit --help).
usage() {
echo "Usage: $0" >&2
echo " -h | --hadoop, hadoop-version" >&2
echo " -s | --spark, spark version" >&2
echo " -d | --directory, workspace directory" >&2
exit "${1:-1}"
}
# Handle --help before getopts runs (getopts would treat it as the "-"
# option). `usage` terminates the script, so the exit below is a guard only.
if [[ "${1:-}" == "--help" ]]; then
usage
exit 0
fi
# Parse command-line flags into the global configuration variables.
# Supports short flags (-h/-s/-d) and GNU-style long options
# (--hadoop/--spark/--directory) via the getopts "-:" trick, where OPTARG
# holds the long-option name and ${!OPTIND} its value.
# Globals written: SPARK_HADOOP_VERSION, SPARK_VERSION, WORK_SPACE_ROOT
parse_args() {
while getopts ":h:s:d:-:" opt; do
case "$opt" in
h) SPARK_HADOOP_VERSION="$OPTARG"
printf "Specified hadoop version is: %s\n" "$SPARK_HADOOP_VERSION"
;;
s) SPARK_VERSION="$OPTARG"
printf "Specified spark version is: %s\n" "$SPARK_VERSION"
;;
d) WORK_SPACE_ROOT="$OPTARG"
printf "Specified workspace directory is: %s\n" "$WORK_SPACE_ROOT"
;;
-)
case "$OPTARG" in
hadoop)
SPARK_HADOOP_VERSION="${!OPTIND}"; OPTIND=$(( OPTIND + 1 ))
printf "Specified hadoop version is: %s\n" "$SPARK_HADOOP_VERSION"
;;
spark)
SPARK_VERSION="${!OPTIND}"; OPTIND=$(( OPTIND + 1 ))
# Fix: this message previously said "hadoop version" for the spark value.
printf "Specified spark version is: %s\n" "$SPARK_VERSION"
;;
directory)
WORK_SPACE_ROOT="${!OPTIND}"; OPTIND=$(( OPTIND + 1 ))
printf "Specified workspace directory is: %s\n" "$WORK_SPACE_ROOT"
;;
*) echo "Invalid option --$OPTARG" >&2
;;
esac ;;
:) echo "Option -$OPTARG requires an argument" >&2
;;
\?) echo "Invalid option -$OPTARG" >&2
;;
esac
done
}
parse_args "$@"
# Fill in defaults for anything not supplied on the command line.
if [ -z "$WORK_SPACE_ROOT" ]; then
# Bug fix: `cd` prints nothing on success, so the old `$(cd ... || pwd)`
# left SCRIPT_PATH empty and the workspace silently became ".".
# `cd ... && pwd` yields the script's absolute directory.
SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd)
# Default workspace is the parent of the directory holding this script.
WORK_SPACE_ROOT=$(dirname "$SCRIPT_PATH")
echo "Using default workspace dir: $WORK_SPACE_ROOT"
fi
if [ -z "$SPARK_HADOOP_VERSION" ]; then
SPARK_HADOOP_VERSION=2.7
echo "Using default spark hadoop version: $SPARK_HADOOP_VERSION"
fi
if [ -z "$SPARK_VERSION" ]; then
SPARK_VERSION=2.4.4
echo "Using default spark version: $SPARK_VERSION"
fi
# Derived names and paths for the Spark distribution, its checksum files,
# and the unpacked SPARK_HOME. The distribution name is computed once and
# reused so the version/hadoop pair cannot drift between variables.
SPARK_DIST_NAME=spark-${SPARK_VERSION}-bin-hadoop${SPARK_HADOOP_VERSION}
SPARK_BINARY_FILE_NAME=${SPARK_DIST_NAME}.tgz
SPARK_BINARY_FILE_PATH=${WORK_SPACE_ROOT}/${SPARK_BINARY_FILE_NAME}
SPARK_BINARY_FILE_CHECKSUM_FILE_NAME=${SPARK_BINARY_FILE_NAME}.sha512
SPARK_BINARY_FILE_CHECKSUM_FILE_PATH=${WORK_SPACE_ROOT}/${SPARK_BINARY_FILE_CHECKSUM_FILE_NAME}
FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_NAME=${SPARK_BINARY_FILE_CHECKSUM_FILE_NAME}.formatted
FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_PATH=${WORK_SPACE_ROOT}/${FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_NAME}
SPARK_HOME=${WORK_SPACE_ROOT}/${SPARK_DIST_NAME}
SPARK_EXAMPLE_JAR=local://${SPARK_HOME}/examples/jars/spark-examples_2.12-${SPARK_VERSION}.jar
# Kubernetes / spark-submit settings (assumes a local `kubectl proxy`).
K8S_ENDPOINT=http://localhost:8001
SPARK_EXECUTOR_NUM=1
SPARK_DOCKER_IMAGE=yunikorn/spark:${SPARK_VERSION}
# Download the Spark distribution and its sha512 checksum (both cached in
# the workspace across runs), verify the checksum, and unpack on success.
if [ -f "$SPARK_BINARY_FILE_PATH" ]; then
echo "The binary file $SPARK_BINARY_FILE_NAME has been cached!"
else
echo "The binary file $SPARK_BINARY_FILE_NAME did not exist, try to download."
# Security fix: fetch over https; a plain-http download plus plain-http
# checksum defeats the verification below.
wget "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_BINARY_FILE_NAME}" -O "${SPARK_BINARY_FILE_PATH}"
fi
if [ -f "$SPARK_BINARY_FILE_CHECKSUM_FILE_PATH" ]; then
echo "The binary checksum file $SPARK_BINARY_FILE_CHECKSUM_FILE_NAME has been cached!"
else
echo "The binary checksum file $SPARK_BINARY_FILE_CHECKSUM_FILE_NAME did not exist, try to download."
wget "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_BINARY_FILE_CHECKSUM_FILE_NAME}" -O "${SPARK_BINARY_FILE_CHECKSUM_FILE_PATH}"
fi
if [ -f "$FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_PATH" ]; then
echo "The formatted binary checksum file $FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_NAME has been cached!"
else
# Format the official checksum file ("NAME: HEX...") into the
# "HEX  PATH" layout that `shasum -c` expects.
echo "The formatted binary checksum file $FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_NAME did not exist, try to generate."
< "$SPARK_BINARY_FILE_CHECKSUM_FILE_PATH" tr -d " \t\n\r" | awk -v awkvar="$SPARK_BINARY_FILE_PATH" -F: '{print $2 " " awkvar}' > "$FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_PATH"
fi
# check signature to verify the completeness
if [[ 'OK' == $(shasum -c -a 512 "$FORMATTED_SPARK_BINARY_FILE_CHECKSUM_FILE_PATH" | awk '{print $2}') ]]; then
echo "The checksum is matched!"
echo "Try to remove the old unpacked dir and re-uncompress it"
rm -rf "${WORK_SPACE_ROOT}/spark-${SPARK_VERSION}-bin-hadoop${SPARK_HADOOP_VERSION}"
tar -xvzf "$SPARK_BINARY_FILE_PATH" -C "$WORK_SPACE_ROOT"
else
echo "The checksum is not matched, Removing the incompleted file, please download it again."
rm -f "$SPARK_BINARY_FILE_PATH"
# Bug fix: was `exit 0` — a failed verification must not report success
# (CI would treat the run as green despite a corrupt download).
exit 1
fi
# spark submit command
"${SPARK_HOME}"/bin/spark-submit \
--master k8s://${K8S_ENDPOINT} --deploy-mode cluster --name spark-pi \
--class org.apache.spark.examples.SparkPi \
--conf spark.executor.instances=${SPARK_EXECUTOR_NUM} \
--conf spark.kubernetes.container.image=${SPARK_DOCKER_IMAGE} \
--conf spark.kubernetes.driver.podTemplateFile=../driver.yaml \
--conf spark.kubernetes.executor.podTemplateFile=../executor.yaml \
"${SPARK_EXAMPLE_JAR}"