dev/run-tpcds-test - auron - Git at Google

 #!/usr/bin/env bash

 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #

 #
 # Shell script for validating TPCDS query results

 # Abort early when SPARK_HOME is missing: spark-submit is launched from it.
 if [[ -z "${SPARK_HOME}" ]]; then
   echo "env SPARK_HOME not defined" 1>&2
   exit 1
 fi

 # Determine the directory that contains this script (not the caller's
 # working directory), so sibling files resolve wherever the script is run from.
 _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 # Quoted so a path containing whitespace or glob characters prints verbatim.
 echo "$_DIR"

 # Load common functions
 . "${_DIR}/utils.sh"

 # Do some preparations before launching spark-submit.
 # NOTE(review): parse_args_for_spark_submit is not defined in this file
 # (presumably it comes from utils.sh and fills SPARK_CONF / ARGS, which are
 # used when spark-submit is launched) - confirm.
 parse_args_for_spark_submit "$@"

 # Resolve the location of the TPCDS validator jar.
 #
 # Globals:
 #   _DIR          (read)    directory the jar path is built from ("$_DIR/..")
 #   SCALA_VERSION (read)    Scala version embedded in the jar file name;
 #                           2.12 is used when it is unset or empty
 #   RESOURCE      (written) set to the jar path when the file exists
 # Outputs:
 #   Writes "<path> not found" to stderr when the jar is missing.
 # Exits:
 #   Terminates the shell with status 1 when the jar is missing.
 find_resource() {
   local jar_file="tpcds-validator_${SCALA_VERSION:-2.12}-0.1.0-SNAPSHOT-with-dependencies.jar"
   local built_jar="$_DIR/../${jar_file}"
   if [[ -e "$built_jar" ]]; then
     RESOURCE=$built_jar
   else
     # Diagnostics go to stderr, like the other error messages in this script,
     # so they are not mixed into stdout.
     echo "${built_jar} not found" 1>&2
     exit 1
   fi
 }

 # Locate the validator jar; find_resource sets RESOURCE or exits with status 1.
 find_resource

 echo "Using \`spark-submit\` from path: $SPARK_HOME" 1>&2
 # exec replaces this shell with spark-submit, so nothing after it runs.
 # RESOURCE is quoted so a jar path containing whitespace stays one argument.
 # NOTE(review): the join_by substitutions are unquoted, so their output is
 # word-split into separate arguments. join_by and the way SPARK_CONF / ARGS
 # elements are built are not visible here - confirm before quoting them.
 exec "${SPARK_HOME}"/bin/spark-submit \
   --driver-memory 5g \
   --class org.apache.spark.sql.execution.benchmark.TPCDSQueryValidator \
   --conf spark.sql.extensions=org.apache.spark.sql.auron.AuronSparkSessionExtension \
   --conf spark.shuffle.manager=org.apache.spark.sql.execution.auron.shuffle.AuronShuffleManager \
   --conf spark.sql.shuffle.partitions=1000 \
   --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=16777216 \
   --conf spark.sql.autoBroadcastJoinThreshold=1048576 \
   --conf spark.sql.broadcastTimeout=900s \
   --conf spark.driver.memoryOverhead=3072 \
   --conf spark.auron.memoryFraction=0.8 \
   $(join_by " " ${SPARK_CONF[@]}) \
   "${RESOURCE}" \
   $(join_by " " ${ARGS[@]})
	#!/usr/bin/env bash

	#
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#

	#
	# Shell script for validating TPCDS query results

	# Abort early when SPARK_HOME is missing: spark-submit is launched from it.
	if [[ -z "${SPARK_HOME}" ]]; then
	  echo "env SPARK_HOME not defined" 1>&2
	  exit 1
	fi

	# Determine the directory that contains this script (not the caller's
	# working directory), so sibling files resolve wherever the script is run from.
	_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
	# Quoted so a path containing whitespace or glob characters prints verbatim.
	echo "$_DIR"

	# Load common functions
	. "${_DIR}/utils.sh"

	# Do some preparations before launching spark-submit.
	# NOTE(review): parse_args_for_spark_submit is not defined in this file
	# (presumably it comes from utils.sh and fills SPARK_CONF / ARGS, which are
	# used when spark-submit is launched) - confirm.
	parse_args_for_spark_submit "$@"

	# Resolve the location of the TPCDS validator jar.
	#
	# Globals:
	#   _DIR          (read)    directory the jar path is built from ("$_DIR/..")
	#   SCALA_VERSION (read)    Scala version embedded in the jar file name;
	#                           2.12 is used when it is unset or empty
	#   RESOURCE      (written) set to the jar path when the file exists
	# Outputs:
	#   Writes "<path> not found" to stderr when the jar is missing.
	# Exits:
	#   Terminates the shell with status 1 when the jar is missing.
	find_resource() {
	  local jar_file="tpcds-validator_${SCALA_VERSION:-2.12}-0.1.0-SNAPSHOT-with-dependencies.jar"
	  local built_jar="$_DIR/../${jar_file}"
	  if [[ -e "$built_jar" ]]; then
	    RESOURCE=$built_jar
	  else
	    # Diagnostics go to stderr, like the other error messages in this script,
	    # so they are not mixed into stdout.
	    echo "${built_jar} not found" 1>&2
	    exit 1
	  fi
	}

	# Locate the validator jar; find_resource sets RESOURCE or exits with status 1.
	find_resource

	echo "Using \`spark-submit\` from path: $SPARK_HOME" 1>&2
	# exec replaces this shell with spark-submit, so nothing after it runs.
	# RESOURCE is quoted so a jar path containing whitespace stays one argument.
	# NOTE(review): the join_by substitutions are unquoted, so their output is
	# word-split into separate arguments. join_by and the way SPARK_CONF / ARGS
	# elements are built are not visible here - confirm before quoting them.
	exec "${SPARK_HOME}"/bin/spark-submit \
	  --driver-memory 5g \
	  --class org.apache.spark.sql.execution.benchmark.TPCDSQueryValidator \
	  --conf spark.sql.extensions=org.apache.spark.sql.auron.AuronSparkSessionExtension \
	  --conf spark.shuffle.manager=org.apache.spark.sql.execution.auron.shuffle.AuronShuffleManager \
	  --conf spark.sql.shuffle.partitions=1000 \
	  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=16777216 \
	  --conf spark.sql.autoBroadcastJoinThreshold=1048576 \
	  --conf spark.sql.broadcastTimeout=900s \
	  --conf spark.driver.memoryOverhead=3072 \
	  --conf spark.auron.memoryFraction=0.8 \
	  $(join_by " " ${SPARK_CONF[@]}) \
	  "${RESOURCE}" \
	  $(join_by " " ${ARGS[@]})