#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Shell script for validating TPCDS query results
# Fail fast when the Spark installation is not configured: SPARK_HOME is used
# further down to locate bin/spark-submit.
if [[ -z "${SPARK_HOME}" ]]; then
  echo "env SPARK_HOME not defined" 1>&2
  exit 1
fi

# Resolve the absolute directory that contains this script (not the caller's
# working directory), so that sibling files can be found no matter where the
# script is invoked from. Abort when it cannot be resolved; an empty _DIR
# would make the paths derived from it point at the filesystem root.
_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "cannot determine the directory of ${BASH_SOURCE[0]}" 1>&2
  exit 1
}
# Quoted so that a path containing whitespace or glob characters is printed
# verbatim instead of being word-split and expanded.
printf '%s\n' "${_DIR}"

# Load common functions
# NOTE(review): utils.sh is presumably where parse_args_for_spark_submit is
# defined - it is not visible from this file; confirm.
. "${_DIR}/utils.sh"

# Do some preparations before launching spark-submit; all command-line
# arguments are forwarded unchanged.
parse_args_for_spark_submit "$@"
#######################################
# Resolve the location of the TPCDS query validator jar.
# Globals:
#   _DIR          (read)    base directory; the jar is looked up in its parent
#   SCALA_VERSION (read)    Scala version embedded in the jar name
#                           (defaults to 2.12 when unset or empty)
#   RESOURCE      (written) path of the jar, set only when the jar exists
# Arguments:
#   None
# Outputs:
#   Writes "<path> not found" to stderr when the jar is missing.
# Returns:
#   Exits the shell with status 1 when the jar is missing.
#######################################
find_resource() {
  local jar_file="tpcds-validator_${SCALA_VERSION:-2.12}-0.1.0-SNAPSHOT-with-dependencies.jar"
  local built_jar="${_DIR}/../${jar_file}"
  if [[ ! -e "${built_jar}" ]]; then
    # Diagnostics belong on stderr, like the other error messages in this
    # script, so they never get mixed into captured stdout.
    echo "${built_jar} not found" 1>&2
    exit 1
  fi
  RESOURCE="${built_jar}"
}
# Locate the validator jar: sets RESOURCE, or exits when the jar is missing.
find_resource

echo "Using \`spark-submit\` from path: $SPARK_HOME" 1>&2

# Replace this shell with spark-submit running the TPCDS query validator with
# the Auron session extension and shuffle manager enabled.
#
# RESOURCE is quoted so that a jar path containing whitespace is passed to
# spark-submit as a single argument.
#
# NOTE(review): the two join_by expansions are deliberately left unquoted.
# SPARK_CONF and ARGS are presumably populated by parse_args_for_spark_submit,
# and their elements may rely on word splitting (e.g. a single element holding
# "--conf key=value") - confirm against utils.sh before quoting them.
# shellcheck disable=SC2046,SC2068
exec "${SPARK_HOME}"/bin/spark-submit \
  --driver-memory 5g \
  --class org.apache.spark.sql.execution.benchmark.TPCDSQueryValidator \
  --conf spark.sql.extensions=org.apache.spark.sql.auron.AuronSparkSessionExtension \
  --conf spark.shuffle.manager=org.apache.spark.sql.execution.auron.shuffle.AuronShuffleManager \
  --conf spark.sql.shuffle.partitions=1000 \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=16777216 \
  --conf spark.sql.autoBroadcastJoinThreshold=1048576 \
  --conf spark.sql.broadcastTimeout=900s \
  --conf spark.driver.memoryOverhead=3072 \
  --conf spark.auron.memoryFraction=0.8 \
  $(join_by " " ${SPARK_CONF[@]}) \
  "${RESOURCE}" \
  $(join_by " " ${ARGS[@]})