#!/usr/bin/env bash
#
# Copyright 2022 The Blaze Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Shell script for validating TPCDS query results
# SPARK_HOME is required: refuse to continue without it.
if [[ -z "${SPARK_HOME}" ]]; then
  echo "env SPARK_HOME not defined" 1>&2
  exit 1
fi

# Determine the directory that contains this script (not the caller's working
# directory), so sibling files resolve no matter where the script is run from.
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quoted so a path containing spaces or glob characters is printed verbatim.
echo "$_DIR"

# Load common functions. Fail fast when the helper file is missing: in bash's
# default (non-POSIX) mode a failed `.` is not fatal, so the script would
# otherwise carry on without the helpers it is about to call.
if [[ ! -r "${_DIR}/utils.sh" ]]; then
  echo "${_DIR}/utils.sh not found" 1>&2
  exit 1
fi
. "${_DIR}/utils.sh"

# Do some preparations before launching spark-submit
parse_args_for_spark_submit "$@"
# Resolve the location of the TPCDS validator jar.
#
# Globals:
#   _DIR     (read)    base directory of the lookup
#   RESOURCE (written) path of the jar; assigned only when the jar exists
# Outputs:
#   Writes "<path> not found" to stderr when the jar is missing.
# Exits:
#   Terminates the calling shell with status 1 when the jar is missing.
find_resource() {
  local jar_file="tpcds-validator_2.12-0.1.0-SNAPSHOT-with-dependencies.jar"
  # The jar is looked up one level above _DIR.
  local built_jar="$_DIR/../${jar_file}"
  if [[ ! -e "$built_jar" ]]; then
    # Diagnostics belong on stderr, like the script's other messages, so they
    # never get mixed into regular stdout output.
    echo "${built_jar} not found" 1>&2
    exit 1
  fi
  RESOURCE=$built_jar
}
# Locate the validator jar; find_resource stores its path in RESOURCE or
# exits when the jar is missing.
find_resource

echo "Using \`spark-submit\` from path: $SPARK_HOME" 1>&2

# Assemble the complete spark-submit argument list before launching.
# NOTE(review): SPARK_CONF, RESOURCE and ARGS are expanded unquoted exactly as
# before, so their values are still word-split (and globbed) into separate
# arguments. Presumably parse_args_for_spark_submit relies on that; confirm
# against utils.sh before quoting them.
_SUBMIT_ARGS=(
  --driver-memory 5g
  --class org.apache.spark.sql.execution.benchmark.TPCDSQueryValidator
  --conf spark.sql.extensions=org.apache.spark.sql.blaze.BlazeSparkSessionExtension
  --conf spark.shuffle.manager=org.apache.spark.sql.execution.blaze.shuffle.BlazeShuffleManager
  --conf spark.sql.shuffle.partitions=1000
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=16777216
  --conf spark.sql.autoBroadcastJoinThreshold=1048576
  --conf spark.sql.broadcastTimeout=900s
  --conf spark.driver.memoryOverhead=3072
  --conf spark.blaze.memoryFraction=0.8
  $(join_by " " ${SPARK_CONF[@]})
  ${RESOURCE}
  $(join_by " " ${ARGS[@]})
)

# Replace this shell process with spark-submit.
exec "${SPARK_HOME}"/bin/spark-submit "${_SUBMIT_ARGS[@]}"