| #!/usr/bin/env bash |
| |
| # |
| # Copyright 2022 The Blaze Authors |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| # |
| # Shell script for validating TPCDS query results |
| |
# spark-submit is launched from "${SPARK_HOME}"/bin further down, so refuse to
# continue when the variable is unset or empty.
[[ -n "${SPARK_HOME}" ]] || {
  echo "env SPARK_HOME not defined" >&2
  exit 1
}
| |
# Determine the directory that contains this script (resolved from
# BASH_SOURCE, not from the caller's working directory)
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quoted so a path containing whitespace or glob characters is printed as-is
echo "${_DIR}"

# Load common functions. Abort early when the file is missing or unreadable:
# bash would otherwise keep going and reach spark-submit without the helpers.
# NOTE(review): parse_args_for_spark_submit and join_by are not defined in
# this file and are presumably provided by utils.sh -- confirm.
if [[ ! -r "${_DIR}/utils.sh" ]]; then
  echo "${_DIR}/utils.sh not found or not readable" 1>&2
  exit 1
fi
. "${_DIR}/utils.sh"

# Do some preparations before launching spark-submit
parse_args_for_spark_submit "$@"
| |
#######################################
# Resolve the location of the TPCDS validator jar.
# Globals:
#   _DIR     (read)    - base directory; the jar is looked up in ${_DIR}/..
#   RESOURCE (written) - set to the jar path when the jar exists
# Arguments:
#   None
# Outputs:
#   Writes "<path> not found" to stderr when the jar is missing
# Returns:
#   0 when the jar exists; otherwise exits the shell with status 1
#######################################
find_resource() {
  local jar_file="tpcds-validator_2.12-0.1.0-SNAPSHOT-with-dependencies.jar"
  local built_jar="${_DIR}/../${jar_file}"
  if [[ ! -e "${built_jar}" ]]; then
    # Diagnostics go to stderr, like the other messages in this script
    echo "${built_jar} not found" 1>&2
    exit 1
  fi
  RESOURCE="${built_jar}"
}
| |
# Locate the validator jar; this sets RESOURCE or exits when the jar is missing
find_resource

echo "Using \`spark-submit\` from path: $SPARK_HOME" 1>&2

# Replace this shell with spark-submit, enabling the Blaze session extension
# and shuffle manager. RESOURCE is quoted so a jar path containing whitespace
# is passed as a single argument.
# NOTE(review): the two join_by substitutions are intentionally left unquoted,
# exactly as before. join_by, SPARK_CONF and ARGS are not defined in this file,
# and their elements may rely on word splitting (e.g. an option and its value
# stored in one element) -- confirm against utils.sh before quoting them.
exec "${SPARK_HOME}"/bin/spark-submit \
  --driver-memory 5g \
  --class org.apache.spark.sql.execution.benchmark.TPCDSQueryValidator \
  --conf spark.sql.extensions=org.apache.spark.sql.blaze.BlazeSparkSessionExtension \
  --conf spark.shuffle.manager=org.apache.spark.sql.execution.blaze.shuffle.BlazeShuffleManager \
  --conf spark.sql.shuffle.partitions=1000 \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=16777216 \
  --conf spark.sql.autoBroadcastJoinThreshold=1048576 \
  --conf spark.sql.broadcastTimeout=900s \
  --conf spark.driver.memoryOverhead=3072 \
  --conf spark.blaze.memoryFraction=0.8 \
  $(join_by " " ${SPARK_CONF[@]}) \
  "${RESOURCE}" \
  $(join_by " " ${ARGS[@]})
| |