blob: ba407662d228b90f08b33575d4df4de17d05175b [file] [log] [blame]
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )
BENCHMARK=$1; shift
JMH_OPTIONS="$@"
if [ -z "$BENCHMARK" ]; then
# Print usage if run without arguments.
cat << EOF
Runs Parquet JMH-based benchmarks.
Usage:
run.sh <BENCHMARK> [JMH_OPTIONS]
Information on the JMH_OPTIONS can be found by running: run.sh all -help
<BENCHMARK> | Description
----------- | ----------
all | Runs all benchmarks in the module (listed here and others).
build | (No benchmark run, shortcut to rebuild the JMH uber jar).
clean | (No benchmark run, shortcut to clean up any temporary files).
read | Reading files with different compression, page and block sizes.
write | Writing files.
checksum | Reading and writing with and without CRC checksums.
filter | Filtering column indexes
Examples:
# More information about the run script and the available arguments.
./parquet-benchmarks/run.sh
# More information on the JMH options available.
./parquet-benchmarks/run.sh all -help
# Run every benchmark once (~20 minutes).
./parquet-benchmarks/run.sh all -wi 0 -i 1 -f 1
# A more rigourous run of all benchmarks, saving a report for comparison.
./parquet-benchmarks/run.sh all -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json
# Run a benchmark "suite" built into the script, with JMH defaults (about 30 minutes)
./parquet-benchmarks/run.sh checksum
# Running one specific benchmark using a regex.
./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.NestedNullWritingBenchmarks
EOF
elif [ "$BENCHMARK" == "build" ]; then
# Shortcut utility to rebuild the benchmark module only.
( cd $SCRIPT_PATH && mvn -amd -DskipTests -Denforcer.skip=true clean package )
elif [ "$BENCHMARK" == "clean" ]; then
# Shortcut utility to clean any state left behind from any previous run.
java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup
else
# Actually run a benchmark in the JMH harness.
# Build the benchmark uberjar if it doesn't already exist.
if [ ! -f ${SCRIPT_PATH}/target/parquet-benchmarks.jar ]; then
${SCRIPT_PATH}/run.sh build
fi
# Pick a regex if specified.
BENCHMARK_REGEX=""
case "$BENCHMARK" in
"read")
BENCHMARK_REGEX="org.apache.parquet.benchmarks.ReadBenchmarks"
;;
"write")
BENCHMARK_REGEX="org.apache.parquet.benchmarks.WriteBenchmarks"
;;
"checksum")
BENCHMARK_REGEX="org.apache.parquet.benchmarks.PageChecksum.*"
;;
"filter")
BENCHMARK_REGEX="org.apache.parquet.benchmarks.FilteringBenchmarks"
;;
esac
echo JMH command: java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS
java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS
# Clean any data files generated by the benchmarks.
${SCRIPT_PATH}/run.sh clean
fi