parquet-benchmarks/run.sh - parquet-mr - Git at Google

 #!/usr/bin/env bash
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #

 SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )

 BENCHMARK=$1; shift
 JMH_OPTIONS="$@"

 if [ -z "$BENCHMARK" ]; then

   # Print usage if run without arguments.
   cat << EOF
 Runs Parquet JMH-based benchmarks.

 Usage:
   run.sh <BENCHMARK> [JMH_OPTIONS]

 Information on the JMH_OPTIONS can be found by running: run.sh all -help

 <BENCHMARK> | Description
 ----------- | ----------
 all         | Runs all benchmarks in the module (listed here and others).
 build       | (No benchmark run, shortcut to rebuild the JMH uber jar).
 clean       | (No benchmark run, shortcut to clean up any temporary files).
 read        | Reading files with different compression, page and block sizes.
 write       | Writing files.
 checksum    | Reading and writing with and without CRC checksums.
 filter      | Filtering column indexes

 Examples:

 # More information about the run script and the available arguments.
 ./parquet-benchmarks/run.sh

 # More information on the JMH options available.
 ./parquet-benchmarks/run.sh all -help

 # Run every benchmark once (~20 minutes).
 ./parquet-benchmarks/run.sh all -wi 0 -i 1 -f 1

 # A more rigourous run of all benchmarks, saving a report for comparison.
 ./parquet-benchmarks/run.sh all -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json

 # Run a benchmark "suite" built into the script, with JMH defaults (about 30 minutes)
 ./parquet-benchmarks/run.sh checksum

 # Running one specific benchmark using a regex.
 ./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.NestedNullWritingBenchmarks

 EOF

 elif [ "$BENCHMARK" == "build" ]; then

   # Shortcut utility to rebuild the benchmark module only.
   ( cd $SCRIPT_PATH && mvn -amd -DskipTests -Denforcer.skip=true clean package )

 elif [ "$BENCHMARK" == "clean" ]; then

   # Shortcut utility to clean any state left behind from any previous run.
   java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup

 else

   # Actually run a benchmark in the JMH harness.

   # Build the benchmark uberjar if it doesn't already exist.
   if [ ! -f ${SCRIPT_PATH}/target/parquet-benchmarks.jar ]; then
     ${SCRIPT_PATH}/run.sh build
   fi

   # Pick a regex if specified.
   BENCHMARK_REGEX=""
   case "$BENCHMARK" in
   "read")
     BENCHMARK_REGEX="org.apache.parquet.benchmarks.ReadBenchmarks"
     ;;
   "write")
     BENCHMARK_REGEX="org.apache.parquet.benchmarks.WriteBenchmarks"
     ;;
   "checksum")
     BENCHMARK_REGEX="org.apache.parquet.benchmarks.PageChecksum.*"
     ;;
   "filter")
     BENCHMARK_REGEX="org.apache.parquet.benchmarks.FilteringBenchmarks"
     ;;
   esac

   echo JMH command: java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS
   java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS

    # Clean any data files generated by the benchmarks.
    ${SCRIPT_PATH}/run.sh clean
 fi
	#!/usr/bin/env bash
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#

	SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )

	BENCHMARK=$1; shift
	JMH_OPTIONS="$@"

	if [ -z "$BENCHMARK" ]; then

	# Print usage if run without arguments.
	cat << EOF
	Runs Parquet JMH-based benchmarks.

	Usage:
	run.sh <BENCHMARK> [JMH_OPTIONS]

	Information on the JMH_OPTIONS can be found by running: run.sh all -help

	<BENCHMARK> \| Description
	----------- \| ----------
	all \| Runs all benchmarks in the module (listed here and others).
	build \| (No benchmark run, shortcut to rebuild the JMH uber jar).
	clean \| (No benchmark run, shortcut to clean up any temporary files).
	read \| Reading files with different compression, page and block sizes.
	write \| Writing files.
	checksum \| Reading and writing with and without CRC checksums.
	filter \| Filtering column indexes

	Examples:

	# More information about the run script and the available arguments.
	./parquet-benchmarks/run.sh

	# More information on the JMH options available.
	./parquet-benchmarks/run.sh all -help

	# Run every benchmark once (~20 minutes).
	./parquet-benchmarks/run.sh all -wi 0 -i 1 -f 1

	# A more rigourous run of all benchmarks, saving a report for comparison.
	./parquet-benchmarks/run.sh all -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json

	# Run a benchmark "suite" built into the script, with JMH defaults (about 30 minutes)
	./parquet-benchmarks/run.sh checksum

	# Running one specific benchmark using a regex.
	./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.NestedNullWritingBenchmarks

	EOF

	elif [ "$BENCHMARK" == "build" ]; then

	# Shortcut utility to rebuild the benchmark module only.
	( cd $SCRIPT_PATH && mvn -amd -DskipTests -Denforcer.skip=true clean package )

	elif [ "$BENCHMARK" == "clean" ]; then

	# Shortcut utility to clean any state left behind from any previous run.
	java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup

	else

	# Actually run a benchmark in the JMH harness.

	# Build the benchmark uberjar if it doesn't already exist.
	if [ ! -f ${SCRIPT_PATH}/target/parquet-benchmarks.jar ]; then
	${SCRIPT_PATH}/run.sh build
	fi

	# Pick a regex if specified.
	BENCHMARK_REGEX=""
	case "$BENCHMARK" in
	"read")
	BENCHMARK_REGEX="org.apache.parquet.benchmarks.ReadBenchmarks"
	;;
	"write")
	BENCHMARK_REGEX="org.apache.parquet.benchmarks.WriteBenchmarks"
	;;
	"checksum")
	BENCHMARK_REGEX="org.apache.parquet.benchmarks.PageChecksum.*"
	;;
	"filter")
	BENCHMARK_REGEX="org.apache.parquet.benchmarks.FilteringBenchmarks"
	;;
	esac

	echo JMH command: java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS
	java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS

	# Clean any data files generated by the benchmarks.
	${SCRIPT_PATH}/run.sh clean
	fi