PARQUET-1644: Clean up some benchmark code and docs. (#672)


diff --git a/parquet-benchmarks/README.md b/parquet-benchmarks/README.md
index 8da067b..63101bd 100644
--- a/parquet-benchmarks/README.md
+++ b/parquet-benchmarks/README.md
@@ -17,22 +17,42 @@
   ~ under the License.
   -->
   
-##Running Parquet Benchmarks
+# Running Parquet Benchmarks
 
-First, build the ``parquet-benchmarks`` module
+The Parquet benchmarks in this module are run using the
+[OpenJDK Java Microbenchmark Harness (JMH)](http://openjdk.java.net/projects/code-tools/jmh/).
+
+First, build the `parquet-benchmarks` module. Packaging it produces an uber-jar containing the Parquet
+classes, all of their dependencies, and a main class that launches the JMH tool.
 
 ```
 mvn --projects parquet-benchmarks -amd -DskipTests -Denforcer.skip=true clean package
 ```
 
-Then, you can run all the benchmarks with the following command
+JMH doesn't have a notion of "benchmark suites", but certain benchmarks make sense to group
+together or to run in isolation during development. The `./parquet-benchmarks/run.sh` script
+can be used to launch all of the benchmarks or just a subset:
 
 ```
-./parquet-benchmarks/run.sh -wi 5 -i 5 -f 3 -bm all
+# More information about the run script and the available arguments.
+./parquet-benchmarks/run.sh
+
+# More information on the JMH options available.
+./parquet-benchmarks/run.sh all -help
+
+# Run every benchmark once (~20 minutes).
+./parquet-benchmarks/run.sh all -wi 0 -i 1 -f 1
+
+# A more rigorous run of all benchmarks, saving a report for comparison.
+./parquet-benchmarks/run.sh all -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json
+
+# Run a benchmark "suite" built into the script, with JMH defaults (about 30 minutes).
+./parquet-benchmarks/run.sh checksum
+
+# Run one specific benchmark by passing a regex that matches it.
+./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.NestedNullWritingBenchmarks
+
+# Manually clean up any state left behind from a previous run.
+./parquet-benchmarks/run.sh clean
 ```
 
-To understand what each command line argument means and for more arguments please see
-
-```
-java -jar parquet-benchmarks/target/parquet-benchmarks.jar -help
-```
\ No newline at end of file
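The benchmarks launched by the commands above are ordinary JMH classes packaged into the uber-jar. As a point of reference only — the class and method names below are made up and not part of the module — a minimal benchmark in the same shape looks like this:

```java
package org.apache.parquet.benchmarks;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;

// Hypothetical example class; it does not exist in the module.
@State(Scope.Benchmark)
public class ExampleBenchmarks {

  private final int[] data = new int[1_000_000];

  // Single-shot mode measures one invocation per iteration, which suits
  // expensive whole-file read/write benchmarks better than throughput mode.
  @Benchmark
  @BenchmarkMode(Mode.SingleShotTime)
  public void sumAll(Blackhole blackhole) {
    long sum = 0;
    for (int v : data) {
      sum += v;
    }
    blackhole.consume(sum); // keep the JIT from eliminating the loop
  }
}
```

JMH discovers such classes through the generated uber-jar, which is why `run.sh` only needs a regex plus the standard JMH options.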
diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh
index 8aa1e69..ba40766 100755
--- a/parquet-benchmarks/run.sh
+++ b/parquet-benchmarks/run.sh
@@ -20,11 +20,91 @@
 
 SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )
 
-echo "Starting WRITE benchmarks"
-java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@"
-echo "Generating test data"
-java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate
-echo "Data generated, starting READ benchmarks"
-java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Read* "$@"
-echo "Cleaning up generated data"
-java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup
+BENCHMARK=$1; shift
+JMH_OPTIONS="$@"
+
+if [ -z "$BENCHMARK" ]; then
+
+  # Print usage if run without arguments.
+  cat << EOF
+Runs Parquet JMH-based benchmarks.
+
+Usage:
+  run.sh <BENCHMARK> [JMH_OPTIONS]
+
+Information on the JMH_OPTIONS can be found by running: run.sh all -help
+
+<BENCHMARK> | Description
+----------- | ----------
+all         | Runs every benchmark in the module (the suites listed here plus any others).
+build       | (No benchmark run, shortcut to rebuild the JMH uber-jar).
+clean       | (No benchmark run, shortcut to clean up any temporary files).
+read        | Reading files with different compression, page and block sizes.
+write       | Writing files.
+checksum    | Reading and writing with and without CRC checksums.
+filter      | Filtering with column indexes.
+
+Examples:
+
+# More information about the run script and the available arguments.
+./parquet-benchmarks/run.sh
+
+# More information on the JMH options available.
+./parquet-benchmarks/run.sh all -help
+
+# Run every benchmark once (~20 minutes).
+./parquet-benchmarks/run.sh all -wi 0 -i 1 -f 1
+
+# A more rigorous run of all benchmarks, saving a report for comparison.
+./parquet-benchmarks/run.sh all -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json
+
+# Run a benchmark "suite" built into the script, with JMH defaults (about 30 minutes).
+./parquet-benchmarks/run.sh checksum
+
+# Run one specific benchmark by passing a regex that matches it.
+./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.NestedNullWritingBenchmarks
+
+EOF
+
+elif [ "$BENCHMARK" == "build" ]; then
+
+  # Shortcut utility to rebuild the benchmark module only.
+  ( cd $SCRIPT_PATH && mvn -amd -DskipTests -Denforcer.skip=true clean package )
+
+elif [ "$BENCHMARK" == "clean" ]; then
+
+  # Shortcut utility to clean any state left behind from any previous run.
+  java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup
+
+else
+
+  # Actually run a benchmark in the JMH harness.
+
+  # Build the benchmark uber-jar if it doesn't already exist.
+  if [ ! -f ${SCRIPT_PATH}/target/parquet-benchmarks.jar ]; then
+    ${SCRIPT_PATH}/run.sh build
+  fi
+
+  # Map the suite name to a benchmark class regex (left empty for "all", which runs everything).
+  BENCHMARK_REGEX=""
+  case "$BENCHMARK" in
+  "read")
+    BENCHMARK_REGEX="org.apache.parquet.benchmarks.ReadBenchmarks"
+    ;;
+  "write")
+    BENCHMARK_REGEX="org.apache.parquet.benchmarks.WriteBenchmarks"
+    ;;
+  "checksum")
+    BENCHMARK_REGEX="org.apache.parquet.benchmarks.PageChecksum.*"
+    ;;
+  "filter")
+    BENCHMARK_REGEX="org.apache.parquet.benchmarks.FilteringBenchmarks"
+    ;;
+  esac
+
+  echo JMH command: java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS
+  java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS
+
+  # Clean any data files generated by the benchmarks.
+  ${SCRIPT_PATH}/run.sh clean
+fi
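The script ultimately just hands the chosen regex and the remaining JMH_OPTIONS to the JMH launcher in the uber-jar. For illustration, the same checksum run could also be expressed with JMH's programmatic API; this is only a sketch under that assumption, not code that ships with the module:

```java
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class RunChecksumSuite {
  public static void main(String[] args) throws RunnerException {
    // Roughly: ./parquet-benchmarks/run.sh checksum -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json
    Options options = new OptionsBuilder()
        .include("org.apache.parquet.benchmarks.PageChecksum.*") // same regex the "checksum" suite uses
        .warmupIterations(5)       // -wi 5
        .measurementIterations(5)  // -i 5
        .forks(3)                  // -f 3
        .resultFormat(ResultFormatType.JSON)
        .result("/tmp/benchmark1.json") // -rff /tmp/benchmark1.json
        .build();
    new Runner(options).run();
  }
}
```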
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java
index f039403..24da822 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java
@@ -25,6 +25,8 @@
   public static final Configuration configuration = new Configuration();
 
   public static final String TARGET_DIR = "target/tests/ParquetBenchmarks";
+  public static final Path targetDir = new Path(TARGET_DIR);
+
   public static final Path file_1M = new Path(TARGET_DIR + "/PARQUET-1M");
 
   //different block and page sizes
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java
index 42d9953..3b5db68 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java
@@ -115,14 +115,7 @@
 
   public void cleanup()
   {
-    deleteIfExists(configuration, file_1M);
-    deleteIfExists(configuration, file_1M_BS256M_PS4M);
-    deleteIfExists(configuration, file_1M_BS256M_PS8M);
-    deleteIfExists(configuration, file_1M_BS512M_PS4M);
-    deleteIfExists(configuration, file_1M_BS512M_PS8M);
-//    deleteIfExists(configuration, parquetFile_1M_LZO);
-    deleteIfExists(configuration, file_1M_SNAPPY);
-    deleteIfExists(configuration, file_1M_GZIP);
+    deleteIfExists(configuration, targetDir);
   }
 
   public static void main(String[] args) {
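The single `deleteIfExists(configuration, targetDir)` call above replaces the per-file deletes because the delete is recursive: removing `target/tests/ParquetBenchmarks` removes every generated file underneath it. A minimal sketch of such a helper on top of Hadoop's FileSystem API (the module's real helper is `BenchmarkUtils.deleteIfExists`; this standalone version is only illustrative):

```java
import java.io.IOException;
import java.io.UncheckedIOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class DeleteExample {

  /** Deletes a file, or a directory and its contents, if it exists. */
  public static void deleteIfExists(Configuration conf, Path path) {
    try {
      FileSystem fs = path.getFileSystem(conf);
      if (fs.exists(path)) {
        // The second argument enables recursive deletion of directories.
        if (!fs.delete(path, true)) {
          System.err.println("Could not delete " + path);
        }
      }
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }
}
```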
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java
index 6c62cc6..49ebdce 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java
@@ -40,7 +40,7 @@
 import static org.apache.parquet.benchmarks.BenchmarkUtils.exists;
 import static org.apache.parquet.hadoop.metadata.CompressionCodecName.*;
 
-public class PageChecksumDataGenerator {
+public class PageChecksumDataGenerator extends DataGenerator {
 
   private final MessageType SCHEMA = MessageTypeParser.parseMessageType(
     "message m {" +
@@ -103,25 +103,4 @@
       throw new RuntimeException(e);
     }
   }
-
-  public void cleanup() {
-    deleteIfExists(configuration, file_100K_NOCHECKSUMS_UNCOMPRESSED);
-    deleteIfExists(configuration, file_100K_CHECKSUMS_UNCOMPRESSED);
-    deleteIfExists(configuration, file_100K_NOCHECKSUMS_GZIP);
-    deleteIfExists(configuration, file_100K_CHECKSUMS_GZIP);
-    deleteIfExists(configuration, file_100K_NOCHECKSUMS_SNAPPY);
-    deleteIfExists(configuration, file_100K_CHECKSUMS_SNAPPY);
-    deleteIfExists(configuration, file_1M_NOCHECKSUMS_UNCOMPRESSED);
-    deleteIfExists(configuration, file_1M_CHECKSUMS_UNCOMPRESSED);
-    deleteIfExists(configuration, file_1M_NOCHECKSUMS_GZIP);
-    deleteIfExists(configuration, file_1M_CHECKSUMS_GZIP);
-    deleteIfExists(configuration, file_1M_NOCHECKSUMS_SNAPPY);
-    deleteIfExists(configuration, file_1M_CHECKSUMS_SNAPPY);
-    deleteIfExists(configuration, file_10M_NOCHECKSUMS_UNCOMPRESSED);
-    deleteIfExists(configuration, file_10M_CHECKSUMS_UNCOMPRESSED);
-    deleteIfExists(configuration, file_10M_NOCHECKSUMS_GZIP);
-    deleteIfExists(configuration, file_10M_CHECKSUMS_GZIP);
-    deleteIfExists(configuration, file_10M_NOCHECKSUMS_SNAPPY);
-    deleteIfExists(configuration, file_10M_CHECKSUMS_SNAPPY);
-  }
 }
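Because `PageChecksumDataGenerator` now extends `DataGenerator`, the hand-maintained list of deletes above is no longer needed: the inherited `cleanup()` removes the whole shared target directory, checksum files included. A small usage sketch under the classes as changed by this patch:

```java
package org.apache.parquet.benchmarks;

// Illustrative snippet only; not part of the module.
public class CleanupExample {
  public static void main(String[] args) {
    // PageChecksumDataGenerator is now a DataGenerator, so it inherits cleanup().
    DataGenerator generator = new PageChecksumDataGenerator();

    // One call removes target/tests/ParquetBenchmarks recursively, covering the
    // checksum benchmark files along with everything else under targetDir.
    generator.cleanup();
  }
}
```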
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java
index db23eeb..be2ebe4 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java
@@ -51,16 +51,15 @@
 
   private PageChecksumDataGenerator pageChecksumDataGenerator = new PageChecksumDataGenerator();
 
+  /**
+   * Generating the input files needs to happen only once.  To avoid needlessly regenerating them, the files aren't
+   * cleaned up as part of the benchmark.  If they already exist, a message is printed and they are not regenerated.
+   */
   @Setup(Level.Trial)
   public void setup() {
     pageChecksumDataGenerator.generateAll();
   }
 
-  @Setup(Level.Trial)
-  public void cleanup() {
-    pageChecksumDataGenerator.cleanup();
-  }
-
   private void readFile(Path file, int nRows, boolean verifyChecksums, Blackhole blackhole)
     throws IOException {
     try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file)
@@ -82,96 +81,114 @@
 
   // 100k rows, uncompressed, GZIP, Snappy
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read100KRowsUncompressedWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_100K_CHECKSUMS_UNCOMPRESSED, 100 * ONE_K, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read100KRowsUncompressedWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_100K_CHECKSUMS_UNCOMPRESSED, 100 * ONE_K, true, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read100KRowsGzipWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_100K_CHECKSUMS_GZIP, 100 * ONE_K, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read100KRowsGzipWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_100K_CHECKSUMS_GZIP, 100 * ONE_K, true, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read100KRowsSnappyWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_100K_CHECKSUMS_SNAPPY, 100 * ONE_K, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read100KRowsSnappyWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_100K_CHECKSUMS_SNAPPY, 100 * ONE_K, true, blackhole);
   }
 
   // 1M rows, uncompressed, GZIP, Snappy
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsUncompressedWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_1M_CHECKSUMS_UNCOMPRESSED, ONE_MILLION, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsUncompressedWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_1M_CHECKSUMS_UNCOMPRESSED, ONE_MILLION, true, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsGzipWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_1M_CHECKSUMS_GZIP, ONE_MILLION, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsGzipWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_1M_CHECKSUMS_GZIP, ONE_MILLION, true, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsSnappyWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_1M_CHECKSUMS_SNAPPY, ONE_MILLION, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsSnappyWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_1M_CHECKSUMS_SNAPPY, ONE_MILLION, true, blackhole);
   }
 
   // 10M rows, uncompressed, GZIP, Snappy
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read10MRowsUncompressedWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_10M_CHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read10MRowsUncompressedWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_10M_CHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, true, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read10MRowsGzipWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_10M_CHECKSUMS_GZIP, 10 * ONE_MILLION, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read10MRowsGzipWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_10M_CHECKSUMS_GZIP, 10 * ONE_MILLION, true, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read10MRowsSnappyWithoutVerification(Blackhole blackhole) throws IOException {
     readFile(file_10M_CHECKSUMS_SNAPPY, 10 * ONE_MILLION, false, blackhole);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read10MRowsSnappyWithVerification(Blackhole blackhole) throws IOException {
     readFile(file_10M_CHECKSUMS_SNAPPY, 10 * ONE_MILLION, true, blackhole);
   }
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java
index c743dde..e892d53 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java
@@ -57,102 +57,120 @@
   private PageChecksumDataGenerator pageChecksumDataGenerator = new PageChecksumDataGenerator();
 
   @Setup(Level.Iteration)
-  public void cleanup() {
+  public void setup() {
     pageChecksumDataGenerator.cleanup();
   }
 
   // 100k rows, uncompressed, GZIP, Snappy
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write100KRowsUncompressedWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_100K_NOCHECKSUMS_UNCOMPRESSED, 100 * ONE_K, false, UNCOMPRESSED);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write100KRowsUncompressedWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_100K_CHECKSUMS_UNCOMPRESSED, 100 * ONE_K, true, UNCOMPRESSED);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write100KRowsGzipWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_100K_NOCHECKSUMS_GZIP, 100 * ONE_K, false, GZIP);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write100KRowsGzipWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_100K_CHECKSUMS_GZIP, 100 * ONE_K, true, GZIP);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write100KRowsSnappyWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_100K_NOCHECKSUMS_SNAPPY, 100 * ONE_K, false, SNAPPY);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write100KRowsSnappyWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_100K_CHECKSUMS_SNAPPY, 100 * ONE_K, true, SNAPPY);
   }
 
   // 1M rows, uncompressed, GZIP, Snappy
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsUncompressedWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_1M_NOCHECKSUMS_UNCOMPRESSED, ONE_MILLION, false, UNCOMPRESSED);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsUncompressedWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_1M_CHECKSUMS_UNCOMPRESSED, ONE_MILLION, true, UNCOMPRESSED);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsGzipWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_1M_NOCHECKSUMS_GZIP, ONE_MILLION, false, GZIP);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsGzipWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_1M_CHECKSUMS_GZIP, ONE_MILLION, true, GZIP);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsSnappyWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_1M_NOCHECKSUMS_SNAPPY, ONE_MILLION, false, SNAPPY);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsSnappyWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_1M_CHECKSUMS_SNAPPY, ONE_MILLION, true, SNAPPY);
   }
 
   // 10M rows, uncompressed, GZIP, Snappy
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write10MRowsUncompressedWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_10M_NOCHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, false, UNCOMPRESSED);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write10MRowsUncompressedWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_10M_CHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, true, UNCOMPRESSED);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write10MRowsGzipWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_10M_NOCHECKSUMS_GZIP, 10 * ONE_MILLION, false, GZIP);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write10MRowsGzipWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_10M_CHECKSUMS_GZIP, 10 * ONE_MILLION, true, GZIP);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write10MRowsSnappyWithoutChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_10M_NOCHECKSUMS_SNAPPY, 10 * ONE_MILLION, false, SNAPPY);
   }
 
-  @Benchmark @BenchmarkMode(Mode.SingleShotTime)
+  @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write10MRowsSnappyWithChecksums() throws IOException {
     pageChecksumDataGenerator.generateData(file_10M_CHECKSUMS_SNAPPY, 10 * ONE_MILLION, true, SNAPPY);
   }
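The read and write benchmark classes in this patch use two different JMH setup levels: readers generate their input files once per trial and keep them, while writers clean up before every iteration so each measurement writes fresh files. A compact, hypothetical illustration of the two levels side by side:

```java
package org.apache.parquet.benchmarks;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;

// Hypothetical class, for illustration only.
@State(Scope.Benchmark)
public class SetupLevelExample {

  // Level.Trial: runs once per fork, before any iteration starts.
  // The read benchmarks use this to generate input files a single time.
  @Setup(Level.Trial)
  public void generateOnce() {
    System.out.println("generate input files if they are missing");
  }

  // Level.Iteration: runs before every measurement iteration.
  // The write benchmarks use this to delete output from the previous iteration.
  @Setup(Level.Iteration)
  public void cleanBeforeEachIteration() {
    System.out.println("delete previously written files");
  }

  @Benchmark
  @BenchmarkMode(Mode.SingleShotTime)
  public int doWork() {
    return 42;
  }
}
```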
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
index dba5544..e74204a 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
@@ -20,6 +20,13 @@
 
 import org.apache.hadoop.fs.Path;
 import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
 import org.openjdk.jmh.infra.Blackhole;
 import org.apache.parquet.example.data.Group;
 import org.apache.parquet.hadoop.ParquetReader;
@@ -29,7 +36,9 @@
 
 import java.io.IOException;
 
+@State(Scope.Benchmark)
 public class ReadBenchmarks {
+
   private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException
   {
     ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build();
@@ -47,7 +56,17 @@
     reader.close();
   }
 
+  /**
+   * Generating the input files needs to happen only once.  To avoid needlessly regenerating them, the files aren't
+   * cleaned up as part of the benchmark.  If they already exist, a message is printed and they are not regenerated.
+   */
+  @Setup(Level.Trial)
+  public void generateFilesForRead() {
+    new DataGenerator().generateAll();
+  }
+
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsDefaultBlockAndPageSizeUncompressed(Blackhole blackhole)
           throws IOException
   {
@@ -55,6 +74,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsBS256MPS4MUncompressed(Blackhole blackhole)
           throws IOException
   {
@@ -62,6 +82,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsBS256MPS8MUncompressed(Blackhole blackhole)
           throws IOException
   {
@@ -69,6 +90,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsBS512MPS4MUncompressed(Blackhole blackhole)
           throws IOException
   {
@@ -76,6 +98,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsBS512MPS8MUncompressed(Blackhole blackhole)
           throws IOException
   {
@@ -91,6 +114,7 @@
 //  }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsDefaultBlockAndPageSizeSNAPPY(Blackhole blackhole)
           throws IOException
   {
@@ -98,6 +122,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void read1MRowsDefaultBlockAndPageSizeGZIP(Blackhole blackhole)
           throws IOException
   {
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
index 5c26a84..0a2d2c0 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
@@ -19,7 +19,9 @@
 package org.apache.parquet.benchmarks;
 
 import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
 
@@ -39,12 +41,13 @@
   private DataGenerator dataGenerator = new DataGenerator();
 
   @Setup(Level.Iteration)
-  public void cleanup() {
+  public void setup() {
     //clean existing test data at the beginning of each iteration
     dataGenerator.cleanup();
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsDefaultBlockAndPageSizeUncompressed()
           throws IOException
   {
@@ -59,6 +62,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsBS256MPS4MUncompressed()
           throws IOException
   {
@@ -73,6 +77,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsBS256MPS8MUncompressed()
           throws IOException
   {
@@ -87,6 +92,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsBS512MPS4MUncompressed()
           throws IOException
   {
@@ -101,6 +107,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsBS512MPS8MUncompressed()
           throws IOException
   {
@@ -130,6 +137,7 @@
 //  }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsDefaultBlockAndPageSizeSNAPPY()
           throws IOException
   {
@@ -144,6 +152,7 @@
   }
 
   @Benchmark
+  @BenchmarkMode(Mode.SingleShotTime)
   public void write1MRowsDefaultBlockAndPageSizeGZIP()
           throws IOException
   {
diff --git a/parquet-benchmarks/run_checksums.sh b/parquet-benchmarks/src/main/resources/log4j.properties
old mode 100755
new mode 100644
similarity index 67%
rename from parquet-benchmarks/run_checksums.sh
rename to parquet-benchmarks/src/main/resources/log4j.properties
index e798488..f4737c8
--- a/parquet-benchmarks/run_checksums.sh
+++ b/parquet-benchmarks/src/main/resources/log4j.properties
@@ -1,4 +1,3 @@
-#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -17,12 +16,9 @@
 # under the License.
 #
 
-# !/usr/bin/env bash
+log4j.rootLogger=INFO, stdout
 
-SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )
-
-echo "Page level CRC checksum benchmarks"
-echo "Running write benchmarks"
-java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*PageChecksumWriteBenchmarks -bm ss "$@"
-echo "Running read benchmarks"
-java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*PageChecksumReadBenchmarks -bm ss "$@"
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p :: %m [%C]%n