| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with this |
| * work for additional information regarding copyright ownership. The ASF |
| * licenses this file to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| * License for the specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.hadoop.hbase.regionserver; |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.text.DecimalFormat; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Locale; |
| |
| import org.apache.commons.cli.CommandLine; |
| import org.apache.commons.cli.CommandLineParser; |
| import org.apache.commons.cli.Option; |
| import org.apache.commons.cli.Options; |
| import org.apache.commons.cli.ParseException; |
| import org.apache.commons.cli.PosixParser; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hbase.Cell; |
| import org.apache.hadoop.hbase.HBaseConfiguration; |
| import org.apache.hadoop.hbase.KeyValue; |
| import org.apache.hadoop.hbase.KeyValueUtil; |
| import org.apache.hadoop.hbase.io.compress.Compression; |
| import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; |
| import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; |
| import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; |
| import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock; |
| import org.apache.hadoop.hbase.io.hfile.CacheConfig; |
| import org.apache.hadoop.hbase.io.hfile.HFileBlock; |
| import org.apache.hadoop.hbase.io.hfile.HFileContext; |
| import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; |
| import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl; |
| import org.apache.hadoop.hbase.util.Bytes; |
| import org.apache.hadoop.io.compress.CompressionOutputStream; |
| import org.apache.hadoop.io.compress.Compressor; |
| import org.apache.hadoop.io.compress.Decompressor; |
| |
| /** |
| * Tests various algorithms for key compression on an existing HFile. Useful |
| * for testing, debugging and benchmarking. |
| */ |
public class DataBlockEncodingTool {
  private static final Log LOG = LogFactory.getLog(
      DataBlockEncodingTool.class);

  /** Whether the benchmarked blocks are built to include MVCC/memstore timestamps. */
  private static final boolean includesMemstoreTS = true;

  /**
   * How many times to run the benchmark. More times means better data in terms
   * of statistics but slower execution. Has to be strictly larger than
   * {@link #DEFAULT_BENCHMARK_N_OMIT}.
   */
  private static final int DEFAULT_BENCHMARK_N_TIMES = 12;

  /**
   * How many first runs should not be included in the benchmark. Done in order
   * to exclude setup cost.
   */
  private static final int DEFAULT_BENCHMARK_N_OMIT = 2;

  /** HFile name to be used in benchmark */
  private static final String OPT_HFILE_NAME = "f";

  /** Maximum number of key/value pairs to process in a single benchmark run */
  private static final String OPT_KV_LIMIT = "n";

  /** Whether to run a benchmark to measure read throughput */
  private static final String OPT_MEASURE_THROUGHPUT = "b";

  /** If this is specified, no correctness testing will be done */
  private static final String OPT_OMIT_CORRECTNESS_TEST = "c";

  /** What encoding algorithm to test */
  private static final String OPT_ENCODING_ALGORITHM = "a";

  /** Number of times to run each benchmark */
  private static final String OPT_BENCHMARK_N_TIMES = "t";

  /** Number of first runs of every benchmark to omit from statistics */
  private static final String OPT_BENCHMARK_N_OMIT = "omit";

  /** Compression algorithm to use if not specified on the command line */
  private static final Algorithm DEFAULT_COMPRESSION =
      Compression.Algorithm.GZ;

  /** Formats integers with thousands separators for readable byte counts. */
  private static final DecimalFormat DELIMITED_DECIMAL_FORMAT =
      new DecimalFormat();

  static {
    DELIMITED_DECIMAL_FORMAT.setGroupingSize(3);
  }

  // printf-style formats consumed by outputTuple().
  private static final String PCT_FORMAT = "%.2f %%";
  private static final String INT_FORMAT = "%d";

  // Benchmark repetition counts; overridable via the -t / -omit options.
  private static int benchmarkNTimes = DEFAULT_BENCHMARK_N_TIMES;
  private static int benchmarkNOmit = DEFAULT_BENCHMARK_N_OMIT;

  // One encoded copy of the scanned data per data block encoding under test;
  // populated by checkStatistics().
  private List<EncodedDataBlock> codecs = new ArrayList<EncodedDataBlock>();
  // Aggregate byte counts gathered while scanning the input HFile.
  private long totalPrefixLength = 0;
  private long totalKeyLength = 0;
  private long totalValueLength = 0;
  private long totalKeyRedundancyLength = 0;
  private long totalCFLength = 0;

  // All serialized KeyValues of the scanned file, concatenated.
  private byte[] rawKVs;
  // Derived from the HFile version when the file is opened in testCodecs().
  private boolean useHBaseChecksum = false;

  // Baseline compression algorithm (e.g. GZ) used for comparison.
  private final String compressionAlgorithmName;
  private final Algorithm compressionAlgorithm;
  private final Compressor compressor;
  private final Decompressor decompressor;
| |
| private static enum Manipulation { |
| ENCODING, |
| DECODING, |
| COMPRESSION, |
| DECOMPRESSION; |
| |
| @Override |
| public String toString() { |
| String s = super.toString(); |
| StringBuilder sb = new StringBuilder(); |
| sb.append(s.charAt(0)); |
| sb.append(s.substring(1).toLowerCase(Locale.ROOT)); |
| return sb.toString(); |
| } |
| } |
| |
| /** |
| * @param compressionAlgorithmName What kind of algorithm should be used |
| * as baseline for comparison (e.g. lzo, gz). |
| */ |
| public DataBlockEncodingTool(String compressionAlgorithmName) { |
| this.compressionAlgorithmName = compressionAlgorithmName; |
| this.compressionAlgorithm = Compression.getCompressionAlgorithmByName( |
| compressionAlgorithmName); |
| this.compressor = this.compressionAlgorithm.getCompressor(); |
| this.decompressor = this.compressionAlgorithm.getDecompressor(); |
| } |
| |
| /** |
| * Check statistics for given HFile for different data block encoders. |
| * @param scanner Of file which will be compressed. |
| * @param kvLimit Maximal count of KeyValue which will be processed. |
| * @throws IOException thrown if scanner is invalid |
| */ |
| public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) |
| throws IOException { |
| scanner.seek(KeyValue.LOWESTKEY); |
| |
| KeyValue currentKV; |
| |
| byte[] previousKey = null; |
| byte[] currentKey; |
| |
| DataBlockEncoding[] encodings = DataBlockEncoding.values(); |
| |
| ByteArrayOutputStream uncompressedOutputStream = |
| new ByteArrayOutputStream(); |
| |
| int j = 0; |
| while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) { |
| // Iterates through key/value pairs |
| j++; |
| currentKey = currentKV.getKey(); |
| if (previousKey != null) { |
| for (int i = 0; i < previousKey.length && i < currentKey.length && |
| previousKey[i] == currentKey[i]; ++i) { |
| totalKeyRedundancyLength++; |
| } |
| } |
| |
| uncompressedOutputStream.write(currentKV.getBuffer(), |
| currentKV.getOffset(), currentKV.getLength()); |
| |
| previousKey = currentKey; |
| |
| int kLen = currentKV.getKeyLength(); |
| int vLen = currentKV.getValueLength(); |
| int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset()); |
| int restLen = currentKV.getLength() - kLen - vLen; |
| |
| totalKeyLength += kLen; |
| totalValueLength += vLen; |
| totalPrefixLength += restLen; |
| totalCFLength += cfLen; |
| } |
| |
| rawKVs = uncompressedOutputStream.toByteArray(); |
| boolean useTag = (currentKV.getTagsLength() > 0); |
| for (DataBlockEncoding encoding : encodings) { |
| if (encoding == DataBlockEncoding.NONE) { |
| continue; |
| } |
| DataBlockEncoder d = encoding.getEncoder(); |
| HFileContext meta = new HFileContextBuilder() |
| .withCompression(Compression.Algorithm.NONE) |
| .withIncludesMvcc(includesMemstoreTS) |
| .withIncludesTags(useTag).build(); |
| codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta )); |
| } |
| } |
| |
| /** |
| * Verify if all data block encoders are working properly. |
| * |
| * @param scanner Of file which was compressed. |
| * @param kvLimit Maximal count of KeyValue which will be processed. |
| * @return true if all data block encoders compressed/decompressed correctly. |
| * @throws IOException thrown if scanner is invalid |
| */ |
| public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit) |
| throws IOException { |
| KeyValue currentKv; |
| |
| scanner.seek(KeyValue.LOWESTKEY); |
| List<Iterator<Cell>> codecIterators = |
| new ArrayList<Iterator<Cell>>(); |
| for(EncodedDataBlock codec : codecs) { |
| codecIterators.add(codec.getIterator(HFileBlock.headerSize(useHBaseChecksum))); |
| } |
| |
| int j = 0; |
| while ((currentKv = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) { |
| // Iterates through key/value pairs |
| ++j; |
| for (Iterator<Cell> it : codecIterators) { |
| Cell c = it.next(); |
| KeyValue codecKv = KeyValueUtil.ensureKeyValue(c); |
| if (codecKv == null || 0 != Bytes.compareTo( |
| codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(), |
| currentKv.getBuffer(), currentKv.getOffset(), |
| currentKv.getLength())) { |
| if (codecKv == null) { |
| LOG.error("There is a bug in codec " + it + |
| " it returned null KeyValue,"); |
| } else { |
| int prefix = 0; |
| int limitLength = 2 * Bytes.SIZEOF_INT + |
| Math.min(codecKv.getLength(), currentKv.getLength()); |
| while (prefix < limitLength && |
| codecKv.getBuffer()[prefix + codecKv.getOffset()] == |
| currentKv.getBuffer()[prefix + currentKv.getOffset()]) { |
| prefix++; |
| } |
| |
| LOG.error("There is bug in codec " + it.toString() + |
| "\n on element " + j + |
| "\n codecKv.getKeyLength() " + codecKv.getKeyLength() + |
| "\n codecKv.getValueLength() " + codecKv.getValueLength() + |
| "\n codecKv.getLength() " + codecKv.getLength() + |
| "\n currentKv.getKeyLength() " + currentKv.getKeyLength() + |
| "\n currentKv.getValueLength() " + currentKv.getValueLength() + |
| "\n codecKv.getLength() " + currentKv.getLength() + |
| "\n currentKV rowLength " + currentKv.getRowLength() + |
| " familyName " + currentKv.getFamilyLength() + |
| " qualifier " + currentKv.getQualifierLength() + |
| "\n prefix " + prefix + |
| "\n codecKv '" + Bytes.toStringBinary(codecKv.getBuffer(), |
| codecKv.getOffset(), prefix) + "' diff '" + |
| Bytes.toStringBinary(codecKv.getBuffer(), |
| codecKv.getOffset() + prefix, codecKv.getLength() - |
| prefix) + "'" + |
| "\n currentKv '" + Bytes.toStringBinary( |
| currentKv.getBuffer(), |
| currentKv.getOffset(), prefix) + "' diff '" + |
| Bytes.toStringBinary(currentKv.getBuffer(), |
| currentKv.getOffset() + prefix, currentKv.getLength() - |
| prefix) + "'" |
| ); |
| } |
| return false; |
| } |
| } |
| } |
| |
| LOG.info("Verification was successful!"); |
| |
| return true; |
| } |
| |
| /** |
| * Benchmark codec's speed. |
| */ |
| public void benchmarkCodecs() throws IOException { |
| LOG.info("Starting a throughput benchmark for data block encoding codecs"); |
| int prevTotalSize = -1; |
| for (EncodedDataBlock codec : codecs) { |
| prevTotalSize = benchmarkEncoder(prevTotalSize, codec); |
| } |
| |
| benchmarkDefaultCompression(prevTotalSize, rawKVs); |
| } |
| |
| /** |
| * Benchmark compression/decompression throughput. |
| * @param previousTotalSize Total size used for verification. Use -1 if |
| * unknown. |
| * @param codec Tested encoder. |
| * @return Size of uncompressed data. |
| */ |
| private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) { |
| int prevTotalSize = previousTotalSize; |
| int totalSize = 0; |
| |
| // decompression time |
| List<Long> durations = new ArrayList<Long>(); |
| for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) { |
| totalSize = 0; |
| |
| Iterator<Cell> it; |
| |
| it = codec.getIterator(HFileBlock.headerSize(useHBaseChecksum)); |
| |
| // count only the algorithm time, without memory allocations |
| // (expect first time) |
| final long startTime = System.nanoTime(); |
| while (it.hasNext()) { |
| totalSize += KeyValueUtil.ensureKeyValue(it.next()).getLength(); |
| } |
| final long finishTime = System.nanoTime(); |
| if (itTime >= benchmarkNOmit) { |
| durations.add(finishTime - startTime); |
| } |
| |
| if (prevTotalSize != -1 && prevTotalSize != totalSize) { |
| throw new IllegalStateException(String.format( |
| "Algorithm '%s' decoded data to different size", codec.toString())); |
| } |
| prevTotalSize = totalSize; |
| } |
| |
| List<Long> encodingDurations = new ArrayList<Long>(); |
| for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) { |
| final long startTime = System.nanoTime(); |
| codec.encodeData(); |
| final long finishTime = System.nanoTime(); |
| if (itTime >= benchmarkNOmit) { |
| encodingDurations.add(finishTime - startTime); |
| } |
| } |
| |
| System.out.println(codec.toString() + ":"); |
| printBenchmarkResult(totalSize, encodingDurations, Manipulation.ENCODING); |
| printBenchmarkResult(totalSize, durations, Manipulation.DECODING); |
| System.out.println(); |
| |
| return prevTotalSize; |
| } |
| |
| private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer) |
| throws IOException { |
| benchmarkAlgorithm(compressionAlgorithm, |
| compressionAlgorithmName.toUpperCase(Locale.ROOT), rawBuffer, 0, totalSize); |
| } |
| |
| /** |
| * Check decompress performance of a given algorithm and print it. |
| * @param algorithm Compression algorithm. |
| * @param name Name of algorithm. |
| * @param buffer Buffer to be compressed. |
| * @param offset Position of the beginning of the data. |
| * @param length Length of data in buffer. |
| * @throws IOException |
| */ |
| public void benchmarkAlgorithm(Compression.Algorithm algorithm, String name, |
| byte[] buffer, int offset, int length) throws IOException { |
| System.out.println(name + ":"); |
| |
| // compress it |
| List<Long> compressDurations = new ArrayList<Long>(); |
| ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); |
| CompressionOutputStream compressingStream = |
| algorithm.createPlainCompressionStream(compressedStream, compressor); |
| try { |
| for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) { |
| final long startTime = System.nanoTime(); |
| compressingStream.resetState(); |
| compressedStream.reset(); |
| compressingStream.write(buffer, offset, length); |
| compressingStream.flush(); |
| compressedStream.toByteArray(); |
| |
| final long finishTime = System.nanoTime(); |
| |
| // add time record |
| if (itTime >= benchmarkNOmit) { |
| compressDurations.add(finishTime - startTime); |
| } |
| } |
| } catch (IOException e) { |
| throw new RuntimeException(String.format( |
| "Benchmark, or encoding algorithm '%s' cause some stream problems", |
| name), e); |
| } |
| compressingStream.close(); |
| printBenchmarkResult(length, compressDurations, Manipulation.COMPRESSION); |
| |
| byte[] compBuffer = compressedStream.toByteArray(); |
| |
| // uncompress it several times and measure performance |
| List<Long> durations = new ArrayList<Long>(); |
| for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) { |
| final long startTime = System.nanoTime(); |
| byte[] newBuf = new byte[length + 1]; |
| |
| try { |
| ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer, |
| 0, compBuffer.length); |
| InputStream decompressedStream = algorithm.createDecompressionStream( |
| downStream, decompressor, 0); |
| |
| int destOffset = 0; |
| int nextChunk; |
| while ((nextChunk = decompressedStream.available()) > 0) { |
| destOffset += decompressedStream.read(newBuf, destOffset, nextChunk); |
| } |
| decompressedStream.close(); |
| |
| // iterate over KeyValues |
| KeyValue kv; |
| for (int pos = 0; pos < length; pos += kv.getLength()) { |
| kv = new KeyValue(newBuf, pos); |
| } |
| |
| } catch (IOException e) { |
| throw new RuntimeException(String.format( |
| "Decoding path in '%s' algorithm cause exception ", name), e); |
| } |
| |
| final long finishTime = System.nanoTime(); |
| |
| // check correctness |
| if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) { |
| int prefix = 0; |
| for(; prefix < buffer.length && prefix < newBuf.length; ++prefix) { |
| if (buffer[prefix] != newBuf[prefix]) { |
| break; |
| } |
| } |
| throw new RuntimeException(String.format( |
| "Algorithm '%s' is corrupting the data", name)); |
| } |
| |
| // add time record |
| if (itTime >= benchmarkNOmit) { |
| durations.add(finishTime - startTime); |
| } |
| } |
| printBenchmarkResult(length, durations, Manipulation.DECOMPRESSION); |
| System.out.println(); |
| } |
| |
  /** Bytes in a megabyte, for throughput reporting. */
  private static final double BYTES_IN_MB = 1024 * 1024.0;
  /** Nanoseconds in a second. */
  private static final double NS_IN_SEC = 1000.0 * 1000.0 * 1000.0;
  /** Multiplier converting (bytes / nanoseconds) into MB/s. */
  private static final double MB_SEC_COEF = NS_IN_SEC / BYTES_IN_MB;
| |
| private static void printBenchmarkResult(int totalSize, |
| List<Long> durationsInNanoSec, Manipulation manipulation) { |
| final int n = durationsInNanoSec.size(); |
| long meanTime = 0; |
| for (long time : durationsInNanoSec) { |
| meanTime += time; |
| } |
| meanTime /= n; |
| |
| double meanMBPerSec = totalSize * MB_SEC_COEF / meanTime; |
| double mbPerSecSTD = 0; |
| if (n > 0) { |
| for (long time : durationsInNanoSec) { |
| double mbPerSec = totalSize * MB_SEC_COEF / time; |
| double dev = mbPerSec - meanMBPerSec; |
| mbPerSecSTD += dev * dev; |
| } |
| mbPerSecSTD = Math.sqrt(mbPerSecSTD / n); |
| } |
| |
| outputTuple(manipulation + " performance", "%6.2f MB/s (+/- %.2f MB/s)", |
| meanMBPerSec, mbPerSecSTD); |
| } |
| |
| private static void outputTuple(String caption, String format, |
| Object... values) { |
| if (format.startsWith(INT_FORMAT)) { |
| format = "%s" + format.substring(INT_FORMAT.length()); |
| values[0] = DELIMITED_DECIMAL_FORMAT.format(values[0]); |
| } |
| |
| StringBuilder sb = new StringBuilder(); |
| sb.append(" "); |
| sb.append(caption); |
| sb.append(":"); |
| |
| String v = String.format(format, values); |
| int padding = 60 - sb.length() - v.length(); |
| for (int i = 0; i < padding; ++i) { |
| sb.append(' '); |
| } |
| sb.append(v); |
| System.out.println(sb); |
| } |
| |
| /** |
| * Display statistics of different compression algorithms. |
| * @throws IOException |
| */ |
| public void displayStatistics() throws IOException { |
| final String comprAlgo = compressionAlgorithmName.toUpperCase(Locale.ROOT); |
| long rawBytes = totalKeyLength + totalPrefixLength + totalValueLength; |
| |
| System.out.println("Raw data size:"); |
| outputTuple("Raw bytes", INT_FORMAT, rawBytes); |
| outputTuplePct("Key bytes", totalKeyLength); |
| outputTuplePct("Value bytes", totalValueLength); |
| outputTuplePct("KV infrastructure", totalPrefixLength); |
| outputTuplePct("CF overhead", totalCFLength); |
| outputTuplePct("Total key redundancy", totalKeyRedundancyLength); |
| |
| int compressedSize = EncodedDataBlock.getCompressedSize( |
| compressionAlgorithm, compressor, rawKVs, 0, rawKVs.length); |
| outputTuple(comprAlgo + " only size", INT_FORMAT, |
| compressedSize); |
| outputSavings(comprAlgo + " only", compressedSize, rawBytes); |
| System.out.println(); |
| |
| for (EncodedDataBlock codec : codecs) { |
| System.out.println(codec.toString()); |
| long encodedBytes = codec.getSize(); |
| outputTuple("Encoded bytes", INT_FORMAT, encodedBytes); |
| outputSavings("Key encoding", encodedBytes - totalValueLength, |
| rawBytes - totalValueLength); |
| outputSavings("Total encoding", encodedBytes, rawBytes); |
| |
| int encodedCompressedSize = codec.getEncodedCompressedSize( |
| compressionAlgorithm, compressor); |
| outputTuple("Encoding + " + comprAlgo + " size", INT_FORMAT, |
| encodedCompressedSize); |
| outputSavings("Encoding + " + comprAlgo, encodedCompressedSize, rawBytes); |
| outputSavings("Encoding with " + comprAlgo, encodedCompressedSize, |
| compressedSize); |
| |
| System.out.println(); |
| } |
| } |
| |
| private void outputTuplePct(String caption, long size) { |
| outputTuple(caption, INT_FORMAT + " (" + PCT_FORMAT + ")", |
| size, size * 100.0 / rawKVs.length); |
| } |
| |
| private void outputSavings(String caption, long part, long whole) { |
| double pct = 100.0 * (1 - 1.0 * part / whole); |
| double times = whole * 1.0 / part; |
| outputTuple(caption + " savings", PCT_FORMAT + " (%.2f x)", |
| pct, times); |
| } |
| |
| /** |
| * Test a data block encoder on the given HFile. Output results to console. |
| * @param kvLimit The limit of KeyValue which will be analyzed. |
| * @param hfilePath an HFile path on the file system. |
| * @param compressionName Compression algorithm used for comparison. |
| * @param doBenchmark Run performance benchmarks. |
| * @param doVerify Verify correctness. |
| * @throws IOException When pathName is incorrect. |
| */ |
| public static void testCodecs(Configuration conf, int kvLimit, |
| String hfilePath, String compressionName, boolean doBenchmark, |
| boolean doVerify) throws IOException { |
| // create environment |
| Path path = new Path(hfilePath); |
| CacheConfig cacheConf = new CacheConfig(conf); |
| FileSystem fs = FileSystem.get(conf); |
| StoreFile hsf = new StoreFile(fs, path, conf, cacheConf, |
| BloomType.NONE); |
| |
| StoreFileReader reader = hsf.createReader(); |
| reader.loadFileInfo(); |
| KeyValueScanner scanner = reader.getStoreFileScanner(true, true, false, 0, 0, false); |
| |
| // run the utilities |
| DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName); |
| int majorVersion = reader.getHFileVersion(); |
| comp.useHBaseChecksum = majorVersion > 2 || |
| (majorVersion == 2 && |
| reader.getHFileMinorVersion() >= HFileReaderImpl.MINOR_VERSION_WITH_CHECKSUM); |
| comp.checkStatistics(scanner, kvLimit); |
| if (doVerify) { |
| comp.verifyCodecs(scanner, kvLimit); |
| } |
| if (doBenchmark) { |
| comp.benchmarkCodecs(); |
| } |
| comp.displayStatistics(); |
| |
| // cleanup |
| scanner.close(); |
| reader.close(cacheConf.shouldEvictOnClose()); |
| } |
| |
| private static void printUsage(Options options) { |
| System.err.println("Usage:"); |
| System.err.println(String.format("./hbase %s <options>", |
| DataBlockEncodingTool.class.getName())); |
| System.err.println("Options:"); |
| for (Object it : options.getOptions()) { |
| Option opt = (Option) it; |
| if (opt.hasArg()) { |
| System.err.println(String.format("-%s %s: %s", opt.getOpt(), |
| opt.getArgName(), opt.getDescription())); |
| } else { |
| System.err.println(String.format("-%s: %s", opt.getOpt(), |
| opt.getDescription())); |
| } |
| } |
| } |
| |
| /** |
| * A command line interface to benchmarks. Parses command-line arguments and |
| * runs the appropriate benchmarks. |
| * @param args Should have length at least 1 and holds the file path to HFile. |
| * @throws IOException If you specified the wrong file. |
| */ |
| public static void main(final String[] args) throws IOException { |
| // set up user arguments |
| Options options = new Options(); |
| options.addOption(OPT_HFILE_NAME, true, "HFile to analyse (REQUIRED)"); |
| options.getOption(OPT_HFILE_NAME).setArgName("FILENAME"); |
| options.addOption(OPT_KV_LIMIT, true, |
| "Maximum number of KeyValues to process. A benchmark stops running " + |
| "after iterating over this many KV pairs."); |
| options.getOption(OPT_KV_LIMIT).setArgName("NUMBER"); |
| options.addOption(OPT_MEASURE_THROUGHPUT, false, |
| "Measure read throughput"); |
| options.addOption(OPT_OMIT_CORRECTNESS_TEST, false, |
| "Omit corectness tests."); |
| options.addOption(OPT_ENCODING_ALGORITHM, true, |
| "What kind of compression algorithm use for comparison."); |
| options.addOption(OPT_BENCHMARK_N_TIMES, |
| true, "Number of times to run each benchmark. Default value: " + |
| DEFAULT_BENCHMARK_N_TIMES); |
| options.addOption(OPT_BENCHMARK_N_OMIT, true, |
| "Number of first runs of every benchmark to exclude from " |
| + "statistics (" + DEFAULT_BENCHMARK_N_OMIT |
| + " by default, so that " + "only the last " |
| + (DEFAULT_BENCHMARK_N_TIMES - DEFAULT_BENCHMARK_N_OMIT) |
| + " times are included in statistics.)"); |
| |
| // parse arguments |
| CommandLineParser parser = new PosixParser(); |
| CommandLine cmd = null; |
| try { |
| cmd = parser.parse(options, args); |
| } catch (ParseException e) { |
| System.err.println("Could not parse arguments!"); |
| System.exit(-1); |
| return; // avoid warning |
| } |
| |
| int kvLimit = Integer.MAX_VALUE; |
| if (cmd.hasOption(OPT_KV_LIMIT)) { |
| kvLimit = Integer.parseInt(cmd.getOptionValue(OPT_KV_LIMIT)); |
| } |
| |
| // basic argument sanity checks |
| if (!cmd.hasOption(OPT_HFILE_NAME)) { |
| LOG.error("Please specify HFile name using the " + OPT_HFILE_NAME |
| + " option"); |
| printUsage(options); |
| System.exit(-1); |
| } |
| |
| String pathName = cmd.getOptionValue(OPT_HFILE_NAME); |
| String compressionName = DEFAULT_COMPRESSION.getName(); |
| if (cmd.hasOption(OPT_ENCODING_ALGORITHM)) { |
| compressionName = |
| cmd.getOptionValue(OPT_ENCODING_ALGORITHM).toLowerCase(Locale.ROOT); |
| } |
| boolean doBenchmark = cmd.hasOption(OPT_MEASURE_THROUGHPUT); |
| boolean doVerify = !cmd.hasOption(OPT_OMIT_CORRECTNESS_TEST); |
| |
| if (cmd.hasOption(OPT_BENCHMARK_N_TIMES)) { |
| benchmarkNTimes = Integer.valueOf(cmd.getOptionValue( |
| OPT_BENCHMARK_N_TIMES)); |
| } |
| if (cmd.hasOption(OPT_BENCHMARK_N_OMIT)) { |
| benchmarkNOmit = |
| Integer.valueOf(cmd.getOptionValue(OPT_BENCHMARK_N_OMIT)); |
| } |
| if (benchmarkNTimes < benchmarkNOmit) { |
| LOG.error("The number of times to run each benchmark (" |
| + benchmarkNTimes |
| + ") must be greater than the number of benchmark runs to exclude " |
| + "from statistics (" + benchmarkNOmit + ")"); |
| System.exit(1); |
| } |
| LOG.info("Running benchmark " + benchmarkNTimes + " times. " + |
| "Excluding the first " + benchmarkNOmit + " times from statistics."); |
| |
| final Configuration conf = HBaseConfiguration.create(); |
| try { |
| testCodecs(conf, kvLimit, pathName, compressionName, doBenchmark, |
| doVerify); |
| } finally { |
| (new CacheConfig(conf)).getBlockCache().shutdown(); |
| } |
| } |
| |
| } |