Add threshold tuning
diff --git a/optimize_cost.py b/optimize_cost.py
new file mode 100644
index 0000000..5b15328
--- /dev/null
+++ b/optimize_cost.py
@@ -0,0 +1,18 @@
+from math import ceil
+
+PACKED_VALUES = 504
+def find_inflection(bitwidth):
+  rle_cost = float(16 + (ceil(bitwidth/8.0)*8) + (PACKED_VALUES * bitwidth))
+  bp_cost = float(8 + (bitwidth * PACKED_VALUES))
+  cpb_bp = bp_cost / PACKED_VALUES
+
+  repeats = 2
+  while(True):
+    cpb_rle = (rle_cost / (PACKED_VALUES + repeats))
+    if cpb_rle < cpb_bp: break
+    repeats += 1
+  print "Bitwidth: %s, repeats %s" % (bitwidth, repeats)
+
+for bitwidth in xrange(1, 33):
+  find_inflection(bitwidth)
+
diff --git a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java b/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
index 9c93135..674efe8 100644
--- a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
+++ b/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
@@ -34,6 +34,16 @@
 public class RunLengthBitPackingHybridEncoder {
   private static final Log LOG = Log.getLog(RunLengthBitPackingHybridEncoder.class);
 
+  /**
+   * These represent how many repeats are needed to use RLE.
+   * These values are optimal when we expect to frequently
+   * see 504 values (the max bit-packed-run length) without switching
+   * to RLE. These thresholds could be lowered if we expect repeats
+   * to occur more frequently. Everything with bitWidth > 12 however
+   * should use RLE whenever it can.
+   */
+  private static final int[] RLE_THRESHOLDS = new int[] {16, 8, 6, 4, 4, 3, 3, 2, 3, 3, 3};
+
   private final BytePacker packer;
 
   private final CapacityByteArrayOutputStream baos;
@@ -45,6 +55,13 @@
   private final int bitWidth;
 
   /**
+   * How many times a value needs to repeat in order
+   * to use run length encoding instead of bit packing.
+   * Changes based on bitWidth.
+   */
+  private final int rleThreshold;
+
+  /**
    * Values that are bit packed 8 at at a time are packed into this
    * buffer, which is then written to baos
    */
@@ -89,7 +106,7 @@
 
   private boolean toBytesCalled;
 
-  public RunLengthBitPackingHybridEncoder(int bitWidth, int initialCapacity) {
+  public RunLengthBitPackingHybridEncoder(int bitWidth, int initialCapacity, int rleThreshold) {
     if (DEBUG) {
       LOG.debug(String.format("Encoding: RunLengthBitPackingHybridEncoder with "
         + "bithWidth: %d initialCapacity %d", bitWidth, initialCapacity));
@@ -102,9 +119,22 @@
     this.packBuffer = new byte[bitWidth];
     this.bufferedValues = new int[8];
     this.packer = ByteBitPacking.getPacker(bitWidth);
+    this.rleThreshold = rleThreshold;
+
     reset(false);
   }
 
+  public RunLengthBitPackingHybridEncoder(int bitWidth, int initialCapacity) {
+    this(bitWidth, initialCapacity, determineRleThreshold(bitWidth));
+  }
+
+  private static int determineRleThreshold(int bitWidth) {
+    if (bitWidth - 1 < RLE_THRESHOLDS.length) {
+      return RLE_THRESHOLDS[bitWidth - 1];
+    }
+    return 2;
+  }
+
   private void reset(boolean resetBaos) {
     if (resetBaos) {
       this.baos.reset();
@@ -123,7 +153,7 @@
       // consecutively
       ++repeatCount;
 
-      if (repeatCount >= 8) {
+      if (repeatCount >= rleThreshold) {
         // we've seen this at least 8 times, we're
         // certainly going to write an rle-run,
         // so just keep on counting repeats for now
@@ -132,7 +162,7 @@
     } else {
       // This is a new value, check if it signals the end of
       // an rle-run
-      if (repeatCount >= 8) {
+      if (repeatCount >= rleThreshold) {
         // it does! write an rle-run
         writeRleRun();
       }
@@ -227,7 +257,7 @@
     Preconditions.checkArgument(!toBytesCalled,
         "You cannot call toBytes() more than once without calling reset()");
 
-    if (repeatCount >= 8) {
+    if (repeatCount >= rleThreshold) {
       writeRleRun();
     } else if(numBufferedValues > 0) {
       for (int i = numBufferedValues; i < 8; i++) {