be/src/util/rle-test.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include <stdlib.h>
 #include <stdio.h>
 #include <iostream>

 #include <boost/utility.hpp>
 #include <math.h>
 #include <random>

 #include "testutil/gtest-util.h"
 #include "testutil/rand-util.h"
 #include "util/bit-packing.inline.h"
 #include "util/bit-stream-utils.h"
 #include "util/encoding-test-util.h"
 #include "util/rle-encoding.h"

 #include "common/names.h"

 namespace impala {

 const int MAX_WIDTH = BatchedBitReader::MAX_BITWIDTH;

 class RleTest : public ::testing::Test {
  protected:

   /// Get many values from a batch RLE decoder using its low level functions.
   template <typename T>
   static bool GetRleValues(RleBatchDecoder<T>* decoder, int num_vals, T* vals) {
     int decoded = 0;
     // Decode repeated and literal runs until we've filled the output.
     while (decoded < num_vals) {
       if (decoder->NextNumRepeats() > 0) {
         EXPECT_EQ(0, decoder->NextNumLiterals());
         int num_repeats_to_output =
             min<int>(decoder->NextNumRepeats(), num_vals - decoded);
         T repeated_val = decoder->GetRepeatedValue(num_repeats_to_output);
         for (int i = 0; i < num_repeats_to_output; ++i) {
           *vals = repeated_val;
           ++vals;
         }
         decoded += num_repeats_to_output;
         continue;
       }
       int num_literals_to_output =
           min<int>(decoder->NextNumLiterals(), num_vals - decoded);
       if (num_literals_to_output == 0) return false;
       if (!decoder->GetLiteralValues(num_literals_to_output, vals)) return false;
       decoded += num_literals_to_output;
       vals += num_literals_to_output;
     }
     return true;
   }

   /// Get many values from a batch RLE decoder using its low level functions.
   template <typename T>
   static bool GetRleValuesSkip(RleBatchDecoder<T>* decoder, int num_vals, T* vals,
       int skip_at, int skip_count) {
     if (!GetRleValues(decoder, skip_at, vals)) return false;
     if (decoder->SkipValues(skip_count) != skip_count) return false;
     int consumed = skip_at + skip_count;
     if (!GetRleValues(decoder, num_vals - consumed, vals + skip_at)) return false;
     return true;
   }

   /// Get many values from a batch RLE decoder using its GetValues() function.
   template <typename T>
   static bool GetRleValuesBatched(RleBatchDecoder<T>* decoder, int num_vals, T* vals) {
     return num_vals == decoder->GetValues(num_vals, vals);
   }

   /// Get many values from a batch RLE decoder using its GetValues() function.
   template <typename T>
   static bool GetRleValuesBatchedSkip(RleBatchDecoder<T>* decoder, int num_vals, T* vals,
       int skip_at, int skip_count) {
     int cnt = 0;
     if (skip_at > 0) cnt += decoder->GetValues(skip_at, vals);
     if (decoder->SkipValues(skip_count) != skip_count) return false;
     cnt += skip_count;
     if (num_vals - cnt > 0) cnt += decoder->GetValues(num_vals - cnt, vals + skip_at);
     return cnt == num_vals;
   }

   // Validates encoding of values by encoding and decoding them.
   // If expected_encoding != NULL, validates that the encoded buffer is
   // exactly 'expected_encoding'.
   // if expected_len is not -1, validates that is is the same as the encoded size (in
   // bytes).
   int ValidateRle(const vector<int>& values, int bit_width, uint8_t* expected_encoding,
       int expected_len) {
     stringstream ss;
     ss << "bit_width=" << bit_width;
     const string& description = ss.str();
     const int len = 64 * 1024;
     uint8_t buffer[len];
     EXPECT_LE(expected_len, len);

     RleEncoder encoder(buffer, len, bit_width);

     int encoded_len = 0;
     for (int clear_count = 0; clear_count < 2; clear_count++) {
       if (clear_count >= 1) {
         // Check that we can reuse the encoder after calling Clear().
         encoder.Clear();
       }
       for (int i = 0; i < values.size(); ++i) {
         bool result = encoder.Put(values[i]);
         EXPECT_TRUE(result);
       }
       encoded_len = encoder.Flush();

       if (expected_len != -1) {
         EXPECT_EQ(encoded_len, expected_len);
       }
       if (expected_encoding != NULL) {
         EXPECT_TRUE(memcmp(buffer, expected_encoding, expected_len) == 0);
       }

       // Verify read.
       RleBatchDecoder<uint64_t> per_value_decoder(buffer, len, bit_width);
       RleBatchDecoder<uint64_t> per_run_decoder(buffer, len, bit_width);
       RleBatchDecoder<uint64_t> batch_decoder(buffer, len, bit_width);
       // Ensure it returns the same results after Reset().
       for (int trial = 0; trial < 2; ++trial) {
         for (int i = 0; i < values.size(); ++i) {
           uint64_t val;
           EXPECT_TRUE(per_value_decoder.GetSingleValue(&val)) << description;
           EXPECT_EQ(values[i], val) << description << " i=" << i << " trial=" << trial;
         }
         // Unpack everything at once from the other decoders.
         vector<uint64_t> decoded_values1(values.size());
         vector<uint64_t> decoded_values2(values.size());
         EXPECT_TRUE(GetRleValues(
             &per_run_decoder, decoded_values1.size(), decoded_values1.data()));
         EXPECT_TRUE(GetRleValuesBatched(
             &batch_decoder, decoded_values2.size(), decoded_values2.data()));
         for (int i = 0; i < values.size(); ++i) {
           EXPECT_EQ(values[i], decoded_values1[i]) << description << " i=" << i;
           EXPECT_EQ(values[i], decoded_values2[i]) << description << " i=" << i;
         }
         per_value_decoder.Reset(buffer, len, bit_width);
         per_run_decoder.Reset(buffer, len, bit_width);
         batch_decoder.Reset(buffer, len, bit_width);
       }
     }
     return encoded_len;
   }

   int ValidateRleSkip(
       const vector<int>& values, int bit_width, int skip_at, int skip_count) {
     stringstream ss;
     ss << "bit_width=" << bit_width
        << " skip_at=" << skip_at
        << " skip_count=" << skip_count
        << " values.size()=" << values.size();
     const string& description = ss.str();
     const int len = 64 * 1024;
     uint8_t buffer[len];

     RleEncoder encoder(buffer, len, bit_width);
     int encoded_len = 0;

     for (int i = 0; i < values.size(); ++i) {
       bool result = encoder.Put(values[i]);
       EXPECT_TRUE(result);
     }
     encoded_len = encoder.Flush();

     vector<int> expected_values(values.begin(), values.begin() + skip_at);
     for (int i = skip_at + skip_count; i < values.size(); ++i) {
       expected_values.push_back(values[i]);
     }

     // Verify read.
     RleBatchDecoder<uint64_t> per_value_decoder(buffer, len, bit_width);
     RleBatchDecoder<uint64_t> per_run_decoder(buffer, len, bit_width);
     RleBatchDecoder<uint64_t> batch_decoder(buffer, len, bit_width);
     // Ensure it returns the same results after Reset().
     for (int trial = 0; trial < 2; ++trial) {
       for (int i = 0; i < skip_at; ++i) {
         uint64_t val;
         EXPECT_TRUE(per_value_decoder.GetSingleValue(&val)) << description;
         EXPECT_EQ(expected_values[i], val) << description << " i=" << i << " trial="
             << trial;
       }
       per_value_decoder.SkipValues(skip_count);
       for (int i = skip_at; i < expected_values.size(); ++i) {
               uint64_t val;
               EXPECT_TRUE(per_value_decoder.GetSingleValue(&val)) << description;
               EXPECT_EQ(expected_values[i], val) << description << " i=" << i << " trial="
                   << trial;
       }
       // Unpack everything at once from the other decoders.
       vector<uint64_t> decoded_values1(expected_values.size());
       vector<uint64_t> decoded_values2(expected_values.size());
       EXPECT_TRUE(
           GetRleValuesSkip(&per_run_decoder, values.size(),
               decoded_values1.data(), skip_at, skip_count)) << description;
       EXPECT_TRUE(
           GetRleValuesBatchedSkip(&batch_decoder, values.size(),
               decoded_values2.data(), skip_at, skip_count)) << description;
       for (int i = 0; i < expected_values.size(); ++i) {
         EXPECT_EQ(expected_values[i], decoded_values1[i]) << description << " i=" << i;
         EXPECT_EQ(expected_values[i], decoded_values2[i]) << description << " i=" << i;
       }
       per_value_decoder.Reset(buffer, len, bit_width);
       per_run_decoder.Reset(buffer, len, bit_width);
       batch_decoder.Reset(buffer, len, bit_width);
     }
     return encoded_len;
   }

   // ValidateRle on 'num_vals' values with width 'bit_width'. If 'value' != -1, that value
   // is used, otherwise alternating values are used.
   void TestRleValues(int bit_width, int num_vals, int value = -1) {
     const int64_t mod = (bit_width == 64) ? 1 : 1LL << bit_width;
     vector<int> values;
     for (int v = 0; v < num_vals; ++v) {
       values.push_back((value != -1) ? value : (v % mod));
     }
     ValidateRle(values, bit_width, NULL, -1);
   }

   /// Returns the total number of bytes written when encoding the boolean values passed.
   int RleBooleanLength(const vector<int>& values) {
     return ValidateRle(values, 1, nullptr, -1);
   }

   /// Make a sequence of values.
   /// intitial_literal_length: the length of an initial literal sequence.
   /// repeated_length: the length of a repeated sequence.
   /// trailing_literal_length: the length of a closing literal sequence.
   /// bit_width: the bit length of the values being used.
   vector<int>& MakeSequenceBitWidth(vector<int>& values, int intitial_literal_length,
       int repeated_length, int trailing_literal_length, int bit_width) {
     const int64_t mod = 1LL << bit_width;
     values.clear();
     for (int i = 0; i < intitial_literal_length; ++i) {
       values.push_back(i % mod);
     }
     for (int i = 0; i < repeated_length; ++i) {
       values.push_back(1);
     }
     for (int i = 0; i < trailing_literal_length; ++i) {
       values.push_back(i % mod);
     }
     return values;
   }

   /// Same as above with bit width being 1.
   vector<int>& MakeSequence(vector<int>& values, int intitial_literal_length,
       int repeated_length, int trailing_literal_length) {
     return MakeSequenceBitWidth(values, intitial_literal_length, repeated_length,
         trailing_literal_length, 1);
   }
 };

 /// Basic test case for literal unpacking - two literals in a run.
 TEST_F(RleTest, TwoLiteralRun) {
   vector<int> values{1, 0};
   ValidateRle(values, 1, nullptr, -1);
   for (int width = 1; width <= MAX_WIDTH; ++width) {
     ValidateRle(values, width, nullptr, -1);
     }
 }

 TEST_F(RleTest, ValueSkipping) {
   vector<int> seq;
     for (int bit_width : {1, 3, 7, 8, 20, 32}) {
       MakeSequenceBitWidth(seq, 100, 100, 100, bit_width);
       ValidateRleSkip(seq, bit_width, 0, 7);
       ValidateRleSkip(seq, bit_width, 0, 64);
       ValidateRleSkip(seq, bit_width, 0, 75);
       ValidateRleSkip(seq, bit_width, 0, 100);
       ValidateRleSkip(seq, bit_width, 0, 105);
       ValidateRleSkip(seq, bit_width, 0, 155);
       ValidateRleSkip(seq, bit_width, 0, 200);
       ValidateRleSkip(seq, bit_width, 0, 213);
       ValidateRleSkip(seq, bit_width, 0, 267);
       ValidateRleSkip(seq, bit_width, 0, 300);
       ValidateRleSkip(seq, bit_width, 7, 7);
       ValidateRleSkip(seq, bit_width, 35, 64);
       ValidateRleSkip(seq, bit_width, 55, 75);
       ValidateRleSkip(seq, bit_width, 99, 100);
       ValidateRleSkip(seq, bit_width, 100, 11);
       ValidateRleSkip(seq, bit_width, 101, 55);
       ValidateRleSkip(seq, bit_width, 102, 155);
       ValidateRleSkip(seq, bit_width, 104, 17);
       ValidateRleSkip(seq, bit_width, 122, 178);
       ValidateRleSkip(seq, bit_width, 200, 3);
       ValidateRleSkip(seq, bit_width, 200, 65);
       ValidateRleSkip(seq, bit_width, 203, 17);
       ValidateRleSkip(seq, bit_width, 215, 70);
       ValidateRleSkip(seq, bit_width, 217, 83);
   }
 }

 // Tests value-skipping on randomly generated input and
 // random skipping positions and counts.
 TEST_F(RleTest, ValueSkippingFuzzy) {
   const int bitwidth_iteration = 10;
   const int probe_iteration = 100;
   const int total_sequence_length = 2048;

   std::default_random_engine random_eng;
   RandTestUtil::SeedRng("RLE_TEST_SEED", &random_eng);

   // Generates random number between 'bottom' and 'top' (inclusive intervals).
   auto GetRandom = [&random_eng](int bottom, int top) {
     std::uniform_int_distribution<int> uni_dist(bottom, top);
     return uni_dist(random_eng);
   };

     for (int i = 0; i < bitwidth_iteration; ++i) {
       int bit_width = GetRandom(1, 32);
       int max_run_length = GetRandom(5, 200);
       vector<int> seq = MakeRandomSequence(random_eng, total_sequence_length,
           max_run_length, bit_width);
       for (int j = 0; j < probe_iteration; ++j) {
         int skip_at = GetRandom(0, seq.size() - 1);
         int skip_count = GetRandom(1, seq.size() - skip_at);
         ValidateRleSkip(seq, bit_width, skip_at, skip_count);
       }
   }
 }

 TEST_F(RleTest, SpecificSequences) {
   const int len = 1024;
   uint8_t expected_buffer[len];
   vector<int> values;

   // Test 50 0' followed by 50 1's
   values.resize(100);
   for (int i = 0; i < 50; ++i) {
     values[i] = 0;
   }
   for (int i = 50; i < 100; ++i) {
     values[i] = 1;
   }

   // expected_buffer valid for bit width <= 1 byte
   expected_buffer[0] = (50 << 1);
   expected_buffer[1] = 0;
   expected_buffer[2] = (50 << 1);
   expected_buffer[3] = 1;
     for (int width = 1; width <= 8; ++width) {
       ValidateRle(values, width, expected_buffer, 4);
     }

     for (int width = 9; width <= MAX_WIDTH; ++width) {
       ValidateRle(values, width, NULL, 2 * (1 + BitUtil::Ceil(width, 8)));
     }

   // Test 100 0's and 1's alternating
   for (int i = 0; i < 100; ++i) {
     values[i] = i % 2;
   }
   int num_groups = BitUtil::Ceil(100, 8);
   expected_buffer[0] = static_cast<uint8_t>((num_groups << 1) | 1);
   for (int i = 1; i <= 100/8; ++i) {
     expected_buffer[i] = BOOST_BINARY(1 0 1 0 1 0 1 0);
   }
   // Values for the last 4 0 and 1's. The upper 4 bits should be padded to 0.
   expected_buffer[100/8 + 1] = BOOST_BINARY(0 0 0 0 1 0 1 0);

   // num_groups and expected_buffer only valid for bit width = 1
   ValidateRle(values, 1, expected_buffer, 1 + num_groups);
   for (int width = 2; width <= MAX_WIDTH; ++width) {
     int num_values = BitUtil::Ceil(100, 8) * 8;
     ValidateRle(values, width, NULL, 1 + BitUtil::Ceil(width * num_values, 8));
   }


 }

 TEST_F(RleTest, TestValues) {
   for (int width = 1; width <= MAX_WIDTH; ++width) {
     TestRleValues(width, 1);
     TestRleValues(width, 1024);
     TestRleValues(width, 1024, 0);
     TestRleValues(width, 1024, 1);
   }
 }

 TEST_F(RleTest, BitWidthZeroRepeated) {
   uint8_t buffer[1];
   const int num_values = 15;
   buffer[0] = num_values << 1; // repeated indicator byte
   RleBatchDecoder<uint8_t> decoder(buffer, sizeof(buffer), 0);
   // Ensure it returns the same results after Reset().
   for (int trial = 0; trial < 2; ++trial) {
     uint8_t val;
     for (int i = 0; i < num_values; ++i) {
       EXPECT_TRUE(decoder.GetSingleValue(&val));
       EXPECT_EQ(val, 0);
     }
     EXPECT_FALSE(decoder.GetSingleValue(&val));

     // Test decoding all values in a batch.
     decoder.Reset(buffer, sizeof(buffer), 0);
     uint8_t decoded_values[num_values];
     EXPECT_TRUE(GetRleValues(&decoder, num_values, decoded_values));
     for (int i = 0; i < num_values; i++) EXPECT_EQ(0, decoded_values[i]) << i;
     EXPECT_FALSE(decoder.GetSingleValue(&val));
     decoder.Reset(buffer, sizeof(buffer), 0);
   }
 }

 TEST_F(RleTest, BitWidthZeroLiteral) {
   uint8_t buffer[1];
   const int num_groups = 4;
   buffer[0] = num_groups << 1 | 1; // literal indicator byte
   RleBatchDecoder<uint8_t> decoder(buffer, sizeof(buffer), 0);
   // Ensure it returns the same results after Reset().
   for (int trial = 0; trial < 2; ++trial) {
     const int num_values = num_groups * 8;
     uint8_t val;
     for (int i = 0; i < num_values; ++i) {
       EXPECT_TRUE(decoder.GetSingleValue(&val));
       EXPECT_EQ(val, 0); // can only encode 0s with bit width 0
     }

     // Test decoding the whole batch at once.
     decoder.Reset(buffer, sizeof(buffer), 0);
     uint8_t decoded_values[num_values];
     EXPECT_TRUE(GetRleValues(&decoder, num_values, decoded_values));
     for (int i = 0; i < num_values; ++i) EXPECT_EQ(0, decoded_values[i]);

     EXPECT_FALSE(GetRleValues(&decoder, 1, decoded_values));
     decoder.Reset(buffer, sizeof(buffer), 0);
   }
 }

 // Test that writes out a repeated group and then a literal
 // group but flush before finishing.
 TEST_F(RleTest, Flush) {
     vector<int> values;
     for (int i = 0; i < 16; ++i) values.push_back(1);
     values.push_back(0);
     ValidateRle(values, 1, NULL, -1);

     values.push_back(1);
     ValidateRle(values, 1, NULL, -1);
 }

 // Test some random sequences.
 TEST_F(RleTest, Random) {
   int iters = 0;
   while (iters < 1000) {
     srand(static_cast<unsigned int>(iters++));
     if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
     vector<int> values;
     bool parity = 0;
     for (int i = 0; i < 1000; ++i) {
       int group_size = rand() % 20 + 1;
       if (group_size > 16) {
         group_size = 1;
       }
       for (int j = 0; j < group_size; ++j) {
         values.push_back(parity);
       }
       parity = !parity;
     }
     ValidateRle(values, (iters % MAX_WIDTH) + 1, NULL, -1);
   }
 }

 // Test a sequence of 1 0's, 2 1's, 3 0's. etc
 // e.g. 011000111100000
 TEST_F(RleTest, RepeatedPattern) {
   vector<int> values;
   const int min_run = 1;
   const int max_run = 32;

   for (int i = min_run; i <= max_run; ++i) {
     int v = i % 2;
     for (int j = 0; j < i; ++j) {
       values.push_back(v);
     }
   }

   // And go back down again
   for (int i = max_run; i >= min_run; --i) {
     int v = i % 2;
     for (int j = 0; j < i; ++j) {
       values.push_back(v);
     }
   }
   ValidateRle(values, 1, NULL, -1);
 }

 TEST_F(RleTest, Overflow) {
     for (int bit_width = 1; bit_width < 32; bit_width += 1) {
       for (int pad_buffer = 0; pad_buffer < 64; pad_buffer++) {
         const int len = RleEncoder::MinBufferSize(bit_width) + pad_buffer;
         uint8_t buffer[len];
         int num_added = 0;
         bool parity = true;

         RleEncoder encoder(buffer, len, bit_width);
         // Insert alternating true/false until there is no space left.
         while (true) {
           bool result = encoder.Put(static_cast<uint64_t>(parity));
           parity = !parity;
           if (!result) break;
           ++num_added;
         }

         int bytes_written = encoder.Flush();
         EXPECT_LE(bytes_written, len);
         EXPECT_GT(num_added, 0);

         RleBatchDecoder<uint32_t> decoder(buffer, bytes_written, bit_width);
         // Ensure it returns the same results after Reset().
         for (int trial = 0; trial < 2; ++trial) {
           parity = true;
           uint32_t v;
           for (int i = 0; i < num_added; ++i) {
             EXPECT_TRUE(decoder.GetSingleValue(&v));
             EXPECT_EQ(v, parity);
             parity = !parity;
           }
           // Make sure we get false when reading past end a couple times.
           EXPECT_FALSE(decoder.GetSingleValue(&v));
           EXPECT_FALSE(decoder.GetSingleValue(&v));

           decoder.Reset(buffer, bytes_written, bit_width);
           uint32_t decoded_values[num_added];
           EXPECT_TRUE(GetRleValues(&decoder, num_added, decoded_values));
           for (int i = 0; i < num_added; ++i)
             EXPECT_EQ(i % 2 == 0, decoded_values[i]) << i;

           decoder.Reset(buffer, bytes_written, bit_width);
         }
     }
   }
 }

 /// Construct data sequences in the vector values for bit_widths of 1 and 2, such that
 /// encoding using runs results in the encoding occupying more space than if the sequences
 /// had been encoded using literal values.
 void AddPathologicalValues(vector<int>& values, int bit_width) {
   EXPECT_LE(bit_width, 2);
   // Using the notation of 'RXX' for a repeated run of length XX (so R16 is a
   // run of length 16), and 'LYY' for a literal run of length YY.
   // For bit_width=1 the sequence (L8 R16 L8 R16 L8) is encoded as following
   // for different values of min_run_length.
   //                                  L8  R16 L8  R16 L8
   // min_run_length 8  (old default)  2   2   2   2   2
   // min_run_length 16 (new default)  2   2   2   2   2
   // min_run_length 24                2   2   1   2   1 (one long literal run)
   //
   // So it would have been better not to use rle for this sequence.
   // Note that min_run_length=24 is not *always* better.
   for (int i = 0; i < 8; i++) {
     // A literal sequence.
     values.push_back(i % 2);
   }
   // For bit_width = 2, a run of 8 values is needed.
   // For bit_width = 1, a run of 16 values is needed.
   int length_run = 16 / bit_width;
   // For small amounts of data, the value returned by MaxBufferSize is dominated by
   // MinBufferSize. Add enough data that this is not true.
   for (int runs = 0; runs < 200; runs++) {
     for (int i = 0; i < length_run; i++) {
       // A sequence that can be encoded as a run.
       values.push_back(1);
     }
     for (int i = 0; i < 8; i++) {
       // A literal sequence.
       values.push_back(i % 2);
     }
   }
 }

 // Tests handling of a specific data corruption scenario where
 // the literal or repeat count is decoded as 0 (which is invalid).
 TEST_F(RleTest, ZeroLiteralOrRepeatCount) {
   const int len = 1024;
   uint8_t buffer[len];
   RleBatchDecoder<uint64_t> decoder(buffer, len, 0);
   // Test the RLE repeated values path.
   memset(buffer, 0, len);
   for (int i = 0; i < 10; ++i) {
     EXPECT_EQ(0, decoder.NextNumLiterals());
     EXPECT_EQ(0, decoder.NextNumRepeats());
   }

   // Test the RLE literal values path
   memset(buffer, 1, len);
   decoder.Reset(buffer, len, 0);
   for (int i = 0; i < 10; ++i) {
     EXPECT_EQ(0, decoder.NextNumLiterals());
     EXPECT_EQ(0, decoder.NextNumRepeats());
   }
 }

 // Regression test for handing of repeat counts >= 2^31: IMPALA-6946.
 TEST_F(RleTest, RepeatCountOverflow) {
   const int BUFFER_LEN = 1024;
   uint8_t buffer[BUFFER_LEN];

   for (bool literal_run : {true, false}) {
     memset(buffer, 0, BUFFER_LEN);
     LOG(INFO) << "Testing negative " << (literal_run ? "literal" : "repeated");
     BitWriter writer(buffer, BUFFER_LEN);
     // Literal runs have lowest bit 1. Repeated runs have lowest bit 0. All other bits
     // are 1.
     const uint32_t REPEATED_RUN_HEADER = 0xfffffffe;
     const uint32_t LITERAL_RUN_HEADER = 0xffffffff;
     writer.PutUleb128<uint32_t>(literal_run ? LITERAL_RUN_HEADER : REPEATED_RUN_HEADER);
     writer.Flush();

     RleBatchDecoder<uint64_t> decoder(buffer, BUFFER_LEN, 1);
     // Repeated run length fits in an int32_t.
     if (literal_run) {
       EXPECT_EQ(0, decoder.NextNumRepeats()) << "Not a repeated run";
       // Literal run length would overflow int32_t - should gracefully fail decoding.
       EXPECT_EQ(0, decoder.NextNumLiterals());
     } else {
       EXPECT_EQ(0x7fffffff, decoder.NextNumRepeats());
       EXPECT_EQ(0, decoder.NextNumLiterals()) << "Not a literal run";
     }

     // IMPALA-6946: reading back run lengths that don't fit in int32_t hit various
     // DCHECKs.
     uint64_t val;
     if (literal_run) {
       EXPECT_EQ(0, decoder.GetValues(1, &val)) << "Decoding failed above.";
     } else {
       EXPECT_EQ(1, decoder.GetValues(1, &val));
       EXPECT_EQ(0, val) << "Buffer was initialized with all zeroes";
     }
   }
 }

 /// Test that encoded lengths are as expected as min_repeated_run_length varies.
 TEST_F(RleTest, MeasureOutputLengths) {
   vector<int> values;
   // With min_repeated_run_length = 8, a sequence of 8 is inefficient.
   EXPECT_EQ(12, RleBooleanLength(MakeSequence(values, 32, 8, 32)));
   EXPECT_EQ(12, RleBooleanLength(MakeSequence(values, 32, 16, 32)));
   EXPECT_EQ(12, RleBooleanLength(MakeSequence(values, 32, 24, 32)));
   EXPECT_EQ(12, RleBooleanLength(MakeSequence(values, 32, 32, 32)));
 }
 }