be/src/exec/parquet/parquet-plain-test.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include <stdio.h>
 #include <stdlib.h>
 #include <algorithm>
 #include <iostream>
 #include <memory>
 #include <vector>
 #include "exec/parquet/parquet-common.h"
 #include "runtime/decimal-value.h"
 #include "runtime/string-value.inline.h"
 #include "runtime/timestamp-value.h"
 #include "testutil/gtest-util.h"
 #include "testutil/random-vector-generators.h"
 #include "testutil/rand-util.h"

 #include "common/names.h"

 namespace impala {

 /// Generic encode entry point used by the tests in this file: forwards 'v',
 /// 'encoded_byte_size' and 'buffer' to ParquetPlainEncoder::Encode() and returns its
 /// result. 'physical_type' is not consulted by this primary template; only the decimal
 /// specializations of Encode() look at the physical type.
 template <typename InternalType>
 int Encode(const InternalType& v, int encoded_byte_size, uint8_t* buffer,
     parquet::Type::type physical_type) {
   const int bytes_written = ParquetPlainEncoder::Encode(v, encoded_byte_size, buffer);
   return bytes_written;
 }

 // Encodes decimal 'v' into 'buffer' for the given physical 'parquet_type'.
 // BYTE_ARRAY, INT32 and INT64 are special-cased here in the test because encoding
 // decimals as those types is not implemented in Impala.
 //  - FIXED_LEN_BYTE_ARRAY: delegates to ParquetPlainEncoder::Encode().
 //  - BYTE_ARRAY: 'encoded_byte_size' is the sum of the minimum number of bytes required
 //    to store the unscaled value and the bytes required to store the size. The value
 //    passed in should not contain leading zeros, as this method does not strictly
 //    conform to the parquet spec in removing those.
 //  - INT32 / INT64: the unscaled value is simply written to the buffer.
 // Returns the number of bytes reported as written, or -1 for any other physical type.
 template <typename DecimalType>
 int EncodeDecimal(const DecimalType& v, int encoded_byte_size, uint8_t* buffer,
     parquet::Type::type parquet_type) {
   switch (parquet_type) {
     case parquet::Type::FIXED_LEN_BYTE_ARRAY:
       return ParquetPlainEncoder::Encode(v, encoded_byte_size, buffer);
     case parquet::Type::BYTE_ARRAY: {
       // Layout: a 4-byte length prefix followed by the value bytes.
       int decimal_size = encoded_byte_size - sizeof(int32_t);
       memcpy(buffer, &decimal_size, sizeof(int32_t));
       uint8_t* value_start = buffer + sizeof(int32_t);
       DecimalUtil::EncodeToFixedLenByteArray(value_start, decimal_size, v);
       return encoded_byte_size;
     }
     case parquet::Type::INT32:
     case parquet::Type::INT64:
       return ParquetPlainEncoder::Encode(v.value(), encoded_byte_size, buffer);
     default:
       // Physical type not supported for decimals by this helper.
       return -1;
   }
 }

 template<>
 int Encode(const Decimal4Value& v, int encoded_byte_size, uint8_t* buffer,
     parquet::Type::type parquet_type) {
   return EncodeDecimal(v, encoded_byte_size, buffer, parquet_type);
 }

 template<>
 int Encode(const Decimal8Value& v, int encoded_byte_size, uint8_t* buffer,
     parquet::Type::type parquet_type) {
   return EncodeDecimal(v, encoded_byte_size, buffer, parquet_type);
 }

 template<>
 int Encode(const Decimal16Value& v, int encoded_byte_size, uint8_t* buffer,
     parquet::Type::type parquet_type){
   return EncodeDecimal(v, encoded_byte_size, buffer, parquet_type);
 }

 /// Test that the decoder fails when asked to decode a truncated value.
 /// Encodes 'v' into 'expected_byte_size' bytes and then, for every strictly shorter
 /// prefix of the encoded bytes (down to and including the empty prefix), expects
 /// Decode() to return -1.
 /// This function can be used for type widening tests but also tests without type
 /// widening, in which case `WidenInternalType` is the same as `InternalType`.
 template <typename InternalType, typename WidenInternalType,
     parquet::Type::type PARQUET_TYPE>
 void TestTruncate(const InternalType& v, int expected_byte_size) {
   // A std::vector is used instead of a variable-length array: VLAs are a compiler
   // extension and not part of standard C++.
   std::vector<uint8_t> buffer(expected_byte_size);
   int encoded_size = Encode(v, expected_byte_size, buffer.data(), PARQUET_TYPE);
   EXPECT_EQ(encoded_size, expected_byte_size);

   // Check all possible truncations of the buffer.
   for (int truncated_size = encoded_size - 1; truncated_size >= 0; --truncated_size) {
     WidenInternalType result;
     /// Copy to an exactly-sized heap-allocated buffer so that ASAN can detect buffer
     /// overruns. The unique_ptr releases it on every path out of the iteration.
     std::unique_ptr<uint8_t[]> truncated_buffer(new uint8_t[truncated_size]);
     uint8_t* const truncated_begin = truncated_buffer.get();
     memcpy(truncated_begin, buffer.data(), truncated_size);
     int decoded_size = ParquetPlainEncoder::Decode<WidenInternalType, PARQUET_TYPE>(
         truncated_begin, truncated_begin + truncated_size, expected_byte_size, &result);
     EXPECT_EQ(-1, decoded_size);
   }
 }

 /// Round-trips 'v' through the plain encoding and verifies that:
 ///  - Encode() reports 'expected_byte_size' bytes written,
 ///  - Decode() and DecodeBatch() (with a batch of one value) each report
 ///    'expected_byte_size' bytes consumed and produce a value equal to 'v',
 ///  - decoding any truncated prefix of the encoded bytes fails (via TestTruncate()).
 /// This function can be used for type widening tests but also tests without type
 /// widening, in which case `WidenInternalType` is the same as `InternalType`.
 template <typename InternalType, typename WidenInternalType,
     parquet::Type::type PARQUET_TYPE>
 void TestTypeWidening(const InternalType& v, int expected_byte_size) {
   // A std::vector is used instead of a variable-length array: VLAs are a compiler
   // extension and not part of standard C++.
   std::vector<uint8_t> buffer(expected_byte_size);
   uint8_t* const buffer_begin = buffer.data();
   uint8_t* const buffer_end = buffer_begin + expected_byte_size;

   int encoded_size = Encode(v, expected_byte_size, buffer_begin, PARQUET_TYPE);
   EXPECT_EQ(encoded_size, expected_byte_size);

   // Single-value decode.
   WidenInternalType result;
   int decoded_size = ParquetPlainEncoder::Decode<WidenInternalType, PARQUET_TYPE>(
       buffer_begin, buffer_end, expected_byte_size, &result);
   EXPECT_EQ(expected_byte_size, decoded_size);
   EXPECT_EQ(v, result);

   // Batch decode of exactly one value; the stride is the size of the output type.
   WidenInternalType batch_result;
   int batch_decoded_size
       = ParquetPlainEncoder::DecodeBatch<WidenInternalType, PARQUET_TYPE>(
           buffer_begin, buffer_end, expected_byte_size, 1,
           sizeof(WidenInternalType), &batch_result);
   EXPECT_EQ(expected_byte_size, batch_decoded_size);
   EXPECT_EQ(v, batch_result);

   TestTruncate<InternalType, WidenInternalType, PARQUET_TYPE>(
       v, expected_byte_size);
 }

 /// Convenience wrapper for the case without type widening: runs TestTypeWidening()
 /// with the decoded type equal to `InternalType`.
 template <typename InternalType, parquet::Type::type PARQUET_TYPE>
 void TestType(const InternalType& v, int expected_byte_size) {
   TestTypeWidening<InternalType, InternalType, PARQUET_TYPE>(v, expected_byte_size);
 }

 /// Round-trips single values of each tested type through the plain encoder/decoder
 /// using TestType() / TestTypeWidening(). The second argument of each call is the
 /// number of bytes the encoded value is expected to occupy.
 TEST(PlainEncoding, Basic) {
   int8_t i8 = 12;
   int16_t i16 = 123;
   int32_t i32 = 1234;
   int64_t i64 = 12345;
   float f = 1.23;
   double d = 1.23456;
   StringValue sv("Hello");
   TimestampValue tv;

   // 8, 16 and 32 bit integers are all tested against the INT32 physical type.
   TestType<int8_t, parquet::Type::INT32>(i8, sizeof(int32_t));
   TestType<int16_t, parquet::Type::INT32>(i16, sizeof(int32_t));
   TestType<int32_t, parquet::Type::INT32>(i32, sizeof(int32_t));
   TestType<int64_t, parquet::Type::INT64>(i64, sizeof(int64_t));
   TestType<float, parquet::Type::FLOAT>(f, sizeof(float));
   TestType<double, parquet::Type::DOUBLE>(d, sizeof(double));
   // BYTE_ARRAY: 4-byte length prefix followed by the string bytes.
   TestType<StringValue, parquet::Type::BYTE_ARRAY>(sv, sizeof(int32_t) + sv.len);
   // INT96 values are expected to occupy 12 bytes.
   TestType<TimestampValue, parquet::Type::INT96>(tv, 12);

   // Test type widening.
   TestTypeWidening<int32_t, int64_t, parquet::Type::INT32>(i32, sizeof(int32_t));
   TestTypeWidening<int32_t, double, parquet::Type::INT32>(i32, sizeof(int32_t));
   TestTypeWidening<float, double, parquet::Type::FLOAT>(f, sizeof(float));

   // Each decimal width below is tested with a positive and a negative value for every
   // physical type exercised for it.
   int test_val = 1234;
   int var_len_decimal_size = sizeof(int32_t)
       + 2 /*min bytes required for storing test_val*/;
   // Decimal4Value: General test case
   TestType<Decimal4Value, parquet::Type::BYTE_ARRAY>(Decimal4Value(test_val),
       var_len_decimal_size);
   TestType<Decimal4Value, parquet::Type::BYTE_ARRAY>(Decimal4Value(test_val * -1),
       var_len_decimal_size);
   TestType<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal4Value(test_val),
       sizeof(Decimal4Value));
   TestType<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal4Value(test_val * -1), sizeof(Decimal4Value));
   TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(test_val),
       sizeof(int32_t));
   TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(test_val * -1),
       sizeof(int32_t));

   // Decimal8Value: General test case
   TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(Decimal8Value(test_val),
       var_len_decimal_size);
   TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(Decimal8Value(test_val * -1),
       var_len_decimal_size);
   TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal8Value(test_val),
       sizeof(Decimal8Value));
   TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal8Value(test_val * -1), sizeof(Decimal8Value));
   TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(test_val),
       sizeof(int64_t));
   TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(test_val * -1),
       sizeof(int64_t));

   // Decimal16Value: General test case
   // (Decimal16Value is only tested as BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY here.)
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(Decimal16Value(test_val),
       var_len_decimal_size);
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(Decimal16Value(test_val * -1),
       var_len_decimal_size);
   TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>( Decimal16Value(test_val),
       sizeof(Decimal16Value));
   TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal16Value(test_val * -1), sizeof(Decimal16Value));

   // Decimal8Value: int32 limits test
   // As BYTE_ARRAY these values are stored in 4 value bytes plus the 4-byte length.
   TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(
       Decimal8Value(std::numeric_limits<int32_t>::max()),
       sizeof(int32_t) + sizeof(int32_t));
   TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(
       Decimal8Value(std::numeric_limits<int32_t>::min()),
       sizeof(int32_t) + sizeof(int32_t));
   TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal8Value(std::numeric_limits<int32_t>::max()), sizeof(Decimal8Value));
   TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal8Value(std::numeric_limits<int32_t>::min()), sizeof(Decimal8Value));
   TestType<Decimal8Value, parquet::Type::INT64>(
       Decimal8Value(std::numeric_limits<int32_t>::max()), sizeof(int64_t));
   TestType<Decimal8Value, parquet::Type::INT64>(
       Decimal8Value(std::numeric_limits<int32_t>::min()), sizeof(int64_t));

   // Decimal16Value: int32 limits test
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int32_t>::max()),
       sizeof(int32_t) + sizeof(int32_t));
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int32_t>::min()),
       sizeof(int32_t) + sizeof(int32_t));
   TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int32_t>::max()), sizeof(Decimal16Value));
   TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int32_t>::min()), sizeof(Decimal16Value));

   // Decimal16Value: int64 limits test
   // As BYTE_ARRAY these values are stored in 8 value bytes plus the 4-byte length.
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int64_t>::max()),
       sizeof(int32_t) + sizeof(int64_t));
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int64_t>::min()),
       sizeof(int32_t) + sizeof(int64_t));
   TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int64_t>::max()), sizeof(Decimal16Value));
   TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal16Value(std::numeric_limits<int64_t>::min()), sizeof(Decimal16Value));

   // Small values (here 1..16 and their negations) can be encoded with any byte size:
   // 'i' is used both as the decimal value and as the number of value bytes.
   for (int i = 1; i <=16; ++i) {
     // Decimal4Value is only exercised with value sizes of up to 4 bytes.
     if (i <= 4) {
       TestType<Decimal4Value, parquet::Type::BYTE_ARRAY>(Decimal4Value(i),
           i + sizeof(int32_t));
       TestType<Decimal4Value, parquet::Type::BYTE_ARRAY>(Decimal4Value(-i),
           i + sizeof(int32_t));
       TestType<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal4Value(i), i);
       TestType<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal4Value(-i), i);
       TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(i), sizeof(int32_t));
       TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(-i), sizeof(int32_t));
     }
     // Decimal8Value is only exercised with value sizes of up to 8 bytes.
     if (i <= 8) {
       TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(Decimal8Value(i),
           i + sizeof(int32_t));
       TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(Decimal8Value(-i),
           i + sizeof(int32_t));
       TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal8Value(i), i);
       TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal8Value(-i), i);
       TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(i), sizeof(int64_t));
       TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(-i), sizeof(int64_t));
     }
     // Decimal16Value is exercised with every value size from 1 to 16 bytes.
     TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(Decimal16Value(i),
         i + sizeof(int32_t));
     TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(Decimal16Value(-i),
         i + sizeof(int32_t));
     TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal16Value(i), i);
     TestType<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal16Value(-i), i);
   }
 }

 /// Checks that 'output' holds one `OutputType` value per element of 'input', placed
 /// 'stride' bytes apart, and that each one compares equal to the corresponding input
 /// element. 'output' must be exactly input.size() * stride bytes long, otherwise the
 /// check is abandoned.
 template <typename InputType, typename OutputType>
 void ExpectEqualWithStride(const std::vector<InputType>& input,
     const std::vector<uint8_t>& output, int stride) {
   ASSERT_EQ(input.size() * stride, output.size());

   // Byte offset in 'output' of the slot belonging to the current input element.
   size_t slot_offset = 0;
   for (const InputType& expected : input) {
     // Copy the value out of its slot instead of reading it in place.
     OutputType actual;
     memcpy(&actual, output.data() + slot_offset, sizeof(OutputType));
     EXPECT_EQ(expected, actual);
     slot_offset += stride;
   }
 }

 /// Batch round-trip test. Encodes every element of 'values' back to back, then decodes
 /// them twice, once value by value with Decode() and once with a single DecodeBatch()
 /// call, writing the results 'stride' bytes apart, and checks both outputs against
 /// 'values'. For BYTE_ARRAY, 'expected_byte_size' is the maximum encoded size of a
 /// value; for all other physical types it is the exact size.
 /// This function can be used for type widening tests but also tests without type
 /// widening, in which case `WidenInternalType` is the same as `InternalType`.
 template <typename InternalType, typename WidenInternalType,
          parquet::Type::type PARQUET_TYPE>
 void TestTypeWideningBatch(const std::vector<InternalType>& values,
     int expected_byte_size, int stride) {
   ASSERT_GE(stride, sizeof(WidenInternalType));

   constexpr bool var_length = PARQUET_TYPE == parquet::Type::BYTE_ARRAY;

   /// Encode all values contiguously into a zero-filled buffer sized for the maximum.
   std::vector<uint8_t> encoded(values.size() * expected_byte_size, 0);
   uint8_t* write_cursor = encoded.data();
   for (const InternalType& value : values) {
     const int encoded_size =
         Encode(value, expected_byte_size, write_cursor, PARQUET_TYPE);
     if (!var_length) {
       EXPECT_EQ(expected_byte_size, encoded_size);
     } else {
       /// For variable length types, the size is variable and `expected_byte_size`
       /// should be the maximum.
       EXPECT_GE(expected_byte_size, encoded_size);
     }

     write_cursor += encoded_size;
   }
   uint8_t* const encoded_end = encoded.data() + encoded.size();

   /// Decode one by one.
   std::vector<uint8_t> output_1by1(values.size() * stride);
   uint8_t* read_cursor = encoded.data();
   uint8_t* slot = output_1by1.data();
   for (size_t n = 0; n < values.size(); ++n, slot += stride) {
     WidenInternalType* dest = reinterpret_cast<WidenInternalType*>(slot);
     const int decoded_size =
         ParquetPlainEncoder::Decode<WidenInternalType, PARQUET_TYPE>(
             read_cursor, encoded_end, expected_byte_size, dest);
     if (!var_length) {
       EXPECT_EQ(expected_byte_size, decoded_size);
     } else {
       EXPECT_GE(expected_byte_size, decoded_size);
     }

     read_cursor += decoded_size;
   }

   ExpectEqualWithStride<InternalType, WidenInternalType>(values, output_1by1, stride);

   /// Decode in batch.
   std::vector<uint8_t> output_batch(values.size() * stride);
   int batch_decoded_size =
       ParquetPlainEncoder::DecodeBatch<WidenInternalType, PARQUET_TYPE>(
           encoded.data(), encoded_end, expected_byte_size, values.size(), stride,
           reinterpret_cast<WidenInternalType*>(output_batch.data()));
   if (!var_length) {
     EXPECT_EQ(encoded.size(), batch_decoded_size);
   } else {
     EXPECT_GE(encoded.size(), batch_decoded_size);
   }

   ExpectEqualWithStride<InternalType, WidenInternalType>(values, output_batch, stride);
 }

 /// Convenience wrapper for the batch test without type widening: runs
 /// TestTypeWideningBatch() with the decoded type equal to `InternalType`.
 template <typename InternalType, parquet::Type::type PARQUET_TYPE>
 void TestTypeBatch(const std::vector<InternalType>& values, int expected_byte_size,
     int stride) {
   TestTypeWideningBatch<InternalType, InternalType, PARQUET_TYPE>(
       values, expected_byte_size, stride);
 }

 /// Round-trips vectors of randomly generated values of each tested type through the
 /// encoder and both the one-by-one and batch decoders, using TestTypeBatch() /
 /// TestTypeWideningBatch().
 TEST(PlainEncoding, Batch) {
   // Every random vector below is generated from this one generator, so the order of
   // the generator calls matters for the data each test sees.
   // NOTE(review): SeedRng presumably allows the seed to be overridden through the
   // named setting - confirm in testutil/rand-util.h.
   std::mt19937 gen;
   RandTestUtil::SeedRng("PARQUET_PLAIN_ENCODING_TEST_RANDOM_SEED", &gen);

   // NOTE(review): the element count is not a multiple of 1024, presumably so that a
   // partial trailing batch is exercised - TODO confirm.
   constexpr int NUM_ELEMENTS = 1024 * 5 + 10;
   // Distance in bytes between consecutive decoded values in the output buffers.
   // TestTypeWideningBatch() asserts that this is at least the size of the output type.
   constexpr int stride = 20;

   const std::vector<int8_t> int8_vec = RandomNumberVec<int8_t>(gen, NUM_ELEMENTS);
   TestTypeBatch<int8_t, parquet::Type::INT32>(int8_vec, sizeof(int32_t), stride);

   const std::vector<int16_t> int16_vec = RandomNumberVec<int16_t>(gen, NUM_ELEMENTS);
   TestTypeBatch<int16_t, parquet::Type::INT32>(int16_vec, sizeof(int32_t), stride);

   const std::vector<int32_t> int32_vec = RandomNumberVec<int32_t>(gen, NUM_ELEMENTS);
   TestTypeBatch<int32_t, parquet::Type::INT32>(int32_vec, sizeof(int32_t), stride);

   const std::vector<int64_t> int64_vec = RandomNumberVec<int64_t>(gen, NUM_ELEMENTS);
   TestTypeBatch<int64_t, parquet::Type::INT64>(int64_vec, sizeof(int64_t), stride);

   const std::vector<float> float_vec = RandomNumberVec<float>(gen, NUM_ELEMENTS);
   TestTypeBatch<float, parquet::Type::FLOAT>(float_vec, sizeof(float), stride);

   const std::vector<double> double_vec = RandomNumberVec<double>(gen, NUM_ELEMENTS);
   TestTypeBatch<double, parquet::Type::DOUBLE>(double_vec, sizeof(double), stride);

   // Strings: 'str_vec' owns the character data and must stay alive while 'sv_vec' is
   // in use. The expected size passed below is the maximum for a BYTE_ARRAY value:
   // the 4-byte length plus 'max_str_length'.
   constexpr int max_str_length = 100;
   const std::vector<std::string> str_vec = RandomStrVec(gen, NUM_ELEMENTS,
       max_str_length);
   std::vector<StringValue> sv_vec(str_vec.size());
   std::transform(str_vec.begin(), str_vec.end(), sv_vec.begin(),
       [] (const std::string& s) { return StringValue(s); });
   TestTypeBatch<StringValue, parquet::Type::BYTE_ARRAY>(sv_vec,
       sizeof(int32_t) + max_str_length, stride);

   // INT96 values are expected to occupy 12 bytes.
   const std::vector<TimestampValue> tv_vec = RandomTimestampVec(gen, NUM_ELEMENTS);
   TestTypeBatch<TimestampValue, parquet::Type::INT96>(tv_vec, 12, stride);

   // Test type widening.
   TestTypeWideningBatch<int32_t, int64_t, parquet::Type::INT32>(int32_vec,
       sizeof(int32_t), stride);
   TestTypeWideningBatch<int32_t, double, parquet::Type::INT32>(int32_vec, sizeof(int32_t),
       stride);
   TestTypeWideningBatch<float, double, parquet::Type::FLOAT>(float_vec, sizeof(float),
       stride);

   // In the Decimal batch tests, when writing the decimals as BYTE_ARRAYs, we always use
   // the size of the underlying type as the array length for simplicity.
   // The non-batch tests take care of storing them on as many bytes as needed.

   // BYTE_ARRAYs store the length of the array on 4 bytes.
   constexpr int decimal_size_bytes = sizeof(int32_t);

   // Decimal4Value
   // Built from the random int32 values generated above.
   std::vector<Decimal4Value> decimal4_vec(int32_vec.size());
   std::transform(int32_vec.begin(), int32_vec.end(), decimal4_vec.begin(),
       [] (const int32_t i) { return Decimal4Value(i); });

   TestTypeBatch<Decimal4Value, parquet::Type::BYTE_ARRAY>(decimal4_vec,
       decimal_size_bytes + sizeof(Decimal4Value::StorageType), stride);
   TestTypeBatch<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(decimal4_vec,
       sizeof(Decimal4Value::StorageType), stride);
   TestTypeBatch<Decimal4Value, parquet::Type::INT32>(decimal4_vec,
       sizeof(Decimal4Value::StorageType), stride);

   // Decimal8Value
   // Built from the random int64 values generated above.
   std::vector<Decimal8Value> decimal8_vec(int64_vec.size());
   std::transform(int64_vec.begin(), int64_vec.end(), decimal8_vec.begin(),
       [] (const int64_t i) { return Decimal8Value(i); });

   TestTypeBatch<Decimal8Value, parquet::Type::BYTE_ARRAY>(decimal8_vec,
       decimal_size_bytes + sizeof(Decimal8Value::StorageType), stride);
   TestTypeBatch<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(decimal8_vec,
       sizeof(Decimal8Value::StorageType), stride);
   TestTypeBatch<Decimal8Value, parquet::Type::INT64>(decimal8_vec,
       sizeof(Decimal8Value::StorageType), stride);

   // Decimal16Value
   // We do not test the whole 16 byte range as generating random int128_t values is
   // complicated.
   // The values are built from the random int64 values generated above instead.
   std::vector<Decimal16Value> decimal16_vec(int64_vec.size());
   std::transform(int64_vec.begin(), int64_vec.end(), decimal16_vec.begin(),
       [] (const int64_t i) { return Decimal16Value(i); });

   TestTypeBatch<Decimal16Value, parquet::Type::BYTE_ARRAY>(decimal16_vec,
       decimal_size_bytes + sizeof(Decimal16Value::StorageType), stride);
   TestTypeBatch<Decimal16Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(decimal16_vec,
       sizeof(Decimal16Value::StorageType), stride);
 }

 /// Checks the byte order the encoder produces for decimals. Each decimal is filled
 /// with the leading bytes of an ascending byte pattern, encoded, and the output is
 /// compared against the tail of the same pattern written in reverse.
 TEST(PlainEncoding, DecimalBigEndian) {
   // Test Basic can pass if we make the same error in encode and decode.
   // Verify the bytes are actually big endian.
   uint8_t ascending[] = {
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
   };

   // Manually generate this to avoid potential bugs in BitUtil
   uint8_t descending[] = {
     15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
   };
   uint8_t* const descending_end = descending + sizeof(descending);
   uint8_t encoded[16];

   // 4-byte decimal: expect the last 4 bytes of the reversed pattern.
   Decimal4Value d4;
   memcpy(&d4, ascending, sizeof(d4));
   const int d4_size = ParquetPlainEncoder::Encode(d4, sizeof(d4), encoded);
   ASSERT_EQ(d4_size, sizeof(d4));
   ASSERT_EQ(memcmp(encoded, descending_end - sizeof(d4), sizeof(d4)), 0);

   // 8-byte decimal: expect the last 8 bytes of the reversed pattern.
   Decimal8Value d8;
   memcpy(&d8, ascending, sizeof(d8));
   const int d8_size = ParquetPlainEncoder::Encode(d8, sizeof(d8), encoded);
   ASSERT_EQ(d8_size, sizeof(d8));
   ASSERT_EQ(memcmp(encoded, descending_end - sizeof(d8), sizeof(d8)), 0);

   // 16-byte decimal: expect the whole reversed pattern.
   Decimal16Value d16;
   memcpy(&d16, ascending, sizeof(d16));
   const int d16_size = ParquetPlainEncoder::Encode(d16, sizeof(d16), encoded);
   ASSERT_EQ(d16_size, sizeof(d16));
   ASSERT_EQ(memcmp(encoded, descending_end - sizeof(d16), sizeof(d16)), 0);
 }

 /// Test that corrupt strings are handled correctly: decoding a BYTE_ARRAY value whose
 /// length prefix is negative must fail with -1.
 TEST(PlainEncoding, CorruptString) {
   // Test string with negative length. Only the 4-byte length prefix is written; the
   // remaining bytes of the buffer are left as they are.
   const int32_t negative_len = -10;
   uint8_t encoded[sizeof(int32_t) + 10];
   memcpy(encoded, &negative_len, sizeof(int32_t));

   StringValue decoded;
   const int decoded_size =
       ParquetPlainEncoder::Decode<StringValue, parquet::Type::BYTE_ARRAY>(
           encoded, encoded + sizeof(encoded), 0, &decoded);
   EXPECT_EQ(decoded_size, -1);
 }

 }