| /// Licensed to the Apache Software Foundation (ASF) under one |
| /// or more contributor license agreements. See the NOTICE file |
| /// distributed with this work for additional information |
| /// regarding copyright ownership. The ASF licenses this file |
| /// to you under the Apache License, Version 2.0 (the |
| /// "License"); you may not use this file except in compliance |
| /// with the License. You may obtain a copy of the License at |
| /// |
| /// http://www.apache.org/licenses/LICENSE-2.0 |
| /// |
| /// Unless required by applicable law or agreed to in writing, |
| /// software distributed under the License is distributed on an |
| /// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| /// KIND, either express or implied. See the License for the |
| /// specific language governing permissions and limitations |
| /// under the License. |
| |
| #include <vector> |
| |
| #include "gtest/gtest.h" |
| #include "parquet-byte-stream-split-coder-test-data.h" |
| #include "parquet-byte-stream-split-decoder.h" |
| #include "parquet-byte-stream-split-encoder.h" |
| |
| namespace impala { |
| |
| namespace { |
| void checkAllBytes(uint8_t* buffer, int buffer_len, uint8_t check_against) { |
| for (int i = 0; i < buffer_len; i++) { |
| EXPECT_EQ(check_against, *(buffer + i)); |
| } |
| } |
| |
| template <size_t T_SIZE> |
| ParquetByteStreamSplitDecoder<T_SIZE> createDecoder(int byte_size) { |
| if constexpr (T_SIZE != 0) { |
| EXPECT_EQ(byte_size, T_SIZE); |
| return ParquetByteStreamSplitDecoder<T_SIZE>(); |
| } else { |
| return ParquetByteStreamSplitDecoder<0>(byte_size); |
| } |
| } |
| |
| template <size_t T_SIZE> |
| ParquetByteStreamSplitEncoder<T_SIZE> createEncoder(int byte_size) { |
| if constexpr (T_SIZE != 0) { |
| EXPECT_EQ(byte_size, T_SIZE); |
| return ParquetByteStreamSplitEncoder<T_SIZE>(); |
| } else { |
| return ParquetByteStreamSplitEncoder<0>(byte_size); |
| } |
| } |
| |
| } // anonymous namespace |
| |
| // ---------------------------------- DECODER TESTS ----------------------------------- // |
| |
| class ParquetByteStreamSplitDecoderTest : public testing::Test { |
| protected: |
| ParquetByteStreamSplitDecoderTest() {} |
| |
| template <typename T> |
| static void initialization_test(const std::vector<uint8_t>& input) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitDecoder<byte_size> decoder; |
| const int max_value_count = input.size() / byte_size; |
| |
| T single_output = 0; |
| std::vector<uint8_t> vec_output(max_value_count * byte_size); |
| |
| // As the decoder hasn't been initialized yet, there should be 0 values. |
| EXPECT_EQ(0, decoder.GetTotalValueCount()); |
| |
| // As the decoder hasn't been correctly initialized yet, there should be 0 values. |
| EXPECT_EQ(0, decoder.GetTotalValueCount()); |
| |
| // We should be able to initialize even with 0 values. |
| decoder.NewPage(input.data(), 0); |
| |
| // But shouldn't be able to read from it. |
| EXPECT_EQ(0, decoder.NextValue(&single_output)); |
| EXPECT_EQ(0, decoder.NextValues(1, vec_output.data(), byte_size)); |
| |
| // And it should still have 0 values. |
| EXPECT_EQ(0, decoder.GetTotalValueCount()); |
| |
| // This is a correct call. |
| // It also tests that after reassigning the decoder, it's updated correctly. |
| decoder.NewPage(input.data(), byte_size * max_value_count); |
| |
| // The decoder has finally been correctly initialized. |
| EXPECT_EQ(max_value_count, decoder.GetTotalValueCount()); |
| } |
| |
| template <typename T> |
| static void decode_singles( |
| const std::vector<uint8_t>& input, const std::vector<uint8_t>& expected) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitDecoder<byte_size> decoder; |
| decoder.NewPage(input.data(), input.size()); |
| const int max_value_count = decoder.GetTotalValueCount(); |
| |
| T single_output = 0; |
| std::vector<T> expected_numerals(expected.size() / byte_size); |
| memcpy(expected_numerals.data(), expected.data(), expected.size()); |
| |
| for (int i = 0; i < max_value_count; i++) { |
| EXPECT_EQ(1, decoder.NextValue(&single_output)); |
| EXPECT_EQ(expected_numerals[i], single_output); |
| } |
| |
| // trying to overread |
| EXPECT_EQ(0, decoder.NextValue(&single_output)); |
| } |
| |
| template <size_t BYTE_SIZE> |
| static void decode_batch(const std::vector<uint8_t>& input, |
| const std::vector<uint8_t>& expected, const size_t runtime_byte_size) { |
| // The tests require at least 3 values in the input. |
| // This is to make sure that the overread tests are valid. |
| ASSERT_GE(input.size(), 3); |
| |
| ParquetByteStreamSplitDecoder<BYTE_SIZE> decoder = |
| createDecoder<BYTE_SIZE>(runtime_byte_size); |
| decoder.NewPage(input.data(), input.size()); |
| const int max_value_count = decoder.GetTotalValueCount(); |
| |
| std::vector<uint8_t> output(0, 0); |
| vector<uint8_t> expected_subset; |
| |
| EXPECT_EQ(0, decoder.NextValues(0, output.data(), runtime_byte_size)); |
| |
| // Read one value, then the remaining. |
| output.assign(max_value_count * runtime_byte_size, 0); |
| expected_subset.assign(expected.begin(), expected.begin() + runtime_byte_size); |
| expected_subset.resize(expected.size(), 0); |
| EXPECT_EQ(1, decoder.NextValues(1, output.data(), runtime_byte_size)); |
| EXPECT_EQ(expected_subset, output); |
| |
| output.assign(max_value_count * runtime_byte_size, 0); |
| expected_subset.assign(expected.begin() + 1 * runtime_byte_size, expected.end()); |
| expected_subset.resize(expected.size(), 0); |
| EXPECT_EQ(max_value_count - 1, // because we read 1 value already |
| decoder.NextValues(max_value_count - 1, output.data(), runtime_byte_size)); |
| EXPECT_EQ(expected_subset, output); |
| |
| // The page is depleted, we need to reset it. |
| decoder.NewPage(input.data(), input.size()); |
| |
| // Read two values, then overread the remaining. |
| output.assign(max_value_count * runtime_byte_size, 0); |
| expected_subset.assign(expected.begin(), expected.begin() + 2 * runtime_byte_size); |
| expected_subset.resize(expected.size(), 0); |
| EXPECT_EQ(2, decoder.NextValues(2, output.data(), runtime_byte_size)); |
| EXPECT_EQ(expected_subset, output); |
| |
| output.assign(max_value_count * runtime_byte_size, 0); |
| expected_subset.assign(expected.begin() + 2 * runtime_byte_size, expected.end()); |
| expected_subset.resize(expected.size(), 0); |
| EXPECT_EQ(max_value_count - 2, // because we read 2 values already |
| decoder.NextValues(max_value_count, output.data(), runtime_byte_size)); |
| EXPECT_EQ(expected_subset, output); |
| |
| // The page is depleted, we need to reset it. |
| decoder.NewPage(input.data(), input.size()); |
| |
| // Getting all values in page in one go. |
| output.assign(max_value_count * runtime_byte_size, 0); |
| EXPECT_EQ(max_value_count, |
| decoder.NextValues(max_value_count, output.data(), runtime_byte_size)); |
| EXPECT_EQ(expected, output); |
| |
| // Decoder page is depleted, try to overread. |
| output.assign(max_value_count * runtime_byte_size, 0); |
| EXPECT_EQ(0, decoder.NextValues(max_value_count, output.data(), runtime_byte_size)); |
| |
| // The page is depleted, we need to reset it. |
| decoder.NewPage(input.data(), input.size()); |
| |
| // Try to overread the entire page. |
| output.assign(max_value_count * runtime_byte_size, 0); |
| EXPECT_EQ(max_value_count, |
| decoder.NextValues(max_value_count + 1, output.data(), runtime_byte_size)); |
| EXPECT_EQ(expected, output); |
| } |
| |
| template <typename T> |
| static void decode_combined( |
| const std::vector<uint8_t>& input, const std::vector<uint8_t>& expected) { |
| // Decode the first value with `NextValue()` then the rest with `NextValues()` |
| |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitDecoder<byte_size> decoder; |
| decoder.NewPage(input.data(), input.size()); |
| const int max_value_count = decoder.GetTotalValueCount(); |
| |
| std::vector<uint8_t> output(max_value_count * byte_size, 0); |
| |
| // Read the first value into output. |
| EXPECT_EQ(1, decoder.NextValue(reinterpret_cast<T*>(output.data()))); |
| EXPECT_EQ(expected[0], output[0]); |
| |
| // Read the rest of the values into output. |
| EXPECT_EQ(max_value_count - 1, |
| decoder.NextValues(max_value_count - 1, output.data() + byte_size, byte_size)); |
| EXPECT_EQ(expected, output); |
| |
| // Decode all values but the last with `NextValues()` then the last with `NextValue(). |
| |
| // The page is depleted, we need to reset it. |
| decoder.NewPage(input.data(), input.size()); |
| |
| // Read all values but the last into output. |
| EXPECT_EQ(max_value_count - 1, |
| decoder.NextValues(max_value_count - 1, output.data(), byte_size)); |
| EXPECT_EQ(1, |
| decoder.NextValue( |
| reinterpret_cast<T*>(output.data() + (max_value_count - 1) * byte_size))); |
| EXPECT_EQ(expected, output); |
| } |
| |
| template <typename T> |
| static void decode_with_stride(const std::vector<uint8_t>& input, |
| const std::vector<uint8_t>& expected, const size_t stride) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitDecoder<byte_size> decoder; |
| decoder.NewPage(input.data(), input.size()); |
| const int max_value_count = decoder.GetTotalValueCount(); |
| |
| std::vector<uint8_t> output(max_value_count * stride, 0); |
| |
| // Read the all values into output. |
| EXPECT_EQ( |
| max_value_count, decoder.NextValues(max_value_count, output.data(), stride)); |
| |
| // Check that each value read is correct |
| for (int i = 0; i < max_value_count * byte_size; i++) { |
| EXPECT_EQ(expected[i], output[i / byte_size * stride + i % byte_size]); |
| } |
| |
| // Check that the "skipped" bytes are unchanged |
| for (int i = byte_size; i < max_value_count; i += stride) { |
| checkAllBytes(&output[i], stride - byte_size, 0); |
| } |
| } |
| |
| template <typename T> |
| static void skip( |
| const std::vector<uint8_t>& input, const std::vector<uint8_t>& expected) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitDecoder<byte_size> decoder; |
| decoder.NewPage(input.data(), input.size()); |
| const int max_value_count = decoder.GetTotalValueCount(); |
| |
| std::vector<uint8_t> output(max_value_count * byte_size, 0); |
| std::vector<uint8_t> expected_subset; |
| |
| EXPECT_EQ(2, decoder.SkipValues(2)); |
| |
| output.assign(max_value_count * byte_size, 0); |
| expected_subset.assign(expected.begin() + 2 * byte_size, expected.end()); |
| expected_subset.resize(expected.size(), 0); |
| EXPECT_EQ(max_value_count - 2, // because we skipped 2 values already |
| decoder.NextValues(max_value_count - 2, output.data(), byte_size)); |
| EXPECT_EQ(expected_subset, output); |
| |
| // Reset page, then try to skip more values than there are |
| decoder.NewPage(input.data(), input.size()); |
| EXPECT_EQ( |
| expected.size() / byte_size, decoder.SkipValues(expected.size() / byte_size + 1)); |
| } |
| }; |
| |
| // -------------------- Basic Functionality Tests --------------------- // |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, FloatBaseFuncTest) { |
| initialization_test<float>(float_encoded_200v); |
| }; |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int32BaseFuncTest) { |
| initialization_test<int32_t>(int_encoded_200v); |
| }; |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, DoubleBaseFuncTest) { |
| initialization_test<double>(double_encoded_200v); |
| }; |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int64BaseFuncTest) { |
| initialization_test<int64_t>(long_encoded_200v); |
| }; |
| |
| // -------------------- Basic Single Values Tests --------------------- // |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, FloatDecodeSinglesTest) { |
| decode_singles<float>(float_encoded_200v, float_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int32DecodeSinglesTest) { |
| decode_singles<int32_t>(int_encoded_200v, int_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, DoubleDecodeSinglesTest) { |
| decode_singles<double>(double_encoded_200v, double_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int64DecodeSinglesTest) { |
| decode_singles<int64_t>(long_encoded_200v, long_decoded_200v); |
| } |
| |
| // --------------------- Decoding In Batch Tests ---------------------- // |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, FloatDecodeBatchTest) { |
| decode_batch<sizeof(float)>(float_encoded_200v, float_decoded_200v, sizeof(float)); |
| decode_batch<0>(float_encoded_200v, float_decoded_200v, sizeof(float)); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int32DecodeBatchTest) { |
| decode_batch<sizeof(int32_t)>(int_encoded_200v, int_decoded_200v, sizeof(int32_t)); |
| decode_batch<0>(int_encoded_200v, int_decoded_200v, sizeof(int32_t)); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, DoubleDecodeBatchTest) { |
| decode_batch<sizeof(double)>(double_encoded_200v, double_decoded_200v, sizeof(double)); |
| decode_batch<0>(double_encoded_200v, double_decoded_200v, sizeof(double)); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int64DecodeBatchTest) { |
| decode_batch<sizeof(int64_t)>(long_encoded_200v, long_decoded_200v, sizeof(int64_t)); |
| decode_batch<0>(long_encoded_200v, long_decoded_200v, sizeof(int64_t)); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, VaryingSizeDecodeBatchTest) { |
| decode_batch<0>(fix3b_encoded_200v, fix3b_decoded_200v, 3); |
| decode_batch<0>(fix5b_encoded_200v, fix5b_decoded_200v, 5); |
| decode_batch<0>(fix7b_encoded_200v, fix7b_decoded_200v, 7); |
| decode_batch<0>(fix11b_encoded_200v, fix11b_decoded_200v, 11); |
| } |
| |
| // --------------------- Decoding Combined Tests ---------------------- // |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, FloatDecodeCombinedTest) { |
| decode_combined<float>(float_encoded_200v, float_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int32DecodeCombinedTest) { |
| decode_combined<int32_t>(int_encoded_200v, int_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, DoubleDecodeCombinedTest) { |
| decode_combined<double>(double_encoded_200v, double_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int64DecodeCombinedTest) { |
| decode_combined<int64_t>(long_encoded_200v, long_decoded_200v); |
| } |
| |
| // -------------------- Decoding With Stride Tests -------------------- // |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, FloatDecodeStrideTest) { |
| decode_with_stride<float>( |
| float_encoded_200v, float_decoded_200v, 2 * sizeof(float) + 2); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int32DecodeStrideTest) { |
| decode_with_stride<int32_t>( |
| int_encoded_200v, int_decoded_200v, 2 * sizeof(int32_t) + 2); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, DoubleDecodeStrideTest) { |
| decode_with_stride<double>( |
| double_encoded_200v, double_decoded_200v, 2 * sizeof(double) + 2); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int64DecodeStrideTest) { |
| decode_with_stride<int64_t>( |
| long_encoded_200v, long_decoded_200v, 2 * sizeof(int64_t) + 2); |
| } |
| |
| //--------------------Skip Tests--------------------// |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, FloatSkipTest) { |
| skip<float>(float_encoded_200v, float_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int32SkipTest) { |
| skip<int32_t>(int_encoded_200v, int_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, DoubleSkipTest) { |
| skip<double>(double_encoded_200v, double_decoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitDecoderTest, Int64SkipTest) { |
| skip<int64_t>(long_encoded_200v, long_decoded_200v); |
| } |
| |
| // ---------------------------------- ENCODER TESTS ----------------------------------- // |
| |
| class ParquetByteStreamSplitEncoderTest : public testing::Test { |
| protected: |
| ParquetByteStreamSplitEncoderTest() {} |
| |
| template <typename T> |
| static void initialization_test(const std::vector<T>& values) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitEncoder<byte_size> encoder; |
| const int max_value_count = values.size(); |
| std::vector<uint8_t> input(byte_size * max_value_count); |
| std::vector<uint8_t> output(byte_size * max_value_count); |
| |
| // should encode 0 values if the buffer size is 0 |
| encoder.NewPage(input.data(), 0); |
| EXPECT_EQ(0, encoder.FinalizePage(output.data(), 0)); |
| |
| // should encode 0 values if there was none put in |
| encoder.NewPage(input.data(), input.size()); |
| EXPECT_EQ(0, encoder.FinalizePage(output.data(), output.size())); |
| |
| // should encode as many values as successfully put in |
| encoder.NewPage(input.data(), input.size()); |
| EXPECT_TRUE(encoder.Put(values[0])); |
| EXPECT_TRUE(encoder.Put(values[0])); |
| EXPECT_TRUE(encoder.Put(values[0])); |
| EXPECT_EQ(3, encoder.FinalizePage(output.data(), output.size())); |
| |
| // everything should be reset, so this should be fine |
| encoder.NewPage(input.data(), input.size()); |
| EXPECT_TRUE(encoder.Put(values[0])); |
| EXPECT_EQ(1, encoder.FinalizePage(output.data(), output.size())); |
| } |
| |
| template <typename T> |
| static void put_and_finalize( |
| const std::vector<T>& values, const std::vector<uint8_t>& expected) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitEncoder<byte_size> encoder; |
| std::vector<uint8_t> input(byte_size * values.size()); |
| std::vector<uint8_t> output(byte_size * values.size()); |
| |
| // put just one value, finalize and check |
| encoder.NewPage(input.data(), input.size()); |
| EXPECT_TRUE(encoder.Put(values[0])); |
| EXPECT_EQ(1, encoder.FinalizePage(output.data(), output.size())); |
| // since we're only decoding one value, the output should be the same as the input |
| EXPECT_EQ(0, memcmp(values.data(), output.data(), sizeof(T))); |
| |
| // put 2 values (more than 1, but not filling up the buffer), finalize and check |
| encoder.NewPage(input.data(), input.size()); |
| if (values.size() >= 2) { |
| for (int i = 0; i < 2; i++) { |
| EXPECT_TRUE(encoder.Put(values[i])); |
| } |
| |
| // this is needed, because the expected vector has the values scattered |
| // here we gather the bytes of the first 2 values |
| std::vector<uint8_t> expected_subset; |
| for (int i = 0; i < sizeof(T); i++) { |
| expected_subset.push_back(expected[i * values.size()]); |
| expected_subset.push_back(expected[i * values.size() + 1]); |
| } |
| |
| EXPECT_EQ(2, encoder.FinalizePage(output.data(), output.size())); |
| EXPECT_EQ(0, memcmp(expected_subset.data(), output.data(), sizeof(T) * 2)); |
| } |
| |
| // put all, finalize and check |
| encoder.NewPage(input.data(), input.size()); |
| for (int i = 0; i < values.size(); i++) { |
| EXPECT_TRUE(encoder.Put(values[i])); |
| } |
| |
| // try to put more than there is space for |
| EXPECT_FALSE(encoder.Put(values[0])); |
| EXPECT_EQ(values.size(), encoder.FinalizePage(output.data(), output.size())); |
| |
| EXPECT_EQ(expected, output); |
| } |
| |
| template <typename T> |
| static void prepopulated_init_test( |
| const std::vector<T>& values, const std::vector<uint8_t>& expected) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitEncoder<byte_size> encoder; |
| std::vector<uint8_t> input(byte_size * values.size()); |
| std::vector<uint8_t> output(byte_size * values.size()); |
| |
| memcpy(input.data(), values.data(), byte_size * values.size()); |
| |
| encoder.NewPage(input.data(), input.size(), values.size()); |
| EXPECT_EQ(values.size(), encoder.FinalizePage(output.data(), output.size())); |
| EXPECT_EQ(0, memcmp(expected.data(), output.data(), byte_size * values.size())); |
| } |
| }; |
| |
| // -------------------- Basic Functionality Tests --------------------- // |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, FloatBasicFuncTest) { |
| initialization_test(float_values_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, Int32BasicFuncTest) { |
| initialization_test(int_values_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, DoubleBasicFuncTest) { |
| initialization_test(double_values_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, Int64BasicFuncTest) { |
| initialization_test(long_values_200v); |
| } |
| |
| // ---------------------- Put and Finalize Tests ---------------------- // |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, FloatPutFinalizeTest) { |
| put_and_finalize(float_values_200v, float_encoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, Int32PutFinalizeTest) { |
| put_and_finalize(int_values_200v, int_encoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, DoublePutFinalizeTest) { |
| put_and_finalize(double_values_200v, double_encoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, Int64PutFinalizeTest) { |
| put_and_finalize(long_values_200v, long_encoded_200v); |
| } |
| |
| // ------------------------ Prepopulated Tests ------------------------ // |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, FloatPrepopulatedTest) { |
| prepopulated_init_test(float_values_200v, float_encoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, Int32PrepopulatedTest) { |
| prepopulated_init_test(int_values_200v, int_encoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, DoublePrepopulatedTest) { |
| prepopulated_init_test(double_values_200v, double_encoded_200v); |
| } |
| |
| TEST_F(ParquetByteStreamSplitEncoderTest, Int64PrepopulatedTest) { |
| prepopulated_init_test(long_values_200v, long_encoded_200v); |
| } |
| |
| // --------------------------- TWO-DIRECTIONAL CODER TESTS ---------------------------- // |
| |
| class ParquetByteStreamSplitCoderTest : public testing::Test { |
| protected: |
| ParquetByteStreamSplitCoderTest() {} |
| |
| template <typename T> |
| static void encode_then_decode(const std::vector<T>& input, const int size) { |
| constexpr size_t byte_size = sizeof(T); |
| |
| ParquetByteStreamSplitEncoder<byte_size> encoder; |
| std::vector<uint8_t> encoder_buff(byte_size * size); |
| std::vector<uint8_t> encoded(byte_size * size); |
| encoder.NewPage(encoder_buff.data(), encoder_buff.size()); |
| for (int i = 0; i < size; i++) { |
| EXPECT_TRUE(encoder.Put(input[i])); |
| } |
| |
| EXPECT_EQ(size, encoder.FinalizePage(encoded.data(), encoded.size())); |
| |
| ParquetByteStreamSplitDecoder<byte_size> decoder; |
| std::vector<T> result(size); |
| decoder.NewPage(encoded.data(), encoded.size()); |
| for (int i = 0; i < size; i++) { |
| EXPECT_EQ(1, decoder.NextValue(&result[i])); |
| } |
| |
| EXPECT_EQ(0, memcmp(input.data(), result.data(), size * byte_size)); |
| } |
| |
| template <int BYTE_SIZE> |
| static void encode_then_decode_in_batch(const std::vector<uint8_t>& input, |
| const int test_size, const size_t runtime_byte_size) { |
| ParquetByteStreamSplitEncoder<BYTE_SIZE> encoder = |
| createEncoder<BYTE_SIZE>(runtime_byte_size); |
| std::vector<uint8_t> temp(runtime_byte_size * test_size); |
| std::vector<uint8_t> encoded(runtime_byte_size * test_size); |
| encoder.NewPage(temp.data(), temp.size()); |
| for (int i = 0; i < test_size; i++) { |
| EXPECT_TRUE(encoder.PutBytes(&input[i * runtime_byte_size])); |
| } |
| EXPECT_EQ(test_size, encoder.FinalizePage(encoded.data(), encoded.size())); |
| |
| ParquetByteStreamSplitDecoder<BYTE_SIZE> decoder = |
| createDecoder<BYTE_SIZE>(runtime_byte_size); |
| std::vector<uint8_t> result(runtime_byte_size * test_size); |
| decoder.NewPage(encoded.data(), encoded.size()); |
| EXPECT_EQ(test_size, decoder.NextValues(test_size, result.data(), runtime_byte_size)); |
| |
| EXPECT_EQ(0, memcmp(input.data(), result.data(), test_size * runtime_byte_size)); |
| } |
| |
| template <int BYTE_SIZE> |
| static void encode_then_decode_with_stride(const std::vector<uint8_t>& input, |
| const int stride, const int test_size, const size_t runtime_byte_size) { |
| ParquetByteStreamSplitEncoder<BYTE_SIZE> encoder = |
| createEncoder<BYTE_SIZE>(runtime_byte_size); |
| std::vector<uint8_t> temp(runtime_byte_size * test_size); |
| std::vector<uint8_t> encoded(runtime_byte_size * test_size); |
| encoder.NewPage(temp.data(), temp.size()); |
| for (int i = 0; i < test_size; i++) { |
| EXPECT_TRUE(encoder.PutBytes(&input[i * runtime_byte_size])); |
| } |
| EXPECT_EQ(test_size, encoder.FinalizePage(encoded.data(), encoded.size())); |
| |
| ParquetByteStreamSplitDecoder<BYTE_SIZE> decoder = |
| createDecoder<BYTE_SIZE>(runtime_byte_size); |
| std::vector<uint8_t> result(test_size * stride); |
| decoder.NewPage(encoded.data(), encoded.size()); |
| EXPECT_EQ(test_size, decoder.NextValues(test_size, result.data(), stride)); |
| |
| for (int i = 0; i < test_size; i++) { |
| EXPECT_EQ(0, |
| memcmp(&input[i * runtime_byte_size], &result[i * stride], runtime_byte_size)); |
| checkAllBytes( |
| &result[i * stride + runtime_byte_size], stride - runtime_byte_size, 0); |
| } |
| } |
| |
| template <typename T> |
| static void decode_then_encode(const std::vector<uint8_t>& input, const int size) { |
| constexpr size_t byte_size = sizeof(T); |
| ParquetByteStreamSplitDecoder<byte_size> decoder; |
| decoder.NewPage(input.data(), size * byte_size); |
| |
| std::vector<T> decoded(size); |
| |
| for (int i = 0; i < size; i++) { |
| EXPECT_EQ(1, decoder.NextValue(&decoded[i])); |
| } |
| |
| ParquetByteStreamSplitEncoder<byte_size> encoder; |
| std::vector<uint8_t> temp(byte_size * size); |
| std::vector<uint8_t> result(byte_size * size); |
| encoder.NewPage(temp.data(), temp.size()); |
| for (int i = 0; i < size; i++) { |
| EXPECT_TRUE(encoder.Put(decoded[i])); |
| } |
| |
| EXPECT_EQ(size, encoder.FinalizePage(result.data(), result.size())); |
| EXPECT_EQ(0, memcmp(input.data(), result.data(), size * byte_size)); |
| } |
| |
| template <int BYTE_SIZE> |
| static void decode_in_batch_then_encode(const std::vector<uint8_t>& input, |
| const int test_size, const size_t runtime_byte_size) { |
| ParquetByteStreamSplitDecoder<BYTE_SIZE> decoder = |
| createDecoder<BYTE_SIZE>(runtime_byte_size); |
| |
| int num_input_values = input.size() / runtime_byte_size; |
| decoder.NewPage(input.data(), num_input_values * runtime_byte_size); |
| std::vector<uint8_t> decoded(test_size * runtime_byte_size); |
| |
| EXPECT_EQ( |
| test_size, decoder.NextValues(test_size, decoded.data(), runtime_byte_size)); |
| |
| ParquetByteStreamSplitEncoder<BYTE_SIZE> encoder = |
| createEncoder<BYTE_SIZE>(runtime_byte_size); |
| std::vector<uint8_t> temp(runtime_byte_size * test_size); |
| std::vector<uint8_t> result(runtime_byte_size * test_size); |
| encoder.NewPage(temp.data(), temp.size()); |
| for (int i = 0; i < test_size; i++) { |
| EXPECT_TRUE(encoder.PutBytes(&decoded[i * runtime_byte_size])); |
| } |
| |
| EXPECT_EQ(test_size, encoder.FinalizePage(result.data(), result.size())); |
| |
| // The `input` has all values encoded, while `result` has only `test_size` |
| // values. This causes `input` to have more values than `result`, so we need to |
| // compare them in chunks of `test_size`. For example if input had 10 vales, and |
| // `test_size` was 5: input = a1 b1 c1 d1 e1 f1 g1 h1 i1 j1 a2 b2 c2 d2 e2 f2 g2 h2 |
| // i2 j2 ... result = a1 b1 c1 d1 e1 a2 b2 c2 d2 e2 a3 b3 c3 d3 e3 a4 b4 c4 d4 e4 ... |
| for (int i = 0; i < runtime_byte_size; i++) { |
| EXPECT_EQ( |
| 0, memcmp(&input[i * num_input_values], &result[i * test_size], test_size)); |
| } |
| } |
| |
| template <int BYTE_SIZE> |
| static void decode_with_stride_then_encode(const std::vector<uint8_t>& input, |
| const int stride, const int test_size, const size_t runtime_byte_size) { |
| ParquetByteStreamSplitDecoder<BYTE_SIZE> decoder = |
| createDecoder<BYTE_SIZE>(runtime_byte_size); |
| |
| int num_input_values = input.size() / runtime_byte_size; |
| decoder.NewPage (input.data(), num_input_values * runtime_byte_size); |
| std::vector<uint8_t> decoded(test_size * stride); |
| |
| EXPECT_EQ(test_size, decoder.NextValues(test_size, decoded.data(), stride)); |
| |
| ParquetByteStreamSplitEncoder<BYTE_SIZE> encoder = |
| createEncoder<BYTE_SIZE>(runtime_byte_size); |
| std::vector<uint8_t> temp(runtime_byte_size * test_size); |
| std::vector<uint8_t> result(runtime_byte_size * test_size); |
| encoder.NewPage(temp.data(), temp.size()); |
| for (int i = 0; i < test_size; i++) { |
| EXPECT_TRUE(encoder.PutBytes(&decoded[i * stride])); |
| } |
| |
| EXPECT_EQ(test_size, encoder.FinalizePage(result.data(), result.size())); |
| // The `input` has all values encoded, while `result` has only `test_size` |
| // values. This causes `input` to have more values than `result`, so we need to |
| // compare them in chunks of `test_size`. For example if `input` had 10 vales, and |
| // `test_size` was 5: |
| // input = a1 b1 c1 d1 e1 f1 g1 h1 i1 j1|a2 b2 c2 d2 e2 f2 g2 h2 i2 j2 ... |
| // result = a1 b1 c1 d1 e1|a2 b2 c2 d2 e2|a3 b3 c3 d3 e3|a4 b4 c4 d4 e4 ... |
| for (int i = 0; i < runtime_byte_size; i++) { |
| EXPECT_EQ( |
| 0, memcmp(&input[i * num_input_values], &result[i * test_size], test_size)); |
| } |
| } |
| }; |
| |
| //--------------------Encode -> Decode Tests--------------------// |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, FloatEncodeDecodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode(float_values_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode(float_values_200v, test_size); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int32EncodeDecodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode(int_values_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode(int_values_200v, test_size); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, DoubleEncodeDecodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode(double_values_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode(double_values_200v, test_size); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int64EncodeDecodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode(long_values_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode(long_values_200v, test_size); |
| } |
| } |
| |
| //--------------------Encode -> Decode In Batch Tests--------------------// |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, FloatEncodeDecodeBatchTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_in_batch<sizeof(float)>( |
| float_decoded_200v, test_size, sizeof(float)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_in_batch<sizeof(float)>( |
| float_decoded_200v, test_size, sizeof(float)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int32EncodeDecodeBatchTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_in_batch<sizeof(int32_t)>( |
| int_decoded_200v, test_size, sizeof(int32_t)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_in_batch<sizeof(int32_t)>( |
| int_decoded_200v, test_size, sizeof(int32_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, DoubleEncodeDecodeBatchTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_in_batch<sizeof(double)>( |
| double_decoded_200v, test_size, sizeof(double)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_in_batch<sizeof(double)>( |
| double_decoded_200v, test_size, sizeof(double)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int64EncodeDecodeBatchTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_in_batch<sizeof(int64_t)>( |
| long_decoded_200v, test_size, sizeof(int64_t)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_in_batch<sizeof(int64_t)>( |
| long_decoded_200v, test_size, sizeof(int64_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, VaryingSizeEncodeDecodeBatchTest) { |
| for (int b_size = 1; b_size <= 10; b_size++) { |
| vector<uint8_t> float_decoded = float_decoded_200v; |
| vector<uint8_t> int_decoded = int_decoded_200v; |
| vector<uint8_t> double_decoded = double_decoded_200v; |
| vector<uint8_t> long_decoded = long_decoded_200v; |
| |
| // With `vec.size() / b_size`, we get the maximum number of values that can be read |
| // from the encoded vector with the given byte size. |
| int value_count4b = float_decoded_200v.size() / b_size; |
| int value_count8b = double_decoded_200v.size() / b_size; |
| |
| float_decoded.resize(value_count4b * b_size); |
| int_decoded.resize(value_count4b * b_size); |
| double_decoded.resize(value_count8b * b_size); |
| long_decoded.resize(value_count8b * b_size); |
| |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_in_batch<0>(float_decoded, test_size, b_size); |
| encode_then_decode_in_batch<0>(int_decoded, test_size, b_size); |
| encode_then_decode_in_batch<0>(double_decoded, test_size, b_size); |
| encode_then_decode_in_batch<0>(long_decoded, test_size, b_size); |
| } |
| for (int test_size = value_count4b - 10; test_size <= value_count4b; test_size++) { |
| encode_then_decode_in_batch<0>(float_decoded, test_size, b_size); |
| encode_then_decode_in_batch<0>(int_decoded, test_size, b_size); |
| } |
| for (int test_size = value_count8b - 10; test_size <= value_count8b; test_size++) { |
| encode_then_decode_in_batch<0>(double_decoded, test_size, b_size); |
| encode_then_decode_in_batch<0>(long_decoded, test_size, b_size); |
| } |
| } |
| } |
| |
| //--------------------Encode -> Decode With Stride Tests--------------------// |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, FloatEncodeDecodeStrideTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) + 1, test_size, sizeof(float)); |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) + 2, test_size, sizeof(float)); |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) + 3, test_size, sizeof(float)); |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) * 3, test_size, sizeof(float)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) + 1, test_size, sizeof(float)); |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) + 2, test_size, sizeof(float)); |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) + 3, test_size, sizeof(float)); |
| encode_then_decode_with_stride<sizeof(float)>( |
| float_decoded_200v, sizeof(float) * 3, test_size, sizeof(float)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int32EncodeDecodeStrideTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) + 1, test_size, sizeof(int32_t)); |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) + 2, test_size, sizeof(int32_t)); |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) + 3, test_size, sizeof(int32_t)); |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) * 3, test_size, sizeof(int32_t)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) + 1, test_size, sizeof(int32_t)); |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) + 2, test_size, sizeof(int32_t)); |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) + 3, test_size, sizeof(int32_t)); |
| encode_then_decode_with_stride<sizeof(int32_t)>( |
| int_decoded_200v, sizeof(int32_t) * 3, test_size, sizeof(int32_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, DoubleEncodeDecodeStrideTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) + 1, test_size, sizeof(double)); |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) + 2, test_size, sizeof(double)); |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) + 3, test_size, sizeof(double)); |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) * 3, test_size, sizeof(double)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) + 1, test_size, sizeof(double)); |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) + 2, test_size, sizeof(double)); |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) + 3, test_size, sizeof(double)); |
| encode_then_decode_with_stride<sizeof(double)>( |
| double_decoded_200v, sizeof(double) * 3, test_size, sizeof(double)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int64EncodeDecodeStrideTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) + 1, test_size, sizeof(int64_t)); |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) + 2, test_size, sizeof(int64_t)); |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) + 3, test_size, sizeof(int64_t)); |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) * 3, test_size, sizeof(int64_t)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) + 1, test_size, sizeof(int64_t)); |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) + 2, test_size, sizeof(int64_t)); |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) + 3, test_size, sizeof(int64_t)); |
| encode_then_decode_with_stride<sizeof(int64)>( |
| long_decoded_200v, sizeof(int64_t) * 3, test_size, sizeof(int64_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, VaryingSizeEncodeDecodeStrideTest) { |
| for (int b_size = 1; b_size <= 10; b_size++) { |
| vector<uint8_t> float_decoded = float_decoded_200v; |
| vector<uint8_t> int_decoded = int_decoded_200v; |
| vector<uint8_t> double_decoded = double_decoded_200v; |
| vector<uint8_t> long_decoded = long_decoded_200v; |
| |
| // With `count / b_size`, we get the maximum number of values that can be read from |
| // the encoded vector with the given byte size. |
| int value_count4b = float_decoded_200v.size() / b_size; |
| int value_count8b = double_decoded_200v.size() / b_size; |
| |
| float_decoded.resize(value_count4b * b_size); |
| int_decoded.resize(value_count4b * b_size); |
| double_decoded.resize(value_count8b * b_size); |
| long_decoded.resize(value_count8b * b_size); |
| |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| encode_then_decode_with_stride<0>(float_decoded, b_size + 1, test_size, b_size); |
| encode_then_decode_with_stride<0>(int_decoded, b_size + 2, test_size, b_size); |
| encode_then_decode_with_stride<0>(double_decoded, b_size + 3, test_size, b_size); |
| encode_then_decode_with_stride<0>(long_decoded, b_size * 3, test_size, b_size); |
| } |
| for (int test_size = value_count4b - 10; test_size <= value_count4b; test_size++) { |
| encode_then_decode_with_stride<0>(float_decoded, b_size + 1, test_size, b_size); |
| encode_then_decode_with_stride<0>(int_decoded, b_size + 2, test_size, b_size); |
| } |
| for (int test_size = value_count8b - 10; test_size <= value_count8b; test_size++) { |
| encode_then_decode_with_stride<0>(double_decoded, b_size + 3, test_size, b_size); |
| encode_then_decode_with_stride<0>(long_decoded, b_size * 3, test_size, b_size); |
| } |
| } |
| } |
| |
| //--------------------Decode -> Encode Tests--------------------// |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, FloatDecodeEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_then_encode<float>(float_encoded_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_then_encode<float>(float_encoded_200v, test_size); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int32DecodeEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_then_encode<int32_t>(int_encoded_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_then_encode<int32_t>(int_encoded_200v, test_size); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, DoubleDecodeEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_then_encode<double>(double_encoded_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_then_encode<double>(double_encoded_200v, test_size); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int64DecodeEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_then_encode<int64_t>(long_encoded_200v, test_size); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_then_encode<int64_t>(long_encoded_200v, test_size); |
| } |
| } |
| |
| //--------------------Decode In Batch -> Encode Tests--------------------// |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, FloatDecodeBatchEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_in_batch_then_encode<sizeof(float)>( |
| float_encoded_200v, test_size, sizeof(float)); |
| } |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_in_batch_then_encode<sizeof(float)>( |
| float_encoded_200v, test_size, sizeof(float)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int32DecodeBatchEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_in_batch_then_encode<sizeof(int32_t)>( |
| int_encoded_200v, test_size, sizeof(int32_t)); |
| } |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_in_batch_then_encode<sizeof(int32_t)>( |
| int_encoded_200v, test_size, sizeof(int32_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, DoubleDecodeBatchEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_in_batch_then_encode<sizeof(double)>( |
| double_encoded_200v, test_size, sizeof(double)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_in_batch_then_encode<sizeof(double)>( |
| double_encoded_200v, test_size, sizeof(double)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int64DecodeBatchEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_in_batch_then_encode<sizeof(int64_t)>( |
| long_encoded_200v, test_size, sizeof(int64_t)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_in_batch_then_encode<sizeof(int64_t)>( |
| long_encoded_200v, test_size, sizeof(int64_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, VaryingSizeDecodeBatchEncodeTest) { |
| for (int b_size = 1; b_size <= 10; b_size++) { |
| vector<uint8_t> float_encoded = float_encoded_200v; |
| vector<uint8_t> int_encoded = int_encoded_200v; |
| vector<uint8_t> double_encoded = double_encoded_200v; |
| vector<uint8_t> long_encoded = long_encoded_200v; |
| |
| // With `count / b_size`, we get the maximum number of values that can be read from |
| // the encoded vector with the given byte size. |
| int value_count4b = float_encoded_200v.size() / b_size; |
| int value_count8b = float_encoded_200v.size() / b_size; |
| |
| float_encoded.resize(value_count4b * b_size); |
| int_encoded.resize(value_count4b * b_size); |
| double_encoded.resize(value_count8b * b_size); |
| long_encoded.resize(value_count8b * b_size); |
| |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_in_batch_then_encode<0>(float_encoded_200v, test_size, b_size); |
| decode_in_batch_then_encode<0>(int_encoded_200v, test_size, b_size); |
| decode_in_batch_then_encode<0>(double_encoded_200v, test_size, b_size); |
| decode_in_batch_then_encode<0>(long_encoded_200v, test_size, b_size); |
| } |
| for (int test_size = value_count4b - 10; test_size <= value_count4b; test_size++) { |
| decode_in_batch_then_encode<0>(float_encoded_200v, test_size, b_size); |
| decode_in_batch_then_encode<0>(int_encoded_200v, test_size, b_size); |
| } |
| for (int test_size = value_count8b - 10; test_size <= value_count8b; test_size++) { |
| decode_in_batch_then_encode<0>(double_encoded_200v, test_size, b_size); |
| decode_in_batch_then_encode<0>(long_encoded_200v, test_size, b_size); |
| } |
| } |
| } |
| |
| //--------------------Decode With Stride -> Encode Tests--------------------// |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, FloatDecodeStrideEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_with_stride_then_encode<sizeof(float)>( |
| float_encoded_200v, 2 * sizeof(float) + 2, test_size, sizeof(float)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_with_stride_then_encode<sizeof(float)>( |
| float_encoded_200v, 2 * sizeof(float) + 2, test_size, sizeof(float)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int32DecodeStrideEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_with_stride_then_encode<sizeof(int32_t)>( |
| int_encoded_200v, 2 * sizeof(int32_t) + 2, test_size, sizeof(int32_t)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_with_stride_then_encode<sizeof(int32_t)>( |
| int_encoded_200v, 2 * sizeof(int32_t) + 2, test_size, sizeof(int32_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, DoubleDecodeStrideEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_with_stride_then_encode<sizeof(double)>( |
| double_encoded_200v, 2 * sizeof(double) + 2, test_size, sizeof(double)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_with_stride_then_encode<sizeof(double)>( |
| double_encoded_200v, 2 * sizeof(double) + 2, test_size, sizeof(double)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, Int64DecodeStrideEncodeTest) { |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_with_stride_then_encode<sizeof(int64_t)>( |
| double_encoded_200v, 2 * sizeof(int64_t) + 2, test_size, sizeof(int64_t)); |
| } |
| for (int test_size = 190; test_size <= 200; test_size++) { |
| decode_with_stride_then_encode<sizeof(int64_t)>( |
| double_encoded_200v, 2 * sizeof(int64_t) + 2, test_size, sizeof(int64_t)); |
| } |
| } |
| |
| TEST_F(ParquetByteStreamSplitCoderTest, VaryingSizeDecodeStrideEncodeTest) { |
| for (int b_size = 1; b_size <= 10; b_size++) { |
| vector<uint8_t> float_encoded = float_encoded_200v; |
| vector<uint8_t> int_encoded = int_encoded_200v; |
| vector<uint8_t> double_encoded = double_encoded_200v; |
| vector<uint8_t> long_encoded = long_encoded_200v; |
| |
| // With `count / b_size`, we get the maximum number of values that can be read from |
| // the encoded vector with the given byte size. |
| int value_count4b = float_encoded_200v.size() / b_size; |
| int value_count8b = double_encoded_200v.size() / b_size; |
| |
| float_encoded.resize(value_count4b * b_size); |
| int_encoded.resize(value_count4b * b_size); |
| double_encoded.resize(value_count8b * b_size); |
| long_encoded.resize(value_count8b * b_size); |
| |
| for (int test_size = 10; test_size <= 20; test_size++) { |
| decode_with_stride_then_encode<0>(float_encoded, b_size + 1, test_size, b_size); |
| decode_with_stride_then_encode<0>(int_encoded, b_size + 2, test_size, b_size); |
| decode_with_stride_then_encode<0>(double_encoded, b_size + 3, test_size, b_size); |
| decode_with_stride_then_encode<0>(long_encoded, b_size * 3, test_size, b_size); |
| } |
| for (int test_size = value_count4b - 10; test_size <= value_count4b; test_size++) { |
| decode_with_stride_then_encode<0>(float_encoded, b_size + 1, test_size, b_size); |
| decode_with_stride_then_encode<0>(int_encoded, b_size + 2, test_size, b_size); |
| } |
| for (int test_size = value_count8b - 10; test_size <= value_count8b; test_size++) { |
| decode_with_stride_then_encode<0>(double_encoded, b_size + 3, test_size, b_size); |
| decode_with_stride_then_encode<0>(long_encoded, b_size * 3, test_size, b_size); |
| } |
| } |
| } |
| |
| } // namespace impala |