// blob: ded6c242d020ad4c4d22f0bde61e7cbeb57fccaa [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exec/parquet/parquet-bool-decoder.h"
#include "testutil/gtest-util.h"
#include <vector>
#include "common/names.h"
namespace impala {
void EncodeData(const vector<bool>& data, parquet::Encoding::type encoding,
uint8_t* buffer, int buffer_len) {
if (encoding == parquet::Encoding::PLAIN) {
BitWriter writer(buffer, buffer_len);
for (int b : data) {
ASSERT_TRUE(writer.PutValue(b, 1));
}
writer.Flush();
} else {
DCHECK(encoding == parquet::Encoding::RLE);
// We need to pass 'buffer + 4' because the ParquetBoolDecoder ignores the first 4
// bytes (in Parquet RLE, the first 4 bytes are used to encode the data size).
RleEncoder encoder(buffer + 4, buffer_len - 4, 1);
for (int b : data) {
ASSERT_TRUE(encoder.Put(b));
}
encoder.Flush();
}
}
// Test helper: checks that a ParquetBoolDecoder yields the right values around a skip.
//
// A fresh decoder is pointed at 'encoded_data' ('encoded_data_len' bytes, encoded with
// 'encoding'). The first 'skip_at' values (capped at the number of expected values) are
// decoded and compared with 'expected_data'. Then 'skip_count' values are skipped, and
// every remaining value, starting at index 'skip_at + skip_count', is decoded and
// compared with 'expected_data'. Mismatches and failed decodes are reported as
// non-fatal gtest failures tagged with the index of the offending value.
void TestSkipping(parquet::Encoding::type encoding, uint8_t* encoded_data,
    int encoded_data_len, const vector<bool>& expected_data, int skip_at,
    int skip_count) {
  using namespace parquet;
  ParquetBoolDecoder decoder;
  decoder.SetData(encoding, encoded_data, encoded_data_len);

  // Decodes the next value with the DecodeValue<> instantiation matching 'encoding'.
  auto Decode = [encoding, &decoder]() {
    // Initialized so that a failed decode (already reported by EXPECT_TRUE below)
    // returns a defined value instead of reading an indeterminate bool.
    bool b = false;
    if (encoding == Encoding::PLAIN) {
      EXPECT_TRUE(decoder.DecodeValue<Encoding::PLAIN>(&b));
    } else {
      EXPECT_TRUE(decoder.DecodeValue<Encoding::RLE>(&b));
    }
    return b;
  };

  // Signed copy of the size so the loop conditions do not mix signed and unsigned
  // operands.
  const int num_values = static_cast<int>(expected_data.size());

  for (int i = 0; i < skip_at && i < num_values; ++i) {
    EXPECT_EQ(Decode(), expected_data[i]) << i;
  }
  decoder.SkipValues(skip_count);
  for (int i = skip_at + skip_count; i < num_values; ++i) {
    EXPECT_EQ(Decode(), expected_data[i]) << i;
  }
}
// Encodes a fixed 400-value boolean pattern with both PLAIN and RLE, then runs
// TestSkipping() over a table of (skip position, skip length) combinations for each
// encoding.
TEST(ParquetBoolDecoder, TestDecodeAndSkipping) {
  vector<bool> expected_data;
  // Write 100 falses, 100 trues, 100 alternating falses and trues, 100 falses
  for (int i = 0; i < 100; ++i) expected_data.push_back(false);
  for (int i = 0; i < 100; ++i) expected_data.push_back(true);
  for (int i = 0; i < 100; ++i) expected_data.push_back(i % 2);
  for (int i = 0; i < 100; ++i) expected_data.push_back(false);

  // One entry per TestSkipping() invocation: how many values are decoded before the
  // skip, and how many values the skip covers.
  struct SkipCase {
    int skip_at;
    int skip_count;
  };
  const SkipCase skip_cases[] = {
      {0, 8}, {0, 79}, {0, 160}, {0, 260}, {0, 370},
      {27, 13},
      {50, 112}, {50, 183}, {50, 270}, {50, 350},
      {123, 8}, {125, 100},
      {225, 17}, {225, 70}, {235, 160},
      {337, 17}, {337, 60}, {337, 63},
  };

  for (auto encoding : {parquet::Encoding::PLAIN, parquet::Encoding::RLE}) {
    constexpr int buffer_len = 128;
    // Zero-initialized because the decoder is handed the whole buffer, including the
    // bytes that EncodeData() does not write.
    uint8_t buffer[buffer_len] = {};
    // EncodeData() reports write failures with ASSERT_TRUE, which only returns from the
    // helper; stop here as well rather than decoding a partially written buffer.
    ASSERT_NO_FATAL_FAILURE(EncodeData(expected_data, encoding, buffer, buffer_len));

    for (const SkipCase& skip_case : skip_cases) {
      // TestSkipping() only reports the value index, so record which combination is
      // running to make a failure attributable.
      SCOPED_TRACE(::testing::Message()
          << "encoding=" << (encoding == parquet::Encoding::PLAIN ? "PLAIN" : "RLE")
          << " skip_at=" << skip_case.skip_at
          << " skip_count=" << skip_case.skip_count);
      TestSkipping(encoding, buffer, buffer_len, expected_data, skip_case.skip_at,
          skip_case.skip_count);
    }
  }
}
}