blob: c5db5591b09fc8da53cc520dc2d79f612cc72d00 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ByteRLE.hh"
#include "MemoryOutputStream.hh"
#include "wrap/gtest-wrapper.h"
#include "wrap/orc-proto-wrapper.hh"
#include <cstdlib>
namespace orc {
// Size argument (1024 * 1024, i.e. 1M) handed to the MemoryOutputStream that
// every test in this file writes its encoded output into.
constexpr int DEFAULT_MEM_STREAM_SIZE = 1024 * 1024;
/**
 * Fill a not-null mask with randomly placed null (zero) entries.
 *
 * Every entry of notNull[0, numValues) is first set to 1. Random positions
 * are then cleared to 0 until the requested number of nulls has been placed.
 * Positions are drawn with std::rand(), so the layout depends on the current
 * std::srand() seed.
 *
 * @param numValues number of entries in the mask
 * @param numNulls number of entries to mark as null; a request larger than
 *                 numValues is clamped to numValues so the placement loop
 *                 always terminates
 * @param notNull mask to fill; the call is a no-op when this is nullptr
 */
void generateNotNull(uint64_t numValues,
                     uint64_t numNulls,
                     char* notNull) {
  if (notNull == nullptr) {
    return;
  }

  // Initialise the mask even when no nulls are requested, so a caller that
  // passes a buffer never ends up reading indeterminate bytes from it.
  memset(notNull, 1, numValues);

  // There cannot be more nulls than slots. Without this clamp the loop below
  // would spin forever (or divide by zero when numValues is 0).
  if (numNulls > numValues) {
    numNulls = numValues;
  }

  while (numNulls > 0) {
    uint64_t pos = static_cast<uint64_t>(std::rand()) % numValues;
    // Only count a draw that lands on a slot which is still non-null.
    if (notNull[pos]) {
      notNull[pos] = static_cast<char>(0);
      --numNulls;
    }
  }
}
/**
 * Produce pseudo-random byte test data and, optionally, a not-null mask.
 *
 * The mask is generated first (via generateNotNull), then every entry of
 * data[0, numValues) is overwritten with std::rand() % 256 narrowed to char.
 * Entries are written regardless of what the mask says.
 *
 * @param numValues number of entries to write into data
 * @param data destination buffer with room for numValues entries
 * @param numNulls number of nulls forwarded to generateNotNull
 * @param notNull optional mask buffer forwarded to generateNotNull
 */
void generateData(uint64_t numValues,
                  char* data,
                  uint64_t numNulls = 0,
                  char* notNull = nullptr) {
  // Mask first: both steps consume std::rand(), so the order is part of the
  // generated sequence.
  generateNotNull(numValues, numNulls, notNull);

  char* const end = data + numValues;
  for (char* cursor = data; cursor != end; ++cursor) {
    const int randomByte = std::rand() % 256;
    *cursor = static_cast<char>(randomByte);
  }
}
/**
 * Produce pseudo-random boolean test data and, optionally, a not-null mask.
 *
 * The mask is generated first (via generateNotNull), then every entry of
 * data[0, numValues) is set to 0 or 1 according to the parity of a fresh
 * std::rand() draw. Entries are written regardless of what the mask says.
 *
 * @param numValues number of entries to write into data
 * @param data destination buffer with room for numValues entries
 * @param numNulls number of nulls forwarded to generateNotNull
 * @param notNull optional mask buffer forwarded to generateNotNull
 */
void generateBoolData(uint64_t numValues,
                      char* data,
                      uint64_t numNulls = 0,
                      char* notNull = nullptr) {
  // Mask first: both steps consume std::rand(), so the order is part of the
  // generated sequence.
  generateNotNull(numValues, numNulls, notNull);

  uint64_t index = 0;
  while (index < numValues) {
    // std::rand() is non-negative, so "odd" is exactly "rand() % 2 == 1".
    const bool bit = (std::rand() % 2) != 0;
    data[index] = static_cast<char>(bit ? 1 : 0);
    ++index;
  }
}
/**
 * Decode a byte-RLE stream and check it against the expected values.
 *
 * The bytes held by memStream are wrapped in a SeekableArrayInputStream, run
 * through the decoder returned by createByteRleDecoder, and each decoded
 * value is compared with data using EXPECT_EQ.
 *
 * @param memStream stream holding the encoded bytes
 * @param data expected values; must contain at least numValues entries
 * @param numValues number of values to decode and compare
 * @param notNull optional mask passed to the decoder; entries whose mask
 *                value is 0 are not compared. nullptr means every entry is
 *                compared.
 */
void decodeAndVerify(
    const MemoryOutputStream& memStream,
    char * data,
    uint64_t numValues,
    char* notNull) {
  std::unique_ptr<SeekableInputStream> inStream(
      new SeekableArrayInputStream(memStream.getData(), memStream.getLength()));

  std::unique_ptr<ByteRleDecoder> decoder =
      createByteRleDecoder(std::move(inStream));

  // Owned by a smart pointer so the buffer is released on every exit path,
  // including the case where next() throws.
  std::unique_ptr<char[]> decodedData(new char[numValues]);
  decoder->next(decodedData.get(), numValues, notNull);

  for (uint64_t i = 0; i < numValues; ++i) {
    // Null slots carry no meaningful decoded value, so skip them.
    if (!notNull || notNull[i]) {
      EXPECT_EQ(data[i], decodedData[i]) << "mismatch at index " << i;
    }
  }
}
/**
 * Decode a boolean-RLE stream and check it against the expected values.
 *
 * The bytes held by memStream are wrapped in a SeekableArrayInputStream and
 * run through the decoder returned by createBooleanRleDecoder. Expected and
 * decoded entries are both reduced to bool (non-zero means true) before they
 * are compared with EXPECT_EQ, so only truthiness has to match.
 *
 * @param memStream stream holding the encoded bytes
 * @param data expected values; must contain at least numValues entries
 * @param numValues number of values to decode and compare
 * @param notNull optional mask passed to the decoder; entries whose mask
 *                value is 0 are not compared. nullptr means every entry is
 *                compared.
 */
void decodeAndVerifyBoolean(
    const MemoryOutputStream& memStream,
    char * data,
    uint64_t numValues,
    char* notNull) {
  std::unique_ptr<SeekableInputStream> inStream(
      new SeekableArrayInputStream(memStream.getData(), memStream.getLength()));

  std::unique_ptr<ByteRleDecoder> decoder =
      createBooleanRleDecoder(std::move(inStream));

  // Owned by a smart pointer so the buffer is released on every exit path,
  // including the case where next() throws.
  std::unique_ptr<char[]> decodedData(new char[numValues]);
  decoder->next(decodedData.get(), numValues, notNull);

  for (uint64_t i = 0; i < numValues; ++i) {
    // Null slots carry no meaningful decoded value, so skip them.
    if (!notNull || notNull[i]) {
      bool expect = data[i] != 0;
      bool actual = decodedData[i] != 0;
      EXPECT_EQ(expect, actual) << "mismatch at index " << i;
    }
  }
}
// Round trip: encode 102400 random bytes (no nulls) with the byte RLE
// encoder, flush, then decode the produced stream and compare every value.
TEST(ByteRleEncoder, random_chars) {
  MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
  MemoryPool * pool = getDefaultPool();

  uint64_t capacity = 500 * 1024;
  uint64_t block = 1024;
  // The encoder takes ownership of the buffered stream, which writes into
  // memStream.
  std::unique_ptr<BufferedOutputStream> outStream(
      new BufferedOutputStream(*pool, &memStream, capacity, block));

  std::unique_ptr<ByteRleEncoder> encoder =
      createByteRleEncoder(std::move(outStream));

  const uint64_t numValues = 102400;
  // RAII buffer: freed even if encoding or decoding throws.
  std::unique_ptr<char[]> data(new char[numValues]);
  generateData(numValues, data.get());
  encoder->add(data.get(), numValues, nullptr);
  encoder->flush();

  decodeAndVerify(memStream, data.get(), numValues, nullptr);
}
// Round trip: encode 102400 random bytes, 377 of which are flagged as null
// through the notNull mask, flush, then decode with the same mask and
// compare the non-null values.
TEST(ByteRleEncoder, random_chars_with_null) {
  MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
  MemoryPool * pool = getDefaultPool();

  uint64_t capacity = 500 * 1024;
  uint64_t block = 1024;
  // The encoder takes ownership of the buffered stream, which writes into
  // memStream.
  std::unique_ptr<BufferedOutputStream> outStream(
      new BufferedOutputStream(*pool, &memStream, capacity, block));

  std::unique_ptr<ByteRleEncoder> encoder =
      createByteRleEncoder(std::move(outStream));

  const uint64_t numValues = 102400;
  const uint64_t numNulls = 377;
  // RAII buffers: freed even if encoding or decoding throws.
  std::unique_ptr<char[]> notNull(new char[numValues]);
  std::unique_ptr<char[]> data(new char[numValues]);
  generateData(numValues, data.get(), numNulls, notNull.get());
  encoder->add(data.get(), numValues, notNull.get());
  encoder->flush();

  decodeAndVerify(memStream, data.get(), numValues, notNull.get());
}
// Round trip: encode 1779 random booleans (no nulls) with the boolean RLE
// encoder, flush, then decode and compare. 1779 is not a multiple of 8.
// NOTE(review): presumably chosen so the final packed byte is only partly
// filled; confirm against the encoder.
TEST(BooleanRleEncoder, random_bits_not_aligned) {
  MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
  MemoryPool * pool = getDefaultPool();

  uint64_t capacity = 500 * 1024;
  uint64_t block = 1024;
  // The encoder takes ownership of the buffered stream, which writes into
  // memStream.
  std::unique_ptr<BufferedOutputStream> outStream(
      new BufferedOutputStream(*pool, &memStream, capacity, block));

  std::unique_ptr<ByteRleEncoder> encoder =
      createBooleanRleEncoder(std::move(outStream));

  const uint64_t numValues = 1779;
  // RAII buffer: freed even if encoding or decoding throws.
  std::unique_ptr<char[]> data(new char[numValues]);
  generateBoolData(numValues, data.get());
  encoder->add(data.get(), numValues, nullptr);
  encoder->flush();

  decodeAndVerifyBoolean(memStream, data.get(), numValues, nullptr);
}
// Round trip: encode 8000 random booleans (no nulls) with the boolean RLE
// encoder, flush, then decode and compare. 8000 is a multiple of 8.
TEST(BooleanRleEncoder, random_bits_aligned) {
  MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
  MemoryPool * pool = getDefaultPool();

  uint64_t capacity = 500 * 1024;
  uint64_t block = 1024;
  // The encoder takes ownership of the buffered stream, which writes into
  // memStream.
  std::unique_ptr<BufferedOutputStream> outStream(
      new BufferedOutputStream(*pool, &memStream, capacity, block));

  std::unique_ptr<ByteRleEncoder> encoder =
      createBooleanRleEncoder(std::move(outStream));

  const uint64_t numValues = 8000;
  // RAII buffer: freed even if encoding or decoding throws.
  std::unique_ptr<char[]> data(new char[numValues]);
  generateBoolData(numValues, data.get());
  encoder->add(data.get(), numValues, nullptr);
  encoder->flush();

  decodeAndVerifyBoolean(memStream, data.get(), numValues, nullptr);
}
// Round trip: encode 8000 random booleans, 515 of which are flagged as null
// through the notNull mask, flush, then decode with the same mask and
// compare the non-null values.
TEST(BooleanRleEncoder, random_bits_aligned_with_null) {
  MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
  MemoryPool * pool = getDefaultPool();

  uint64_t capacity = 500 * 1024;
  uint64_t block = 1024;
  // The encoder takes ownership of the buffered stream, which writes into
  // memStream.
  std::unique_ptr<BufferedOutputStream> outStream(
      new BufferedOutputStream(*pool, &memStream, capacity, block));

  std::unique_ptr<ByteRleEncoder> encoder =
      createBooleanRleEncoder(std::move(outStream));

  const uint64_t numValues = 8000;
  const uint64_t numNulls = 515;
  // RAII buffers: freed even if encoding or decoding throws.
  std::unique_ptr<char[]> notNull(new char[numValues]);
  std::unique_ptr<char[]> data(new char[numValues]);
  generateBoolData(numValues, data.get(), numNulls, notNull.get());
  encoder->add(data.get(), numValues, notNull.get());
  encoder->flush();

  decodeAndVerifyBoolean(memStream, data.get(), numValues, notNull.get());
}
}