blob: f9d814cba67000db9d28d9548785b997cb919ff8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <gtest/gtest.h>
#include <random>
#include <string>
#include <unordered_set>
#include <vector>
#include "encoding/dictionary_decoder.h"
#include "encoding/dictionary_encoder.h"
namespace storage {
// Test fixture shared by the dictionary encoder/decoder tests below. It adds
// no state or set-up of its own; it only groups the tests under one suite
// name.
class DictionaryTest : public ::testing::Test {
};
// Encodes four values (one of them repeated) and checks that the bytes the
// encoder writes to the stream match a fixed reference sequence exactly.
TEST_F(DictionaryTest, DictionaryEncoder) {
    DictionaryEncoder encoder;
    common::ByteStream stream(1024, common::MOD_DICENCODE_OBJ);
    encoder.init();
    encoder.encode("apple", stream);
    encoder.encode("banana", stream);
    encoder.encode("cherry", stream);
    encoder.encode("apple", stream);
    encoder.flush(stream);

    // Reference bytes generated using the Java edition.
    const uint8_t expected_buf[] = {6,   10,  97,  112, 112, 108, 101, 12,  98,
                                    97,  110, 97,  110, 97,  12,  99,  104, 101,
                                    114, 114, 121, 5,   2,   3,   4,   24,  0};

    uint8_t buf[1024] = {0};
    uint32_t want_len = stream.total_size();
    // Initialised so the length check below never reads an indeterminate
    // value if read_buf leaves it untouched.
    uint32_t read_len = 0;
    // Guard the fixed-size stack buffer before read_buf writes into it.
    ASSERT_LE(want_len, sizeof(buf));
    stream.read_buf(buf, want_len, read_len);

    EXPECT_EQ(read_len, sizeof(expected_buf));
    for (size_t i = 0; i < sizeof(expected_buf); i++) {
        EXPECT_EQ(expected_buf[i], buf[i]) << "mismatch at byte index " << i;
    }
}
// Round-trip check: every value fed to the encoder must come back from the
// decoder in the same order, including the repeated "apple" entry.
TEST_F(DictionaryTest, DictionaryEncoderAndDecoder) {
    // Input sequence, also used as the expected decoder output.
    const char* const words[] = {"apple", "banana", "cherry", "apple"};

    DictionaryEncoder encoder;
    common::ByteStream stream(1024, common::MOD_DICENCODE_OBJ);
    encoder.init();
    for (const char* word : words) {
        encoder.encode(word, stream);
    }
    encoder.flush(stream);

    DictionaryDecoder decoder;
    decoder.init();
    for (const char* word : words) {
        // The decoder must report a pending value before each read.
        ASSERT_TRUE(decoder.has_next(stream));
        ASSERT_EQ(decoder.read_string(stream), word);
    }
}
// Single-value round trip: the decoder yields exactly one string and then
// reports that nothing is left.
TEST_F(DictionaryTest, DictionaryEncoderAndDecoderOneItem) {
    DictionaryEncoder enc;
    common::ByteStream bytes(1024, common::MOD_DICENCODE_OBJ);
    enc.init();
    enc.encode("apple", bytes);
    enc.flush(bytes);

    DictionaryDecoder dec;
    dec.init();

    // Exactly one value is available...
    ASSERT_TRUE(dec.has_next(bytes));
    const std::string only_value = dec.read_string(bytes);
    ASSERT_EQ(only_value, "apple");

    // ...and the stream is exhausted afterwards.
    ASSERT_FALSE(dec.has_next(bytes));
}
// Round-trips 26 distinct three-letter strings ("aaa" .. "zzz"), each encoded
// 100 times in a row, and checks the decoder returns them in the same order.
TEST_F(DictionaryTest, DictionaryEncoderAndDecoderRepeatedItems) {
    const int kRepeats = 100;

    DictionaryEncoder encoder;
    common::ByteStream stream(1024, common::MOD_DICENCODE_OBJ);
    encoder.init();
    for (char c = 'a'; c <= 'z'; c++) {
        // std::string(count, ch): the count comes first. The reversed form
        // std::string(c, 3) would build `c` copies of the byte '\x03'.
        const std::string item(3, c);
        for (int i = 0; i < kRepeats; i++) {
            encoder.encode(item, stream);
        }
    }
    encoder.flush(stream);

    DictionaryDecoder decoder;
    decoder.init();
    for (char c = 'a'; c <= 'z'; c++) {
        const std::string expected(3, c);
        for (int i = 0; i < kRepeats; i++) {
            ASSERT_EQ(decoder.read_string(stream), expected);
        }
    }
}
// Round-trips 10000 unique random strings (5-20 printable ASCII characters
// each) through the encoder and decoder and checks they come back unchanged
// and in order.
TEST_F(DictionaryTest,
       DictionaryEncoderAndDecoderLargeQuantitiesWithRandomStrings) {
    DictionaryEncoder encoder;
    common::ByteStream stream(1024, common::MOD_DICENCODE_OBJ);
    encoder.init();

    // Prepare random string generator. The seed still comes from
    // std::random_device, but it is recorded in the gtest trace so a failing
    // run can be reproduced by hard-coding the reported value.
    const std::random_device::result_type seed = std::random_device{}();
    SCOPED_TRACE("random seed = " + std::to_string(seed));
    std::mt19937 gen(seed);
    std::uniform_int_distribution<> length_dist(5, 20);  // String length range
    std::uniform_int_distribution<> char_dist(33,
                                              126);  // Printable ASCII range

    // Generate 10000 random strings. The count is a size_t so the comparison
    // against vector::size() does not mix signed and unsigned operands.
    const size_t num_strings = 10000;
    std::vector<std::string> test_strings;
    test_strings.reserve(num_strings);
    std::unordered_set<std::string> string_set;  // For ensuring uniqueness
    while (test_strings.size() < num_strings) {
        const int length = length_dist(gen);
        std::string str;
        str.reserve(length);
        for (int i = 0; i < length; ++i) {
            str.push_back(static_cast<char>(char_dist(gen)));
        }
        // Ensure string uniqueness: keep the string only when it was not
        // generated before.
        if (string_set.insert(str).second) {
            test_strings.push_back(str);
        }
    }

    // Encode all strings
    for (const auto& str : test_strings) {
        encoder.encode(str, stream);
    }
    encoder.flush(stream);

    DictionaryDecoder decoder;
    decoder.init();
    // Decode and verify all strings, reporting the position of the first
    // mismatch.
    for (size_t i = 0; i < test_strings.size(); ++i) {
        const std::string decoded_str = decoder.read_string(stream);
        ASSERT_EQ(decoded_str, test_strings[i])
            << "mismatch at string index " << i;
    }
}
} // namespace storage