// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
| |
| #include "meta-store/codec.h" |
| |
| #include <gtest/gtest.h> |
| |
| #include <cstring> |
| #include <random> |
| |
| #include "common/util.h" |
| |
| using namespace doris; |
| |
| int main(int argc, char** argv) { |
| ::testing::InitGoogleTest(&argc, argv); |
| return RUN_ALL_TESTS(); |
| } |
| |
| TEST(CodecTest, StringCodecTest) { |
| using namespace doris::cloud; |
| std::mt19937 gen(std::random_device("/dev/urandom")()); |
| const int max_len = (2 << 16) + 10086; |
| std::uniform_int_distribution<int> rd_len(0, max_len); |
| std::uniform_int_distribution<short> rd_char(std::numeric_limits<char>::min(), |
| std::numeric_limits<char>::max()); |
| |
| int ret = -1; |
| |
| // Correctness test |
| { |
| int case_count = 50; |
| std::string str1; |
| std::string str2; |
| str1.reserve(max_len); |
| str2.reserve(max_len); |
| std::string b1; |
| std::string b2; |
| std::string d1; |
| std::string d2; |
| b1.reserve(1 + max_len * 2 + 2); |
| b2.reserve(1 + max_len * 2 + 2); |
| d1.reserve(max_len); |
| d2.reserve(max_len); |
| while (case_count--) { |
| str1.clear(); |
| str2.clear(); |
| b1.clear(); |
| b2.clear(); |
| d1.clear(); |
| d2.clear(); |
| int len1 = rd_len(gen); |
| int len2 = rd_len(gen); |
| int zero_count1 = 0; |
| int zero_count2 = 0; |
| while (str1.size() < len1) { |
| str1.push_back(rd_char(gen)); |
| str1.back() == 0x00 ? ++zero_count1 : zero_count1 += 0; |
| } |
| while (str2.size() < len2) { |
| str2.push_back(rd_char(gen)); |
| str2.back() == 0x00 ? ++zero_count2 : zero_count2 += 0; |
| } |
| cloud::encode_bytes(str1, &b1); |
| cloud::encode_bytes(str2, &b2); |
| // clang-format off |
| int sequence = std::memcmp(&str1[0], &str2[0], str1.size() > str2.size() ? str2.size() : str1.size()); |
| int sequence_encoded = std::memcmp(&b1[0], &b2[0], b1.size() > b2.size() ? b2.size() : b1.size()); |
| #define CASE_INFO "sequence=" << sequence << " sequence_encoded=" << sequence_encoded << " str1=" << hex(str1)<< " str2=" << hex(str2)<< " b1=" << hex(b1)<< " b2=" << hex(b2) << " len1=" << len1 << " len2=" << len2 |
| if (len1 > 0 && len2 > 0) { |
| EXPECT_TRUE((sequence * sequence_encoded > 0) || (sequence == 0 && sequence_encoded == 0)) << CASE_INFO; |
| } else { // sequence is not 0 if one of len1,len2 is 0 |
| if (len1 > len2) { |
| EXPECT_GT(sequence_encoded, 0) << CASE_INFO; |
| } else if (len1 < len2) { |
| EXPECT_LT(sequence_encoded, 0) << CASE_INFO; |
| } else { // len1 == len2 == 0 |
| EXPECT_TRUE(len1 == 0 && len2 == 0 && sequence == 0 && sequence == sequence_encoded) << CASE_INFO; |
| } |
| } |
| #undef CASE_INFO |
| EXPECT_EQ(b1[0], cloud::EncodingTag::BYTES_TAG) << " str1=" << hex(str1)<< " str2=" << hex(str2)<< " b1=" << hex(b1)<< " b2=" << hex(b2); |
| EXPECT_EQ(b2[0], cloud::EncodingTag::BYTES_TAG) << " str1=" << hex(str1)<< " str2=" << hex(str2)<< " b1=" << hex(b1)<< " b2=" << hex(b2); |
| // Check encoded value size, marker + zero_escape + terminator |
| EXPECT_EQ(b1.size(), (str1.size() + 1 + zero_count1 + 2)) << "zc1=" << zero_count1; |
| EXPECT_EQ(b2.size(), (str2.size() + 1 + zero_count2 + 2)) << "zc2=" << zero_count2; |
| |
| // Decoding test |
| b1 += "cloud is good"; |
| b2 += "cloud will be better"; |
| std::string_view b1_sv(b1); |
| ret = cloud::decode_bytes(&b1_sv, &d1); |
| EXPECT_EQ(ret, 0); |
| EXPECT_EQ(d1, str1); |
| std::string_view b2_sv(b2); |
| ret = cloud::decode_bytes(&b2_sv, &d2); |
| EXPECT_EQ(ret, 0); |
| EXPECT_EQ(d2, str2); |
| EXPECT_EQ(b1_sv, "cloud is good"); |
| EXPECT_EQ(b2_sv, "cloud will be better"); |
| // clang-format on |
| } |
| } |
| |
| // Boundary tests |
| { |
| std::vector<std::string> strs; |
| std::vector<std::string> expected; |
| |
| int zeroes = 1 * 1024 * 1024; |
| strs.emplace_back(zeroes, static_cast<char>(0x00)); |
| expected.push_back(""); |
| expected.back().push_back(cloud::EncodingTag::BYTES_TAG); |
| while (zeroes--) { |
| expected.back().push_back(cloud::EncodingTag::BYTE_ESCAPE); |
| expected.back().push_back(cloud::EncodingTag::ESCAPED_00); |
| } |
| expected.back().push_back(cloud::EncodingTag::BYTE_ESCAPE); |
| expected.back().push_back(cloud::EncodingTag::BYTES_ENDING); |
| |
| ASSERT_TRUE(strs.size() == expected.size()); |
| for (int i = 0; i < strs.size(); ++i) { |
| std::string b1; |
| std::string d1; |
| std::string_view sv(strs[i]); |
| cloud::encode_bytes(sv, &b1); |
| ASSERT_TRUE(b1.size() == expected[i].size()); |
| for (int j = 0; j < b1.size(); ++j) { |
| ASSERT_TRUE(expected[i][j] == b1[j]); |
| } |
| std::string_view b1_sv(b1); |
| ret = cloud::decode_bytes(&b1_sv, &d1); |
| ASSERT_EQ(ret, 0); |
| ASSERT_EQ(b1_sv.size(), 0); |
| ASSERT_EQ(d1.size(), strs[i].size()); |
| ASSERT_TRUE(d1 == strs[i]); |
| } |
| } |
| |
| // Other tests |
| { |
| std::string str1 = "This is"; |
| std::string str2 = "tHIS IS"; |
| // Append something strange |
| str1.push_back(static_cast<char>(0x00)); |
| str1 += "an string"; |
| str1.push_back(static_cast<char>(0xff)); |
| str2.push_back(static_cast<char>(0x00)); |
| str2 += "AN STRING"; |
| str2.push_back(static_cast<unsigned char>(0xff)); |
| |
| // Output byte array |
| std::string b1; |
| std::string b2; |
| |
| cloud::encode_bytes(str1, &b1); |
| cloud::encode_bytes(str2, &b2); |
| ASSERT_TRUE(std::memcmp(&b1[0], &b2[0], b1.size() > b2.size() ? b2.size() : b1.size()) < 0); |
| |
| std::string str11; |
| std::string str22; |
| std::string_view b1_sv(b1); |
| std::string_view b2_sv(b2); |
| cloud::decode_bytes(&b1_sv, &str11); |
| cloud::decode_bytes(&b2_sv, &str22); |
| ASSERT_TRUE(str1 == str11); |
| ASSERT_TRUE(str2 == str22); |
| } |
| } |
| |
| TEST(CodecTest, Int64CodecTest) { |
| using namespace doris::cloud; |
| int ret = 0; |
| |
| // Basic test |
| { |
| std::string out1; |
| cloud::encode_int64(10086, &out1); |
| ASSERT_EQ(out1[0], cloud::EncodingTag::POSITIVE_FIXED_INT_TAG); |
| std::cout << hex(out1) << std::endl; |
| int64_t val1 = 10010; |
| std::string_view in(out1); |
| ret = cloud::decode_int64(&in, &val1); |
| ASSERT_EQ(ret, 0); |
| ASSERT_EQ(val1, 10086); |
| |
| std::string out2; |
| cloud::encode_int64(-1001011, &out2); |
| ASSERT_EQ(out2[0], cloud::EncodingTag::NEGATIVE_FIXED_INT_TAG); |
| std::cout << hex(out2) << std::endl; |
| int64_t val2 = 10086; |
| in = out2; |
| ret = cloud::decode_int64(&in, &val2); |
| ASSERT_EQ(ret, 0); |
| ASSERT_EQ(val2, -1001011); |
| |
| // Compare lexical order |
| ASSERT_LT(out2, out1); |
| } |
| } |