blob: 008ae259ed72099c07feef804380b65a223b115b [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "paimon/utils/roaring_bitmap32.h"
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <memory>
#include <utility>
#include "gtest/gtest.h"
#include "paimon/io/byte_array_input_stream.h"
#include "paimon/result.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
TEST(RoaringBitmap32Test, TestSimple) {
RoaringBitmap32 roaring;
ASSERT_TRUE(roaring.IsEmpty());
roaring.Add(10);
roaring.Add(100);
ASSERT_TRUE(roaring.Contains(10));
ASSERT_TRUE(roaring.Contains(100));
ASSERT_FALSE(roaring.CheckedAdd(10));
ASSERT_TRUE(roaring.CheckedAdd(20));
ASSERT_TRUE(roaring.Contains(20));
ASSERT_FALSE(roaring.IsEmpty());
ASSERT_EQ("{10,20,100}", roaring.ToString());
ASSERT_EQ(3, roaring.Cardinality());
roaring.AddRange(10, 15);
ASSERT_EQ("{10,11,12,13,14,20,100}", roaring.ToString());
ASSERT_EQ(7, roaring.Cardinality());
roaring.RemoveRange(12, 20);
ASSERT_EQ("{10,11,20,100}", roaring.ToString());
ASSERT_EQ(4, roaring.Cardinality());
}
TEST(RoaringBitmap32Test, TestCompatibleWithJava) {
auto pool = GetDefaultPool();
{
RoaringBitmap32 roaring;
for (int32_t i = 0; i < 100; i++) {
roaring.Add(i);
}
for (int32_t i = 0; i < 100; i++) {
ASSERT_TRUE(roaring.Contains(i));
}
auto bytes = roaring.Serialize(pool.get());
std::vector<uint8_t> result(bytes->data(), bytes->data() + bytes->size());
std::vector<uint8_t> expected = {59, 48, 0, 0, 1, 0, 0, 99, 0, 1, 0, 0, 0, 99, 0};
ASSERT_EQ(result, expected);
RoaringBitmap32 de_roaring;
ASSERT_OK(de_roaring.Deserialize(bytes->data(), bytes->size()));
for (int32_t i = 0; i < 100; i++) {
ASSERT_TRUE(de_roaring.Contains(i));
}
}
{
RoaringBitmap32 roaring;
for (int32_t i = RoaringBitmap32::MAX_VALUE - 1; i > RoaringBitmap32::MAX_VALUE - 101;
i--) {
roaring.Add(i);
}
for (int32_t i = RoaringBitmap32::MAX_VALUE - 1; i > RoaringBitmap32::MAX_VALUE - 101;
i--) {
ASSERT_TRUE(roaring.Contains(i));
}
auto bytes = roaring.Serialize(pool.get());
std::vector<uint8_t> result(bytes->data(), bytes->data() + bytes->size());
std::vector<uint8_t> expected = {59, 48, 0, 0, 1, 255, 127, 99, 0, 1, 0, 155, 255, 99, 0};
ASSERT_EQ(result, expected);
RoaringBitmap32 de_roaring;
ASSERT_OK(de_roaring.Deserialize(bytes->data(), bytes->size()));
for (int32_t i = RoaringBitmap32::MAX_VALUE - 1; i > RoaringBitmap32::MAX_VALUE - 101;
i--) {
ASSERT_TRUE(de_roaring.Contains(i));
}
}
{
RoaringBitmap32 roaring;
for (int32_t i = 5000; i < 10000; i += 17) {
roaring.Add(i);
}
for (int32_t i = 5000; i < 10000; i += 17) {
ASSERT_TRUE(roaring.Contains(i));
}
auto bytes = roaring.Serialize(pool.get());
std::vector<uint8_t> result(bytes->data(), bytes->data() + bytes->size());
std::vector<uint8_t> expected = {
58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 38, 1, 16, 0, 0, 0, 136, 19,
153, 19, 170, 19, 187, 19, 204, 19, 221, 19, 238, 19, 255, 19, 16, 20, 33, 20,
50, 20, 67, 20, 84, 20, 101, 20, 118, 20, 135, 20, 152, 20, 169, 20, 186, 20,
203, 20, 220, 20, 237, 20, 254, 20, 15, 21, 32, 21, 49, 21, 66, 21, 83, 21,
100, 21, 117, 21, 134, 21, 151, 21, 168, 21, 185, 21, 202, 21, 219, 21, 236, 21,
253, 21, 14, 22, 31, 22, 48, 22, 65, 22, 82, 22, 99, 22, 116, 22, 133, 22,
150, 22, 167, 22, 184, 22, 201, 22, 218, 22, 235, 22, 252, 22, 13, 23, 30, 23,
47, 23, 64, 23, 81, 23, 98, 23, 115, 23, 132, 23, 149, 23, 166, 23, 183, 23,
200, 23, 217, 23, 234, 23, 251, 23, 12, 24, 29, 24, 46, 24, 63, 24, 80, 24,
97, 24, 114, 24, 131, 24, 148, 24, 165, 24, 182, 24, 199, 24, 216, 24, 233, 24,
250, 24, 11, 25, 28, 25, 45, 25, 62, 25, 79, 25, 96, 25, 113, 25, 130, 25,
147, 25, 164, 25, 181, 25, 198, 25, 215, 25, 232, 25, 249, 25, 10, 26, 27, 26,
44, 26, 61, 26, 78, 26, 95, 26, 112, 26, 129, 26, 146, 26, 163, 26, 180, 26,
197, 26, 214, 26, 231, 26, 248, 26, 9, 27, 26, 27, 43, 27, 60, 27, 77, 27,
94, 27, 111, 27, 128, 27, 145, 27, 162, 27, 179, 27, 196, 27, 213, 27, 230, 27,
247, 27, 8, 28, 25, 28, 42, 28, 59, 28, 76, 28, 93, 28, 110, 28, 127, 28,
144, 28, 161, 28, 178, 28, 195, 28, 212, 28, 229, 28, 246, 28, 7, 29, 24, 29,
41, 29, 58, 29, 75, 29, 92, 29, 109, 29, 126, 29, 143, 29, 160, 29, 177, 29,
194, 29, 211, 29, 228, 29, 245, 29, 6, 30, 23, 30, 40, 30, 57, 30, 74, 30,
91, 30, 108, 30, 125, 30, 142, 30, 159, 30, 176, 30, 193, 30, 210, 30, 227, 30,
244, 30, 5, 31, 22, 31, 39, 31, 56, 31, 73, 31, 90, 31, 107, 31, 124, 31,
141, 31, 158, 31, 175, 31, 192, 31, 209, 31, 226, 31, 243, 31, 4, 32, 21, 32,
38, 32, 55, 32, 72, 32, 89, 32, 106, 32, 123, 32, 140, 32, 157, 32, 174, 32,
191, 32, 208, 32, 225, 32, 242, 32, 3, 33, 20, 33, 37, 33, 54, 33, 71, 33,
88, 33, 105, 33, 122, 33, 139, 33, 156, 33, 173, 33, 190, 33, 207, 33, 224, 33,
241, 33, 2, 34, 19, 34, 36, 34, 53, 34, 70, 34, 87, 34, 104, 34, 121, 34,
138, 34, 155, 34, 172, 34, 189, 34, 206, 34, 223, 34, 240, 34, 1, 35, 18, 35,
35, 35, 52, 35, 69, 35, 86, 35, 103, 35, 120, 35, 137, 35, 154, 35, 171, 35,
188, 35, 205, 35, 222, 35, 239, 35, 0, 36, 17, 36, 34, 36, 51, 36, 68, 36,
85, 36, 102, 36, 119, 36, 136, 36, 153, 36, 170, 36, 187, 36, 204, 36, 221, 36,
238, 36, 255, 36, 16, 37, 33, 37, 50, 37, 67, 37, 84, 37, 101, 37, 118, 37,
135, 37, 152, 37, 169, 37, 186, 37, 203, 37, 220, 37, 237, 37, 254, 37, 15, 38,
32, 38, 49, 38, 66, 38, 83, 38, 100, 38, 117, 38, 134, 38, 151, 38, 168, 38,
185, 38, 202, 38, 219, 38, 236, 38, 253, 38, 14, 39};
ASSERT_EQ(result, expected);
RoaringBitmap32 de_roaring;
ASSERT_OK(de_roaring.Deserialize(bytes->data(), bytes->size()));
for (int32_t i = 5000; i < 10000; i += 17) {
ASSERT_TRUE(de_roaring.Contains(i));
}
}
// empty
{
RoaringBitmap32 roaring;
auto bytes = roaring.Serialize(pool.get());
std::vector<uint8_t> result(bytes->data(), bytes->data() + bytes->size());
std::vector<uint8_t> expected = {58, 48, 0, 0, 0, 0, 0, 0};
ASSERT_EQ(result, expected);
RoaringBitmap32 de_roaring;
ASSERT_OK(de_roaring.Deserialize(bytes->data(), bytes->size()));
ASSERT_TRUE(de_roaring.IsEmpty());
ASSERT_FALSE(de_roaring.Contains(58));
}
}
TEST(RoaringBitmap32Test, TestDeserializeFailed) {
RoaringBitmap32 roaring;
std::vector<char> invalid_bytes = {0, 100};
ASSERT_NOK_WITH_MSG(roaring.Deserialize(invalid_bytes.data(), invalid_bytes.size()),
"catch exception in Deserialize() of RoaringBitmap32");
}
TEST(RoaringBitmap32Test, TestAndOr) {
RoaringBitmap32 roaring1 = RoaringBitmap32::From({10, 100});
RoaringBitmap32 roaring2 = RoaringBitmap32::From({20, 100, 200});
auto and_roaring = RoaringBitmap32::And(roaring1, roaring2);
ASSERT_EQ(and_roaring, RoaringBitmap32::From({100}));
auto or_roaring = RoaringBitmap32::Or(roaring1, roaring2);
ASSERT_EQ(or_roaring, RoaringBitmap32::From({10, 20, 100, 200}));
}
TEST(RoaringBitmap32Test, TestAssignAndMove) {
RoaringBitmap32 roaring1 = RoaringBitmap32::From({10, 100});
RoaringBitmap32 roaring2 = roaring1;
ASSERT_EQ(roaring1, roaring2);
ASSERT_FALSE(roaring1.IsEmpty());
ASSERT_FALSE(roaring2.IsEmpty());
RoaringBitmap32 roaring3 = std::move(roaring1);
ASSERT_EQ(roaring3, roaring2);
ASSERT_FALSE(roaring1.roaring_bitmap_); // NOLINT(bugprone-use-after-move)
roaring3.Add(20);
ASSERT_FALSE(roaring3 == roaring2);
roaring3 = roaring2;
ASSERT_EQ(roaring3, roaring2);
roaring2 = std::move(roaring3);
ASSERT_FALSE(roaring3.roaring_bitmap_); // NOLINT(bugprone-use-after-move)
ASSERT_EQ("{10,100}", roaring2.ToString());
roaring3 = roaring2;
ASSERT_EQ("{10,100}", roaring3.ToString());
roaring3 = std::move(roaring2);
ASSERT_EQ("{10,100}", roaring3.ToString());
ASSERT_FALSE(roaring2.roaring_bitmap_); // NOLINT(bugprone-use-after-move)
}
TEST(RoaringBitmap32Test, TestFastUnion) {
RoaringBitmap32 roaring1 = RoaringBitmap32::From({10, 100});
RoaringBitmap32 roaring2 = RoaringBitmap32::From({1, 10, 20, 100, 300});
RoaringBitmap32 roaring3 = RoaringBitmap32::From({2, 100, 800});
RoaringBitmap32 res = RoaringBitmap32::FastUnion({&roaring1, &roaring2, &roaring3});
ASSERT_EQ(res, RoaringBitmap32::From({1, 2, 10, 20, 100, 300, 800}));
std::vector<RoaringBitmap32> roarings = {roaring1, roaring2, roaring3};
RoaringBitmap32 res2 = RoaringBitmap32::FastUnion(roarings);
ASSERT_EQ(res2, RoaringBitmap32::From({1, 2, 10, 20, 100, 300, 800}));
}
TEST(RoaringBitmap32Test, TestFlip) {
RoaringBitmap32 roaring = RoaringBitmap32::From({1, 2, 4});
roaring.Flip(0, 5);
ASSERT_EQ(roaring, RoaringBitmap32::From({0, 3}));
}
TEST(RoaringBitmap32Test, TestAndNot) {
RoaringBitmap32 roaring1 = RoaringBitmap32::From({10, 20, 100, 200});
RoaringBitmap32 roaring2 = RoaringBitmap32::From({5, 20, 80, 200, 250});
auto andnot_roaring = RoaringBitmap32::AndNot(roaring1, roaring2);
ASSERT_EQ(andnot_roaring, RoaringBitmap32::From({10, 100}));
}
TEST(RoaringBitmap32Test, TestIterator) {
RoaringBitmap32 roaring = RoaringBitmap32::From({10, 20});
auto iter = roaring.Begin();
ASSERT_EQ(*iter, 10);
auto iter2 = ++iter;
ASSERT_EQ(*iter, 20);
ASSERT_EQ(*iter2, 20);
++iter;
ASSERT_EQ(iter, roaring.End());
iter = roaring.EqualOrLarger(5);
ASSERT_EQ(*iter, 10);
iter = roaring.EqualOrLarger(10);
ASSERT_EQ(*iter, 10);
iter = roaring.EqualOrLarger(15);
ASSERT_EQ(*iter, 20);
iter = roaring.EqualOrLarger(20);
ASSERT_EQ(*iter, 20);
iter = roaring.EqualOrLarger(100);
ASSERT_EQ(iter, roaring.End());
}
TEST(RoaringBitmap32Test, TestIteratorAssignAndMove) {
RoaringBitmap32 roaring = RoaringBitmap32::From({10, 100, 200});
auto iter1 = roaring.EqualOrLarger(10);
auto iter2 = iter1;
ASSERT_EQ(iter1, iter2);
ASSERT_EQ(10, *iter1);
ASSERT_EQ(10, *iter2);
auto iter3 = std::move(iter1);
ASSERT_EQ(iter2, iter3);
ASSERT_FALSE(iter1.iterator_); // NOLINT(bugprone-use-after-move)
++iter3;
ASSERT_NE(iter3, iter2);
ASSERT_EQ(100, *iter3);
iter3 = iter2;
ASSERT_EQ(iter3, iter2);
iter2 = std::move(iter3);
ASSERT_FALSE(iter3.iterator_); // NOLINT(bugprone-use-after-move)
ASSERT_EQ(10, *iter2);
iter3 = iter2;
ASSERT_EQ(iter2, iter3);
ASSERT_EQ(10, *iter2);
ASSERT_EQ(10, *iter3);
iter3 = std::move(iter2);
ASSERT_EQ(10, *iter3);
ASSERT_FALSE(iter2.iterator_); // NOLINT(bugprone-use-after-move)
}
TEST(RoaringBitmap32Test, TestDeserializeFromInputStream) {
RoaringBitmap32 roaring1 = RoaringBitmap32::From({10, 20, 100});
RoaringBitmap32 roaring2 = RoaringBitmap32::From({10, 50, 150, 200});
RoaringBitmap32 roaring3 = RoaringBitmap32::From({2, 4, 6, 1000});
size_t len1 = roaring1.GetSizeInBytes();
size_t len2 = roaring2.GetSizeInBytes();
size_t len3 = roaring3.GetSizeInBytes();
auto pool = GetDefaultPool();
auto bytes1 = roaring1.Serialize(pool.get());
ASSERT_EQ(bytes1->size(), len1);
auto bytes2 = roaring2.Serialize(pool.get());
ASSERT_EQ(bytes2->size(), len2);
auto bytes3 = roaring3.Serialize(pool.get());
ASSERT_EQ(bytes3->size(), len3);
auto concat_bytes = std::make_shared<Bytes>(len1 + len2 + len3, pool.get());
memcpy(concat_bytes->data(), bytes1->data(), len1);
memcpy(concat_bytes->data() + len1, bytes2->data(), len2);
memcpy(concat_bytes->data() + len1 + len2, bytes3->data(), len3);
ByteArrayInputStream byte_array_input_stream(concat_bytes->data(), concat_bytes->size());
RoaringBitmap32 de_roaring1;
ASSERT_OK(de_roaring1.Deserialize(&byte_array_input_stream));
ASSERT_EQ(de_roaring1, roaring1);
ASSERT_EQ(byte_array_input_stream.GetPos().value(), len1);
RoaringBitmap32 de_roaring2;
ASSERT_OK(de_roaring2.Deserialize(&byte_array_input_stream));
ASSERT_EQ(de_roaring2, roaring2);
ASSERT_EQ(byte_array_input_stream.GetPos().value(), len1 + len2);
RoaringBitmap32 de_roaring3;
ASSERT_OK(de_roaring3.Deserialize(&byte_array_input_stream));
ASSERT_EQ(de_roaring3, roaring3);
ASSERT_EQ(byte_array_input_stream.GetPos().value(), len1 + len2 + len3);
}
TEST(RoaringBitmap32Test, TestHighCardinality) {
std::srand(time(nullptr));
auto pool = GetDefaultPool();
RoaringBitmap32 roaring;
for (int32_t i = 0; i < 10000; i++) {
roaring.Add(std::rand());
}
auto bytes = roaring.Serialize(pool.get());
RoaringBitmap32 de_roaring;
ASSERT_OK(de_roaring.Deserialize(bytes->data(), bytes->size()));
ASSERT_EQ(roaring, de_roaring);
}
TEST(RoaringBitmap32Test, TestInplaceAndOr) {
RoaringBitmap32 roaring = RoaringBitmap32::From({0, 1, 100});
RoaringBitmap32 roaring1 = RoaringBitmap32::From({1, 2, 5, 200});
roaring |= roaring1;
ASSERT_EQ(roaring.ToString(), "{0,1,2,5,100,200}");
RoaringBitmap32 roaring2 = RoaringBitmap32::From({1, 2, 3, 5, 100, 500});
roaring &= roaring2;
ASSERT_EQ(roaring.ToString(), "{1,2,5,100}");
RoaringBitmap32 roaring3 = RoaringBitmap32::From({2, 100});
roaring -= roaring3;
ASSERT_EQ(roaring.ToString(), "{1,5}");
}
TEST(RoaringBitmap32Test, TestContainsAny) {
RoaringBitmap32 roaring = RoaringBitmap32::From({10, 11, 100});
ASSERT_TRUE(roaring.ContainsAny(10, 11));
ASSERT_TRUE(roaring.ContainsAny(10, 20));
ASSERT_TRUE(roaring.ContainsAny(10, 101));
ASSERT_TRUE(roaring.ContainsAny(20, 200));
ASSERT_TRUE(roaring.ContainsAny(5, 11));
ASSERT_FALSE(roaring.ContainsAny(5, 10));
ASSERT_FALSE(roaring.ContainsAny(20, 100));
ASSERT_FALSE(roaring.ContainsAny(20, 30));
ASSERT_FALSE(roaring.ContainsAny(500, 520));
}
} // namespace paimon::test