blob: 510ef4e8dcce525bea937b97564583c9c8911cc3 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "paimon/utils/roaring_bitmap64.h"
#include <cassert>
#include <memory>
#include <utility>
#include "paimon/fs/file_system.h"
#include "paimon/io/byte_array_input_stream.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/result.h"
#include "roaring.hh" // NOLINT(build/include_subdir)
namespace paimon {
namespace {
roaring::Roaring64Map::const_iterator& GetIterator(void* iter) {
return *(static_cast<roaring::Roaring64Map::const_iterator*>(iter));
}
roaring::Roaring64Map& GetRoaringBitmap(void* bitmap) {
return *(static_cast<roaring::Roaring64Map*>(bitmap));
}
} // namespace
RoaringBitmap64::Iterator::Iterator(const RoaringBitmap64& bitmap) {
iterator_ =
new roaring::Roaring64Map::const_iterator(GetRoaringBitmap(bitmap.roaring_bitmap_).begin());
}
RoaringBitmap64::Iterator::~Iterator() {
if (iterator_) {
delete static_cast<roaring::Roaring64Map::const_iterator*>(iterator_);
}
}
RoaringBitmap64::Iterator::Iterator(const RoaringBitmap64::Iterator& other) noexcept {
*this = other;
}
RoaringBitmap64::Iterator& RoaringBitmap64::Iterator::operator=(
const RoaringBitmap64::Iterator& other) noexcept {
if (&other == this) {
return *this;
}
if (!iterator_) {
iterator_ = new roaring::Roaring64Map::const_iterator(GetIterator(other.iterator_));
} else {
GetIterator(iterator_) = GetIterator(other.iterator_);
}
return *this;
}
RoaringBitmap64::Iterator::Iterator(RoaringBitmap64::Iterator&& other) noexcept {
*this = std::move(other);
}
RoaringBitmap64::Iterator& RoaringBitmap64::Iterator::operator=(
RoaringBitmap64::Iterator&& other) noexcept {
if (&other == this) {
return *this;
}
if (iterator_) {
delete static_cast<roaring::Roaring64Map::const_iterator*>(iterator_);
}
iterator_ = other.iterator_;
other.iterator_ = nullptr;
return *this;
}
int64_t RoaringBitmap64::Iterator::operator*() const {
return *GetIterator(iterator_);
}
RoaringBitmap64::Iterator& RoaringBitmap64::Iterator::operator++() {
++GetIterator(iterator_);
return *this;
}
bool RoaringBitmap64::Iterator::operator==(const Iterator& other) const {
if (&other == this) {
return true;
}
assert(iterator_ && other.iterator_);
return GetIterator(iterator_) == GetIterator(other.iterator_);
}
bool RoaringBitmap64::Iterator::operator!=(const Iterator& other) const {
return !(*this == other);
}
void RoaringBitmap64::Iterator::EqualOrLarger(int64_t value) {
[[maybe_unused]] bool _ = GetIterator(iterator_).move(value);
}
RoaringBitmap64::RoaringBitmap64() {
roaring_bitmap_ = new roaring::Roaring64Map();
}
RoaringBitmap64::~RoaringBitmap64() {
if (roaring_bitmap_) {
delete static_cast<roaring::Roaring64Map*>(roaring_bitmap_);
}
}
RoaringBitmap64::RoaringBitmap64(const RoaringBitmap64& other) noexcept {
*this = other;
}
RoaringBitmap64& RoaringBitmap64::operator=(const RoaringBitmap64& other) noexcept {
if (&other == this) {
return *this;
}
if (!roaring_bitmap_) {
roaring_bitmap_ = new roaring::Roaring64Map(GetRoaringBitmap(other.roaring_bitmap_));
} else {
GetRoaringBitmap(roaring_bitmap_) = GetRoaringBitmap(other.roaring_bitmap_);
}
return *this;
}
RoaringBitmap64::RoaringBitmap64(const RoaringBitmap32& other) noexcept {
*this = other;
}
RoaringBitmap64& RoaringBitmap64::operator=(const RoaringBitmap32& other) noexcept {
auto bitmap32 = static_cast<roaring::Roaring*>(other.roaring_bitmap_);
if (!roaring_bitmap_) {
roaring_bitmap_ = new roaring::Roaring64Map(*bitmap32);
} else {
GetRoaringBitmap(roaring_bitmap_) = roaring::Roaring64Map(*bitmap32);
}
return *this;
}
RoaringBitmap64::RoaringBitmap64(RoaringBitmap64&& other) noexcept {
*this = std::move(other);
}
RoaringBitmap64& RoaringBitmap64::operator=(RoaringBitmap64&& other) noexcept {
if (&other == this) {
return *this;
}
if (roaring_bitmap_) {
delete static_cast<roaring::Roaring64Map*>(roaring_bitmap_);
}
roaring_bitmap_ = other.roaring_bitmap_;
other.roaring_bitmap_ = nullptr;
return *this;
}
RoaringBitmap64& RoaringBitmap64::operator|=(const RoaringBitmap64& other) {
GetRoaringBitmap(roaring_bitmap_) |= GetRoaringBitmap(other.roaring_bitmap_);
return *this;
}
RoaringBitmap64& RoaringBitmap64::operator&=(const RoaringBitmap64& other) {
GetRoaringBitmap(roaring_bitmap_) &= GetRoaringBitmap(other.roaring_bitmap_);
return *this;
}
RoaringBitmap64& RoaringBitmap64::operator-=(const RoaringBitmap64& other) {
GetRoaringBitmap(roaring_bitmap_) -= GetRoaringBitmap(other.roaring_bitmap_);
return *this;
}
void RoaringBitmap64::Add(int64_t x) {
GetRoaringBitmap(roaring_bitmap_).add(static_cast<uint64_t>(x));
}
void RoaringBitmap64::AddMany(size_t n, const int64_t* values) {
if (n == 0 || !values) {
return;
}
auto& bitmap = GetRoaringBitmap(roaring_bitmap_);
// Bucket values by their high-32 bits in a single pass. K (the number of
// distinct buckets) is typically very small (often 1, rarely > a handful).
struct Bucket {
uint32_t high;
std::vector<uint32_t> lows;
explicit Bucket(uint32_t high_val) : high(high_val) {}
};
std::vector<Bucket> buckets;
buckets.reserve(4);
for (size_t i = 0; i < n; ++i) {
const auto v = static_cast<uint64_t>(values[i]);
const auto high = static_cast<uint32_t>(v >> 32);
const auto low = static_cast<uint32_t>(v);
Bucket* target = nullptr;
// Fast path: the previous bucket is overwhelmingly likely to match for
// sequential row-id streams produced by a B-tree scan.
if (!buckets.empty() && buckets.back().high == high) {
target = &buckets.back();
} else {
for (auto& b : buckets) {
if (b.high == high) {
target = &b;
break;
}
}
if (target == nullptr) {
buckets.emplace_back(high);
// Reserve generously on first encounter; we may end up using
// more memory than necessary if K turns out to be > 1, but
// this avoids repeated grow-and-copy in the common K == 1 case.
buckets.back().lows.reserve(buckets.size() == 1 ? n : n / 4);
target = &buckets.back();
}
}
target->lows.push_back(low);
}
// Hand each bucket to the inner 32-bit Roaring's true-batch addMany,
// which performs container-level bulk insertion.
for (const auto& b : buckets) {
bitmap.getOrCreateInner(b.high).addMany(b.lows.size(), b.lows.data());
}
}
void RoaringBitmap64::AddRange(int64_t min, int64_t max) {
GetRoaringBitmap(roaring_bitmap_).addRange(min, max);
}
void RoaringBitmap64::RemoveRange(int64_t min, int64_t max) {
GetRoaringBitmap(roaring_bitmap_).removeRange(min, max);
}
bool RoaringBitmap64::ContainsAny(int64_t min, int64_t max) const {
auto iter = EqualOrLarger(min);
if (iter != End() && *iter < max) {
return true;
}
return false;
}
bool RoaringBitmap64::CheckedAdd(int64_t x) {
if (Contains(x)) {
return false;
}
Add(x);
return true;
}
int64_t RoaringBitmap64::Cardinality() const {
return GetRoaringBitmap(roaring_bitmap_).cardinality();
}
bool RoaringBitmap64::Contains(int64_t x) const {
return GetRoaringBitmap(roaring_bitmap_).contains(static_cast<uint64_t>(x));
}
bool RoaringBitmap64::IsEmpty() const {
return GetRoaringBitmap(roaring_bitmap_).isEmpty();
}
size_t RoaringBitmap64::GetSizeInBytes() const {
return GetRoaringBitmap(roaring_bitmap_).getSizeInBytes();
}
void RoaringBitmap64::Flip(int64_t min, int64_t max) {
GetRoaringBitmap(roaring_bitmap_).flip(min, max);
}
bool RoaringBitmap64::operator==(const RoaringBitmap64& other) const noexcept {
if (this == &other) {
return true;
}
assert(roaring_bitmap_ && other.roaring_bitmap_);
return GetRoaringBitmap(roaring_bitmap_) == GetRoaringBitmap(other.roaring_bitmap_);
}
PAIMON_UNIQUE_PTR<Bytes> RoaringBitmap64::Serialize(MemoryPool* pool) const {
GetRoaringBitmap(roaring_bitmap_).runOptimize();
auto& bitmap = GetRoaringBitmap(roaring_bitmap_);
// Use default pool if no pool is provided
if (pool == nullptr) {
pool = GetDefaultPool().get();
}
auto bytes = Bytes::AllocateBytes(bitmap.getSizeInBytes(), pool);
bitmap.write(bytes->data());
return bytes;
}
Status RoaringBitmap64::Deserialize(ByteArrayInputStream* input_stream) {
const char* data = input_stream->GetRawData();
PAIMON_ASSIGN_OR_RAISE(int64_t pos, input_stream->GetPos());
PAIMON_ASSIGN_OR_RAISE(int64_t total_length, input_stream->Length());
try {
GetRoaringBitmap(roaring_bitmap_) =
roaring::Roaring64Map::readSafe(data, /*maxbytes*/ total_length - pos);
} catch (...) {
return Status::Invalid("catch exception in Deserialize() of RoaringBitmap64");
}
return input_stream->Seek(GetRoaringBitmap(roaring_bitmap_).getSizeInBytes(),
SeekOrigin::FS_SEEK_CUR);
}
Status RoaringBitmap64::Deserialize(const char* begin, size_t length) {
try {
GetRoaringBitmap(roaring_bitmap_) = roaring::Roaring64Map::readSafe(begin, length);
} catch (...) {
return Status::Invalid("catch exception in Deserialize() of RoaringBitmap64");
}
return Status::OK();
}
std::string RoaringBitmap64::ToString() const {
return GetRoaringBitmap(roaring_bitmap_).toString();
}
RoaringBitmap64 RoaringBitmap64::And(const RoaringBitmap64& lhs, const RoaringBitmap64& rhs) {
RoaringBitmap64 res;
GetRoaringBitmap(res.roaring_bitmap_) =
(GetRoaringBitmap(lhs.roaring_bitmap_) & GetRoaringBitmap(rhs.roaring_bitmap_));
return res;
}
RoaringBitmap64 RoaringBitmap64::Or(const RoaringBitmap64& lhs, const RoaringBitmap64& rhs) {
RoaringBitmap64 res;
GetRoaringBitmap(res.roaring_bitmap_) =
(GetRoaringBitmap(lhs.roaring_bitmap_) | GetRoaringBitmap(rhs.roaring_bitmap_));
return res;
}
RoaringBitmap64 RoaringBitmap64::AndNot(const RoaringBitmap64& lhs, const RoaringBitmap64& rhs) {
RoaringBitmap64 res;
GetRoaringBitmap(res.roaring_bitmap_) =
(GetRoaringBitmap(lhs.roaring_bitmap_) - GetRoaringBitmap(rhs.roaring_bitmap_));
return res;
}
RoaringBitmap64 RoaringBitmap64::From(const std::vector<int64_t>& values) {
RoaringBitmap64 res;
for (const auto& value : values) {
res.Add(value);
}
return res;
}
RoaringBitmap64 RoaringBitmap64::FastUnion(const std::vector<const RoaringBitmap64*>& inputs) {
std::vector<roaring::Roaring64Map*> roaring_inputs;
roaring_inputs.reserve(inputs.size());
for (const auto* roaring : inputs) {
roaring_inputs.push_back(&GetRoaringBitmap(roaring->roaring_bitmap_));
}
RoaringBitmap64 res;
GetRoaringBitmap(res.roaring_bitmap_) = roaring::Roaring64Map::fastunion(
roaring_inputs.size(), const_cast<const roaring::Roaring64Map**>(roaring_inputs.data()));
return res;
}
RoaringBitmap64 RoaringBitmap64::FastUnion(const std::vector<RoaringBitmap64>& inputs) {
std::vector<const RoaringBitmap64*> inputs_ptr;
inputs_ptr.reserve(inputs.size());
for (const auto& bitmap : inputs) {
inputs_ptr.push_back(&bitmap);
}
return FastUnion(inputs_ptr);
}
RoaringBitmap64::Iterator RoaringBitmap64::Begin() const {
return RoaringBitmap64::Iterator(*this);
}
RoaringBitmap64::Iterator RoaringBitmap64::End() const {
RoaringBitmap64::Iterator iter(*this);
GetIterator(iter.iterator_) = GetRoaringBitmap(roaring_bitmap_).end();
return iter;
}
RoaringBitmap64::Iterator RoaringBitmap64::EqualOrLarger(int64_t key) const {
RoaringBitmap64::Iterator iter(*this);
bool not_end = GetIterator(iter.iterator_).move(key);
if (!not_end) {
return End();
}
return iter;
}
} // namespace paimon