| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <parallel_hashmap/btree.h> |
| #include <parallel_hashmap/phmap.h> |
| |
| #include <algorithm> |
| #include <cstdarg> |
| #include <cstdint> |
| #include <cstdio> |
| #include <limits> |
| #include <map> |
| #include <memory> |
| #include <new> |
| #include <numeric> |
| #include <roaring/roaring.hh> |
| #include <set> |
| #include <stdexcept> |
| #include <string> |
| #include <utility> |
| |
| #include "common/config.h" |
| #include "common/exception.h" |
| #include "common/logging.h" |
| #include "gutil/integral_types.h" |
| #include "udf/udf.h" |
| #include "util/coding.h" |
| #include "vec/common/pod_array.h" |
| #include "vec/common/pod_array_fwd.h" |
| namespace doris { |
| |
| // serialized bitmap := TypeCode(1), Payload |
| // The format of payload depends on value of TypeCode which is defined below |
| struct BitmapTypeCode { |
| enum type { |
| // An empty bitmap. Payload is 0 byte. |
| // added in 0.11 |
| EMPTY = 0, |
| // A bitmap containing only one element that is in [0, UINT32_MAX] |
| // Payload := UInt32LittleEndian(4 byte) |
| // added in 0.11 |
| SINGLE32 = 1, |
| // A bitmap whose maximum element is in [0, UINT32_MAX] |
| // Payload := the standard RoaringBitmap format described by |
| // https://github.com/RoaringBitmap/RoaringFormatSpec/ |
| // added in 0.11 |
| BITMAP32 = 2, |
| // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX] |
| // Payload := UInt64LittleEndian(8 byte) |
| // added in 0.12 |
| SINGLE64 = 3, |
| // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX]. |
| // |
| // To support 64-bits elements, all elements with the same high 32 bits are stored in a |
| // RoaringBitmap containing only the lower 32 bits. Thus we could use |
| // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints. |
| // |
| // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below |
| // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring |
| // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte) |
| // - MapValue := the standard RoaringBitmap format |
| // |
| // added in 0.12 |
| BITMAP64 = 4, |
| SET = 5, // V1 |
| SET_V2 = 10, |
| BITMAP32_V2 = 12, |
| BITMAP64_V2 = 13, |
| TYPE_MAX |
| }; |
| Status static inline validate(int bitmap_type) { |
| if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) { |
| std::string err_msg = |
| fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}", |
| BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type); |
| LOG(ERROR) << err_msg; |
| return Status::Corruption(err_msg); |
| } |
| return Status::OK(); |
| } |
| }; |
| |
| namespace detail { |
| |
| class Roaring64MapSetBitForwardIterator; |
| |
| // Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh |
| // What we change includes |
| // - a custom serialization format is used inside read()/write()/getSizeInBytes() |
| // - added clear() and is32BitsEnough() |
| class Roaring64Map { |
| public: |
| /** |
| * Create an empty bitmap |
| */ |
| Roaring64Map() = default; |
| |
| /** |
| * Construct a bitmap from a list of 32-bit integer values. |
| */ |
| Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); } |
| |
| /** |
| * Construct a bitmap from a list of 64-bit integer values. |
| */ |
| Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); } |
| |
| /** |
| * Construct a 64-bit map from a 32-bit one |
| */ |
| explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); } |
| |
| Roaring64Map(const Roaring64Map& r) = default; |
| |
| Roaring64Map(Roaring64Map&& r) = default; |
| |
| /** |
| * Assignment operator. |
| */ |
| Roaring64Map& operator=(const Roaring64Map& r) { |
| roarings = r.roarings; |
| return *this; |
| } |
| |
| /** |
| * Add value x |
| * |
| */ |
| void add(uint32_t x) { |
| roarings[0].add(x); |
| roarings[0].setCopyOnWrite(copyOnWrite); |
| } |
| void add(uint64_t x) { |
| roarings[highBytes(x)].add(lowBytes(x)); |
| roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); |
| } |
| |
| template <typename T> |
| void addMany(size_t n_args, const T* vals) { |
| if constexpr (sizeof(T) == sizeof(uint32_t)) { |
| auto& roaring = roarings[0]; |
| roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals)); |
| roaring.setCopyOnWrite(copyOnWrite); |
| } else if constexpr (sizeof(T) < sizeof(uint32_t)) { |
| auto& roaring = roarings[0]; |
| std::vector<uint32_t> values(n_args); |
| for (size_t i = 0; i != n_args; ++i) { |
| values[i] = uint32_t(vals[i]); |
| } |
| roaring.addMany(n_args, values.data()); |
| roaring.setCopyOnWrite(copyOnWrite); |
| } else { |
| for (size_t lcv = 0; lcv < n_args; lcv++) { |
| roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv])); |
| roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite); |
| } |
| } |
| } |
| |
| void addMany(size_t n_args, const uint64_t* vals) { |
| for (size_t lcv = 0; lcv < n_args; lcv++) { |
| roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv])); |
| roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite); |
| } |
| } |
| |
| /** |
| * Remove value x |
| * |
| */ |
| void remove(uint32_t x) { roarings[0].remove(x); } |
| void remove(uint64_t x) { |
| auto roaring_iter = roarings.find(highBytes(x)); |
| if (roaring_iter != roarings.cend()) { |
| roaring_iter->second.remove(lowBytes(x)); |
| } |
| } |
| /** |
| * Return the largest value (if not empty) |
| * |
| */ |
| uint64_t maximum() const { |
| for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend(); |
| ++roaring_iter) { |
| if (!roaring_iter->second.isEmpty()) { |
| return uniteBytes(roaring_iter->first, roaring_iter->second.maximum()); |
| } |
| } |
| // we put std::numeric_limits<>::max/min in parenthesis |
| // to avoid a clash with the Windows.h header under Windows |
| return (std::numeric_limits<uint64_t>::min)(); |
| } |
| |
| /** |
| * Return the smallest value (if not empty) |
| * |
| */ |
| uint64_t minimum() const { |
| for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend(); |
| ++roaring_iter) { |
| if (!roaring_iter->second.isEmpty()) { |
| return uniteBytes(roaring_iter->first, roaring_iter->second.minimum()); |
| } |
| } |
| // we put std::numeric_limits<>::max/min in parenthesis |
| // to avoid a clash with the Windows.h header under Windows |
| return (std::numeric_limits<uint64_t>::max)(); |
| } |
| |
| /** |
| * Check if value x is present |
| */ |
| bool contains(uint32_t x) const { |
| return roarings.count(0) == 0 ? false : roarings.at(0).contains(x); |
| } |
| bool contains(uint64_t x) const { |
| return roarings.count(highBytes(x)) == 0 ? false |
| : roarings.at(highBytes(x)).contains(lowBytes(x)); |
| } |
| |
| /** |
| * Compute the intersection between the current bitmap and the provided |
| * bitmap, |
| * writing the result in the current bitmap. The provided bitmap is not |
| * modified. |
| */ |
| Roaring64Map& operator&=(const Roaring64Map& r) { |
| for (auto& map_entry : roarings) { |
| if (r.roarings.count(map_entry.first) == 1) { |
| map_entry.second &= r.roarings.at(map_entry.first); |
| } else { |
| map_entry.second = roaring::Roaring(); |
| } |
| } |
| return *this; |
| } |
| |
| /** |
| * Compute the difference between the current bitmap and the provided |
| * bitmap, |
| * writing the result in the current bitmap. The provided bitmap is not |
| * modified. |
| */ |
| Roaring64Map& operator-=(const Roaring64Map& r) { |
| for (auto& map_entry : roarings) { |
| if (r.roarings.count(map_entry.first) == 1) { |
| map_entry.second -= r.roarings.at(map_entry.first); |
| } |
| } |
| return *this; |
| } |
| |
| /** |
| * Compute the union between the current bitmap and the provided bitmap, |
| * writing the result in the current bitmap. The provided bitmap is not |
| * modified. |
| * |
| * See also the fastunion function to aggregate many bitmaps more quickly. |
| */ |
| Roaring64Map& operator|=(const Roaring64Map& r) { |
| for (const auto& map_entry : r.roarings) { |
| if (roarings.count(map_entry.first) == 0) { |
| roarings[map_entry.first] = map_entry.second; |
| roarings[map_entry.first].setCopyOnWrite(copyOnWrite); |
| } else { |
| roarings[map_entry.first] |= map_entry.second; |
| } |
| } |
| return *this; |
| } |
| |
| /** |
| * Compute the symmetric union between the current bitmap and the provided |
| * bitmap, |
| * writing the result in the current bitmap. The provided bitmap is not |
| * modified. |
| */ |
| Roaring64Map& operator^=(const Roaring64Map& r) { |
| for (const auto& map_entry : r.roarings) { |
| if (roarings.count(map_entry.first) == 0) { |
| roarings[map_entry.first] = map_entry.second; |
| roarings[map_entry.first].setCopyOnWrite(copyOnWrite); |
| } else { |
| roarings[map_entry.first] ^= map_entry.second; |
| } |
| } |
| return *this; |
| } |
| |
| /** |
| * Exchange the content of this bitmap with another. |
| */ |
| void swap(Roaring64Map& r) { roarings.swap(r.roarings); } |
| |
| /** |
| * Get the cardinality of the bitmap (number of elements). |
| * Throws std::length_error in the special case where the bitmap is full |
| * (cardinality() == 2^64). Check isFull() before calling to avoid |
| * exception. |
| */ |
| uint64_t cardinality() const { |
| if (isFull()) { |
| throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, |
| "bitmap is full, cardinality is 2^64, " |
| "unable to represent in a 64-bit integer"); |
| } |
| return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0, |
| [](uint64_t previous, |
| const std::pair<const uint32_t, roaring::Roaring>& map_entry) { |
| return previous + map_entry.second.cardinality(); |
| }); |
| } |
| /** |
| * Computes the size of the intersection between two bitmaps. |
| * |
| */ |
| uint64_t andCardinality(const Roaring64Map& r) const { |
| uint64_t card = 0; |
| for (const auto& map_entry : roarings) { |
| if (r.roarings.count(map_entry.first) == 1) { |
| card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first)); |
| } |
| } |
| return card; |
| } |
| |
| /** |
| * Computes the size of the union between two bitmaps. |
| * |
| */ |
| uint64_t orCardinality(const Roaring64Map& r) const { |
| uint64_t card = 0; |
| for (const auto& map_entry : roarings) { |
| if (r.roarings.count(map_entry.first) == 0) { |
| card += map_entry.second.cardinality(); |
| } |
| } |
| for (const auto& map_entry : r.roarings) { |
| if (roarings.count(map_entry.first) == 0) { |
| card += map_entry.second.cardinality(); |
| } else { |
| card += roarings.at(map_entry.first).or_cardinality(map_entry.second); |
| } |
| } |
| return card; |
| } |
| |
| /** |
| * Computes the size of the difference (andnot) between two bitmaps. |
| * r1.cardinality - (r1 & r2).cardinality |
| */ |
| uint64_t andnotCardinality(const Roaring64Map& r) const { |
| uint64_t card = 0; |
| for (const auto& map_entry : roarings) { |
| card += map_entry.second.cardinality(); |
| if (r.roarings.count(map_entry.first) == 1) { |
| card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second); |
| } |
| } |
| return card; |
| } |
| |
| /** |
| * Computes the size of the symmetric difference (andnot) between two |
| * bitmaps. |
| * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality |
| */ |
| uint64_t xorCardinality(const Roaring64Map& r) const { |
| uint64_t card = 0; |
| for (const auto& map_entry : roarings) { |
| card += map_entry.second.cardinality(); |
| } |
| for (const auto& map_entry : r.roarings) { |
| card += map_entry.second.cardinality(); |
| if (roarings.count(map_entry.first) == 1) { |
| card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second); |
| } |
| } |
| return card; |
| } |
| |
| /** |
| * Returns true if the bitmap is empty (cardinality is zero). |
| */ |
| bool isEmpty() const { |
| return std::all_of(roarings.cbegin(), roarings.cend(), |
| [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) { |
| return map_entry.second.isEmpty(); |
| }); |
| } |
| |
| /** |
| * Returns true if the bitmap is full (cardinality is max uint64_t + 1). |
| */ |
| bool isFull() const { |
| // only bother to check if map is fully saturated |
| // |
| // we put std::numeric_limits<>::max/min in parenthesis |
| // to avoid a clash with the Windows.h header under Windows |
| return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1 |
| ? std::all_of(roarings.cbegin(), roarings.cend(), |
| [](const std::pair<const uint32_t, roaring::Roaring>& |
| roaring_map_entry) { |
| // roarings within map are saturated if cardinality |
| // is uint32_t max + 1 |
| return roaring_map_entry.second.cardinality() == |
| ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + |
| 1; |
| }) |
| : false; |
| } |
| |
| /** |
| * Return true if the two bitmaps contain the same elements. |
| */ |
| bool operator==(const Roaring64Map& r) const { |
| // we cannot use operator == on the map because either side may contain |
| // empty Roaring Bitmaps |
| auto lhs_iter = roarings.cbegin(); |
| auto lhs_cend = roarings.cend(); |
| auto rhs_iter = r.roarings.cbegin(); |
| auto rhs_cend = r.roarings.cend(); |
| while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) { |
| auto lhs_key = lhs_iter->first; |
| auto rhs_key = rhs_iter->first; |
| const auto& lhs_map = lhs_iter->second; |
| const auto& rhs_map = rhs_iter->second; |
| if (lhs_map.isEmpty()) { |
| ++lhs_iter; |
| continue; |
| } |
| if (rhs_map.isEmpty()) { |
| ++rhs_iter; |
| continue; |
| } |
| if (!(lhs_key == rhs_key)) { |
| return false; |
| } |
| if (!(lhs_map == rhs_map)) { |
| return false; |
| } |
| ++lhs_iter; |
| ++rhs_iter; |
| } |
| while (lhs_iter != lhs_cend) { |
| if (!lhs_iter->second.isEmpty()) { |
| return false; |
| } |
| ++lhs_iter; |
| } |
| while (rhs_iter != rhs_cend) { |
| if (!rhs_iter->second.isEmpty()) { |
| return false; |
| } |
| ++rhs_iter; |
| } |
| return true; |
| } |
| |
| /** convert array and bitmap containers to run containers when it is more |
| * efficient; |
| * also convert from run containers when more space efficient. Returns |
| * true if the result has at least one run container. |
| * Additional savings might be possible by calling shrinkToFit(). |
| */ |
| bool runOptimize() { |
| return std::accumulate( |
| roarings.begin(), roarings.end(), true, |
| [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) { |
| return map_entry.second.runOptimize() && previous; |
| }); |
| } |
| |
| /** |
| * If needed, reallocate memory to shrink the memory usage. Returns |
| * the number of bytes saved. |
| */ |
| size_t shrinkToFit() { |
| size_t savedBytes = 0; |
| auto iter = roarings.begin(); |
| while (iter != roarings.cend()) { |
| if (iter->second.isEmpty()) { |
| // empty Roarings are 84 bytes |
| savedBytes += 88; |
| iter = roarings.erase(iter); |
| } else { |
| savedBytes += iter->second.shrinkToFit(); |
| iter++; |
| } |
| } |
| return savedBytes; |
| } |
| |
| /** |
| * Iterate over the bitmap elements. The function iterator is called once |
| * for all the values with ptr (can be nullptr) as the second parameter of each |
| * call. |
| * |
| * roaring_iterator is simply a pointer to a function that returns bool |
| * (true means that the iteration should continue while false means that it |
| * should stop), and takes (uint32_t,void*) as inputs. |
| */ |
| void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const { |
| for (const auto& map_entry : roarings) { |
| bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator, |
| uint64_t(map_entry.first) << 32, ptr); |
| if (!should_continue) { |
| break; |
| } |
| } |
| } |
| |
| /** |
| * write a bitmap to a char buffer. |
| * Returns how many bytes were written which should be getSizeInBytes(). |
| */ |
| size_t write(char* buf, int serialize_version) const { |
| bool is_v1 = serialize_version == 1; |
| BitmapTypeCode::type type_bitmap32 = |
| is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2; |
| BitmapTypeCode::type type_bitmap64 = |
| is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2; |
| |
| if (is32BitsEnough()) { |
| *(buf++) = type_bitmap32; |
| auto it = roarings.find(0); |
| if (it == roarings.end()) { // empty bitmap |
| roaring::Roaring r; |
| return r.write(buf, is_v1) + 1; |
| } |
| return it->second.write(buf, is_v1) + 1; |
| } |
| |
| const char* orig = buf; |
| // put type code |
| *(buf++) = type_bitmap64; |
| // push map size |
| buf = (char*)encode_varint64((uint8_t*)buf, roarings.size()); |
| std::for_each(roarings.cbegin(), roarings.cend(), |
| [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) { |
| // push map key |
| encode_fixed32_le((uint8_t*)buf, map_entry.first); |
| buf += sizeof(uint32_t); |
| // push map value Roaring |
| buf += map_entry.second.write(buf, is_v1); |
| }); |
| return buf - orig; |
| } |
| |
| /** |
| * read a bitmap from a serialized version. |
| * |
| * This function is unsafe in the sense that if you provide bad data, |
| * many bytes could be read, possibly causing a buffer overflow. See also readSafe. |
| */ |
| static Roaring64Map read(const char* buf) { |
| Roaring64Map result; |
| |
| bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf; |
| bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf; |
| bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf; |
| if (is_bitmap32) { |
| roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1); |
| result.emplaceOrInsert(0, std::move(read)); |
| return result; |
| } |
| |
| DCHECK(is_bitmap64); |
| buf++; |
| |
| // get map size (varint64 took 1~10 bytes) |
| uint64_t map_size; |
| buf = reinterpret_cast<const char*>( |
| decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf), |
| reinterpret_cast<const uint8_t*>(buf + 10), &map_size)); |
| DCHECK(buf != nullptr); |
| for (uint64_t lcv = 0; lcv < map_size; lcv++) { |
| // get map key |
| uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf)); |
| buf += sizeof(uint32_t); |
| // read map value Roaring |
| roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1); |
| // forward buffer past the last Roaring Bitmap |
| buf += read_var.getSizeInBytes(is_v1); |
| result.emplaceOrInsert(key, std::move(read_var)); |
| } |
| return result; |
| } |
| |
| /** |
| * How many bytes are required to serialize this bitmap |
| */ |
| size_t getSizeInBytes(int serialize_version) const { |
| bool is_v1 = serialize_version == 1; |
| if (is32BitsEnough()) { |
| auto it = roarings.find(0); |
| if (it == roarings.end()) { // empty bitmap |
| roaring::Roaring r; |
| return r.getSizeInBytes(is_v1) + 1; |
| } |
| return it->second.getSizeInBytes(is_v1) + 1; |
| } |
| // start with type code, map size and size of keys for each map entry |
| size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t); |
| return std::accumulate( |
| roarings.cbegin(), roarings.cend(), init, |
| [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) { |
| // add in bytes used by each Roaring |
| return previous + map_entry.second.getSizeInBytes(is_v1); |
| }); |
| } |
| |
| /** |
| * remove all elements |
| */ |
| void clear() { roarings.clear(); } |
| |
| /** |
| * Return whether all elements can be represented in 32 bits |
| */ |
| bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); } |
| |
| /** |
| * Computes the intersection between two bitmaps and returns new bitmap. |
| * The current bitmap and the provided bitmap are unchanged. |
| */ |
| Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; } |
| |
| /** |
| * Computes the difference between two bitmaps and returns new bitmap. |
| * The current bitmap and the provided bitmap are unchanged. |
| */ |
| Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; } |
| |
| /** |
| * Computes the union between two bitmaps and returns new bitmap. |
| * The current bitmap and the provided bitmap are unchanged. |
| */ |
| Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; } |
| |
| /** |
| * Computes the symmetric union between two bitmaps and returns new bitmap. |
| * The current bitmap and the provided bitmap are unchanged. |
| */ |
| Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; } |
| |
| /** |
| * computes the logical or (union) between "n" bitmaps (referenced by a |
| * pointer). |
| */ |
| static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) { |
| Roaring64Map ans; |
| // not particularly fast |
| for (size_t lcv = 0; lcv < n; ++lcv) { |
| ans |= *(inputs[lcv]); |
| } |
| return ans; |
| } |
| |
| friend class Roaring64MapSetBitForwardIterator; |
| using const_iterator = Roaring64MapSetBitForwardIterator; |
| |
| /** |
| * Returns an iterator that can be used to access the position of the |
| * set bits. The running time complexity of a full scan is proportional to |
| * the |
| * number |
| * of set bits: be aware that if you have long strings of 1s, this can be |
| * very inefficient. |
| * |
| * It can be much faster to use the toArray method if you want to |
| * retrieve the set bits. |
| */ |
| const_iterator begin() const; |
| |
| /** |
| * A bogus iterator that can be used together with begin() |
| * for constructions such as for(auto i = b.begin(); |
| * i!=b.end(); ++i) {} |
| */ |
| const_iterator end() const; |
| |
| private: |
| phmap::btree_map<uint32_t, roaring::Roaring> roarings {}; |
| bool copyOnWrite {false}; |
| static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); } |
| static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); } |
| static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) { |
| return (uint64_t(highBytes) << 32) | uint64_t(lowBytes); |
| } |
| void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) { |
| roarings.emplace(std::make_pair(key, value)); |
| } |
| |
| void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) { |
| roarings.emplace(key, value); |
| } |
| }; |
| |
| // Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh |
| // Used to go through the set bits. Not optimally fast, but convenient. |
| class Roaring64MapSetBitForwardIterator { |
| public: |
| using type_of_iterator = Roaring64MapSetBitForwardIterator; |
| |
| /** |
| * Provides the location of the set bit. |
| */ |
| uint64_t operator*() const { |
| return Roaring64Map::uniteBytes(map_iter->first, i.current_value); |
| } |
| |
| type_of_iterator& operator++() { // ++i, must returned inc. value |
| if (i.has_value) { |
| roaring_advance_uint32_iterator(&i); |
| } |
| while (!i.has_value) { |
| map_iter++; |
| if (map_iter == map_end) return *this; |
| roaring_init_iterator(&map_iter->second.roaring, &i); |
| } |
| return *this; |
| } |
| |
| type_of_iterator operator++(int) { // i++, must return orig. value |
| Roaring64MapSetBitForwardIterator orig(*this); |
| roaring_advance_uint32_iterator(&i); |
| while (!i.has_value) { |
| map_iter++; |
| if (map_iter == map_end) return orig; |
| roaring_init_iterator(&map_iter->second.roaring, &i); |
| } |
| return orig; |
| } |
| |
| bool move(const uint64_t& x) { |
| map_iter = p.lower_bound(Roaring64Map::highBytes(x)); |
| if (map_iter != p.cend()) { |
| roaring_init_iterator(&map_iter->second.roaring, &i); |
| if (map_iter->first == Roaring64Map::highBytes(x)) { |
| if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x))) |
| return true; |
| map_iter++; |
| if (map_iter == map_end) return false; |
| roaring_init_iterator(&map_iter->second.roaring, &i); |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| bool operator==(const Roaring64MapSetBitForwardIterator& o) const { |
| if (map_iter == map_end && o.map_iter == o.map_end) return true; |
| if (o.map_iter == o.map_end) return false; |
| return **this == *o; |
| } |
| |
| bool operator!=(const Roaring64MapSetBitForwardIterator& o) const { |
| if (map_iter == map_end && o.map_iter == o.map_end) return false; |
| if (o.map_iter == o.map_end) return true; |
| return **this != *o; |
| } |
| Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) { |
| map_iter = r.map_iter; |
| map_end = r.map_end; |
| i = r.i; |
| return *this; |
| } |
| |
| Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default; |
| |
| Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false) |
| : p(parent.roarings), map_end(parent.roarings.cend()) { |
| if (exhausted || parent.roarings.empty()) { |
| map_iter = parent.roarings.cend(); |
| } else { |
| map_iter = parent.roarings.cbegin(); |
| roaring_init_iterator(&map_iter->second.roaring, &i); |
| while (!i.has_value) { |
| map_iter++; |
| if (map_iter == map_end) return; |
| roaring_init_iterator(&map_iter->second.roaring, &i); |
| } |
| } |
| } |
| |
| protected: |
| const phmap::btree_map<uint32_t, roaring::Roaring>& p; |
| phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {}; |
| phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {}; |
| roaring::api::roaring_uint32_iterator_t i {}; |
| }; |
| |
| inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const { |
| return {*this}; |
| } |
| |
| inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const { |
| return {*this, true}; |
| } |
| |
| } // namespace detail |
| |
| // Represent the in-memory and on-disk structure of Doris's BITMAP data type. |
| // Optimize for the case where the bitmap contains 0 or 1 element which is common |
| // for streaming load scenario. |
| class BitmapValueIterator; |
| class BitmapValue { |
| public: |
| template <typename T> |
| using SetContainer = phmap::flat_hash_set<T>; |
| |
| // Construct an empty bitmap. |
| BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); } |
| |
| // Construct a bitmap with one element. |
| explicit BitmapValue(uint64_t value) |
| : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {} |
| |
| // Construct a bitmap from serialized data. |
| explicit BitmapValue(const char* src) : _is_shared(false) { |
| bool res = deserialize(src); |
| DCHECK(res); |
| } |
| |
| BitmapValue(const BitmapValue& other) { |
| _type = other._type; |
| switch (other._type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| _sv = other._sv; |
| break; |
| case BITMAP: |
| _bitmap = other._bitmap; |
| break; |
| case SET: |
| _set = other._set; |
| break; |
| } |
| |
| if (other._type == BITMAP) { |
| _is_shared = true; |
| // should also set other's state to shared, so that other bitmap value will |
| // create a new bitmap when it wants to modify it. |
| const_cast<BitmapValue&>(other)._is_shared = true; |
| } |
| } |
| |
| BitmapValue(BitmapValue&& other) noexcept { |
| _type = other._type; |
| switch (other._type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| _sv = other._sv; |
| break; |
| case BITMAP: |
| _bitmap = std::move(other._bitmap); |
| other._bitmap = nullptr; |
| break; |
| case SET: |
| _set = std::move(other._set); |
| break; |
| } |
| _is_shared = other._is_shared; |
| other._type = EMPTY; |
| other._is_shared = false; |
| } |
| |
| BitmapValue& operator=(const BitmapValue& other) { |
| if (this == &other) { |
| return *this; |
| } |
| reset(); |
| _type = other._type; |
| switch (other._type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| _sv = other._sv; |
| break; |
| case BITMAP: |
| _bitmap = other._bitmap; |
| break; |
| case SET: |
| _set = other._set; |
| break; |
| } |
| |
| if (other._type == BITMAP) { |
| _is_shared = true; |
| // should also set other's state to shared, so that other bitmap value will |
| // create a new bitmap when it wants to modify it. |
| const_cast<BitmapValue&>(other)._is_shared = true; |
| } |
| return *this; |
| } |
| |
| static BitmapValue empty_bitmap() { return BitmapValue {}; } |
| |
| BitmapValue& operator=(BitmapValue&& other) noexcept { |
| if (this == &other) { |
| return *this; |
| } |
| reset(); |
| |
| _type = other._type; |
| switch (other._type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| _sv = other._sv; |
| break; |
| case BITMAP: |
| _bitmap = std::move(other._bitmap); |
| other._bitmap = nullptr; |
| break; |
| case SET: |
| _set = std::move(other._set); |
| break; |
| } |
| _is_shared = other._is_shared; |
| return *this; |
| } |
| |
| // Construct a bitmap from given elements. |
| explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) { |
| if (bits.size() == 0) { |
| _type = EMPTY; |
| return; |
| } |
| |
| if (bits.size() == 1) { |
| _type = SINGLE; |
| _sv = bits[0]; |
| return; |
| } |
| |
| if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) { |
| _type = BITMAP; |
| _prepare_bitmap_for_write(); |
| _bitmap->addMany(bits.size(), &bits[0]); |
| } else { |
| _type = SET; |
| for (auto v : bits) { |
| _set.insert(v); |
| } |
| } |
| } |
| |
| BitmapTypeCode::type get_type_code() const { |
| switch (_type) { |
| case EMPTY: |
| return BitmapTypeCode::EMPTY; |
| case SINGLE: |
| if (_sv <= std::numeric_limits<uint32_t>::max()) { |
| return BitmapTypeCode::SINGLE32; |
| } else { |
| return BitmapTypeCode::SINGLE64; |
| } |
| case SET: |
| return BitmapTypeCode::SET; |
| case BITMAP: |
| bool is_v1 = (config::bitmap_serialize_version == 1); |
| if (_bitmap->is32BitsEnough()) { |
| return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2; |
| } else { |
| return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2; |
| } |
| } |
| __builtin_unreachable(); |
| } |
| |
| template <typename T> |
| void add_many(const T* values, const size_t count) { |
| switch (_type) { |
| case EMPTY: |
| if (count == 1) { |
| _sv = values[0]; |
| _type = SINGLE; |
| } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) { |
| for (size_t i = 0; i != count; ++i) { |
| _set.insert(values[i]); |
| } |
| _type = SET; |
| } else { |
| _prepare_bitmap_for_write(); |
| _bitmap->addMany(count, values); |
| _type = BITMAP; |
| } |
| break; |
| case SINGLE: |
| if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) { |
| _set.insert(_sv); |
| for (size_t i = 0; i != count; ++i) { |
| _set.insert(values[i]); |
| } |
| _type = SET; |
| _convert_to_bitmap_if_need(); |
| } else { |
| _prepare_bitmap_for_write(); |
| _bitmap->add(_sv); |
| _bitmap->addMany(count, values); |
| _type = BITMAP; |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| _bitmap->addMany(count, values); |
| break; |
| case SET: |
| for (size_t i = 0; i != count; ++i) { |
| _set.insert(values[i]); |
| } |
| _convert_to_bitmap_if_need(); |
| break; |
| } |
| } |
| |
| void add(uint64_t value) { |
| switch (_type) { |
| case EMPTY: |
| if (!config::enable_set_in_bitmap_value) { |
| _sv = value; |
| _type = SINGLE; |
| } else { |
| _set.insert(value); |
| _type = SET; |
| } |
| break; |
| case SINGLE: |
| //there is no need to convert the type if two variables are equal |
| if (_sv == value) { |
| break; |
| } |
| if (config::enable_set_in_bitmap_value) { |
| _set.insert(_sv); |
| _set.insert(value); |
| _type = SET; |
| } else { |
| _prepare_bitmap_for_write(); |
| _bitmap->add(_sv); |
| _bitmap->add(value); |
| _type = BITMAP; |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| _bitmap->add(value); |
| break; |
| case SET: |
| _set.insert(value); |
| _convert_to_bitmap_if_need(); |
| break; |
| } |
| } |
| |
| void remove(uint64_t value) { |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| //there is need to convert the type if two variables are equal |
| if (_sv == value) { |
| _type = EMPTY; |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| _bitmap->remove(value); |
| _convert_to_smaller_type(); |
| break; |
| case SET: |
| _set.erase(value); |
| _convert_to_smaller_type(); |
| break; |
| } |
| } |
| |
| // Compute the union between the current bitmap and the provided bitmap. |
| BitmapValue& operator-=(const BitmapValue& rhs) { |
| switch (rhs._type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| remove(rhs._sv); |
| break; |
| case BITMAP: |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| if (rhs._bitmap->contains(_sv)) { |
| _type = EMPTY; |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| *_bitmap -= *rhs._bitmap; |
| _convert_to_smaller_type(); |
| break; |
| case SET: { |
| for (auto it = _set.begin(); it != _set.end();) { |
| if (rhs.contains(*it)) { |
| it = _set.erase(it); |
| } else { |
| ++it; |
| } |
| } |
| _convert_to_smaller_type(); |
| break; |
| } |
| } |
| break; |
| case SET: { |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| if (rhs._set.contains(_sv)) { |
| _type = EMPTY; |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| for (auto v : rhs._set) { |
| if (_bitmap->contains(v)) { |
| _bitmap->remove(v); |
| } |
| } |
| _convert_to_smaller_type(); |
| break; |
| case SET: { |
| for (auto it = _set.begin(); it != _set.end();) { |
| if (rhs.contains(*it)) { |
| it = _set.erase(it); |
| } else { |
| ++it; |
| } |
| } |
| _convert_to_smaller_type(); |
| break; |
| } |
| } |
| } |
| } |
| return *this; |
| } |
| |
| // Compute the union between the current bitmap and the provided bitmap. |
| // Possible type transitions are: |
| // EMPTY -> SINGLE |
| // EMPTY -> BITMAP |
| // SINGLE -> BITMAP |
| BitmapValue& operator|=(const BitmapValue& rhs) { |
| switch (rhs._type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| add(rhs._sv); |
| break; |
| case BITMAP: |
| switch (_type) { |
| case EMPTY: |
| _bitmap = rhs._bitmap; |
| const_cast<BitmapValue&>(rhs)._is_shared = true; |
| _is_shared = true; |
| _type = BITMAP; |
| break; |
| case SINGLE: |
| _bitmap = rhs._bitmap; |
| const_cast<BitmapValue&>(rhs)._is_shared = true; |
| _is_shared = true; |
| _prepare_bitmap_for_write(); |
| _bitmap->add(_sv); |
| _type = BITMAP; |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| *_bitmap |= *rhs._bitmap; |
| break; |
| case SET: { |
| _prepare_bitmap_for_write(); |
| *_bitmap = *rhs._bitmap; |
| |
| for (auto v : _set) { |
| _bitmap->add(v); |
| } |
| _type = BITMAP; |
| break; |
| } |
| } |
| break; |
| case SET: |
| switch (_type) { |
| case EMPTY: |
| _set = rhs._set; |
| _type = SET; |
| break; |
| case SINGLE: { |
| if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) { |
| _prepare_bitmap_for_write(); |
| _bitmap->add(_sv); |
| for (auto v : rhs._set) { |
| _bitmap->add(v); |
| } |
| _type = BITMAP; |
| } else { |
| _set = rhs._set; |
| _set.insert(_sv); |
| _type = SET; |
| } |
| break; |
| } |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| for (auto v : rhs._set) { |
| _bitmap->add(v); |
| } |
| break; |
| case SET: { |
| for (auto v : rhs._set) { |
| _set.insert(v); |
| } |
| _convert_to_bitmap_if_need(); |
| break; |
| } |
| } |
| break; |
| } |
| return *this; |
| } |
| |
| BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) { |
| std::vector<const detail::Roaring64Map*> bitmaps; |
| std::vector<uint64_t> single_values; |
| std::vector<const SetContainer<uint64_t>*> sets; |
| for (const auto* value : values) { |
| switch (value->_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| single_values.push_back(value->_sv); |
| break; |
| case BITMAP: |
| bitmaps.push_back(value->_bitmap.get()); |
| break; |
| case SET: |
| sets.push_back(&value->_set); |
| break; |
| } |
| } |
| |
| if (!bitmaps.empty()) { |
| _prepare_bitmap_for_write(); |
| switch (_type) { |
| case EMPTY: |
| *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); |
| break; |
| case SINGLE: |
| *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); |
| _bitmap->add(_sv); |
| break; |
| case BITMAP: |
| for (const auto* bitmap : bitmaps) { |
| *_bitmap |= *bitmap; |
| } |
| break; |
| case SET: { |
| *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); |
| for (auto v : _set) { |
| _bitmap->add(v); |
| } |
| _set.clear(); |
| break; |
| } |
| } |
| _type = BITMAP; |
| } |
| |
| if (!sets.empty()) { |
| for (auto& set : sets) { |
| for (auto v : *set) { |
| _set.insert(v); |
| } |
| } |
| switch (_type) { |
| case EMPTY: |
| _type = SET; |
| break; |
| case SINGLE: { |
| _set.insert(_sv); |
| _type = SET; |
| break; |
| } |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| for (auto v : _set) { |
| _bitmap->add(v); |
| } |
| _type = BITMAP; |
| _set.clear(); |
| break; |
| case SET: { |
| break; |
| } |
| } |
| if (_type == SET) { |
| _convert_to_bitmap_if_need(); |
| } |
| } |
| |
| if (_type == EMPTY && single_values.size() == 1) { |
| if (config::enable_set_in_bitmap_value) { |
| _type = SET; |
| _set.insert(single_values[0]); |
| } else { |
| _sv = single_values[0]; |
| _type = SINGLE; |
| } |
| } else if (!single_values.empty()) { |
| switch (_type) { |
| case EMPTY: |
| case SINGLE: |
| if (config::enable_set_in_bitmap_value) { |
| _set.insert(single_values.cbegin(), single_values.cend()); |
| if (_type == SINGLE) { |
| _set.insert(_sv); |
| } |
| _type = SET; |
| _convert_to_bitmap_if_need(); |
| } else { |
| _prepare_bitmap_for_write(); |
| _bitmap->addMany(single_values.size(), single_values.data()); |
| if (_type == SINGLE) { |
| _bitmap->add(_sv); |
| } |
| _type = BITMAP; |
| _convert_to_smaller_type(); |
| } |
| break; |
| case BITMAP: { |
| _prepare_bitmap_for_write(); |
| _bitmap->addMany(single_values.size(), single_values.data()); |
| break; |
| } |
| case SET: |
| _set.insert(single_values.cbegin(), single_values.cend()); |
| _convert_to_bitmap_if_need(); |
| break; |
| } |
| } |
| |
| return *this; |
| } |
| |
| // Compute the intersection between the current bitmap and the provided bitmap. |
| // Possible type transitions are: |
| // SINGLE -> EMPTY |
| // BITMAP -> EMPTY |
| // BITMAP -> SINGLE |
| BitmapValue& operator&=(const BitmapValue& rhs) { |
| switch (rhs._type) { |
| case EMPTY: |
| reset(); // empty & any = empty |
| break; |
| case SINGLE: |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| if (_sv != rhs._sv) { |
| _type = EMPTY; |
| } |
| break; |
| case BITMAP: |
| if (!_bitmap->contains(rhs._sv)) { |
| _type = EMPTY; |
| } else { |
| _type = SINGLE; |
| _sv = rhs._sv; |
| } |
| _bitmap.reset(); |
| _is_shared = false; |
| break; |
| case SET: |
| if (!_set.contains(rhs._sv)) { |
| _type = EMPTY; |
| } else { |
| _type = SINGLE; |
| _sv = rhs._sv; |
| } |
| _set.clear(); |
| break; |
| } |
| break; |
| case BITMAP: |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| if (!rhs._bitmap->contains(_sv)) { |
| _type = EMPTY; |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| *_bitmap &= *rhs._bitmap; |
| _convert_to_smaller_type(); |
| break; |
| case SET: |
| for (auto it = _set.begin(); it != _set.end();) { |
| if (!rhs._bitmap->contains(*it)) { |
| it = _set.erase(it); |
| } else { |
| ++it; |
| } |
| } |
| _convert_to_smaller_type(); |
| break; |
| } |
| break; |
| case SET: |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| if (!rhs._set.contains(_sv)) { |
| _type = EMPTY; |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| for (auto v : rhs._set) { |
| if (_bitmap->contains(v)) { |
| _set.insert(v); |
| } |
| } |
| _type = SET; |
| _bitmap.reset(); |
| _is_shared = false; |
| _convert_to_smaller_type(); |
| break; |
| case SET: |
| for (auto it = _set.begin(); it != _set.end();) { |
| if (!rhs._set.contains(*it)) { |
| it = _set.erase(it); |
| } else { |
| ++it; |
| } |
| } |
| _convert_to_smaller_type(); |
| break; |
| } |
| break; |
| } |
| return *this; |
| } |
| |
| // Compute the symmetric union between the current bitmap and the provided bitmap. |
| // Possible type transitions are: |
| // SINGLE -> EMPTY |
| // BITMAP -> EMPTY |
| // BITMAP -> SINGLE |
| BitmapValue& operator^=(const BitmapValue& rhs) { |
| switch (rhs._type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| switch (_type) { |
| case EMPTY: |
| add(rhs._sv); |
| break; |
| case SINGLE: |
| if (_sv == rhs._sv) { |
| _type = EMPTY; |
| } else { |
| add(rhs._sv); |
| } |
| break; |
| case BITMAP: |
| if (!_bitmap->contains(rhs._sv)) { |
| add(rhs._sv); |
| } else { |
| _prepare_bitmap_for_write(); |
| _bitmap->remove(rhs._sv); |
| } |
| break; |
| case SET: |
| if (!_set.contains(rhs._sv)) { |
| _set.insert(rhs._sv); |
| } else { |
| _set.erase(rhs._sv); |
| } |
| break; |
| } |
| break; |
| case BITMAP: |
| switch (_type) { |
| case EMPTY: |
| _bitmap = rhs._bitmap; |
| const_cast<BitmapValue&>(rhs)._is_shared = true; |
| _is_shared = true; |
| _type = BITMAP; |
| break; |
| case SINGLE: |
| _bitmap = rhs._bitmap; |
| const_cast<BitmapValue&>(rhs)._is_shared = true; |
| _is_shared = true; |
| _type = BITMAP; |
| _prepare_bitmap_for_write(); |
| if (!rhs._bitmap->contains(_sv)) { |
| _bitmap->add(_sv); |
| } else { |
| _bitmap->remove(_sv); |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| *_bitmap ^= *rhs._bitmap; |
| _convert_to_smaller_type(); |
| break; |
| case SET: |
| _prepare_bitmap_for_write(); |
| *_bitmap = *rhs._bitmap; |
| for (auto v : _set) { |
| if (_bitmap->contains(v)) { |
| _bitmap->remove(v); |
| } else { |
| _bitmap->add(v); |
| } |
| } |
| _type = BITMAP; |
| _convert_to_smaller_type(); |
| break; |
| } |
| break; |
| case SET: |
| switch (_type) { |
| case EMPTY: |
| _set = rhs._set; |
| _type = SET; |
| break; |
| case SINGLE: |
| _set = rhs._set; |
| if (!rhs._set.contains(_sv)) { |
| _set.insert(_sv); |
| } else { |
| _set.erase(_sv); |
| } |
| _type = SET; |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| for (auto v : rhs._set) { |
| if (_bitmap->contains(v)) { |
| _bitmap->remove(v); |
| } else { |
| _bitmap->add(v); |
| } |
| } |
| _convert_to_smaller_type(); |
| break; |
| case SET: |
| for (auto v : rhs._set) { |
| if (_set.contains(v)) { |
| _set.erase(v); |
| } else { |
| _set.insert(v); |
| } |
| } |
| _convert_to_smaller_type(); |
| break; |
| } |
| break; |
| } |
| return *this; |
| } |
| |
| // check if value x is present |
| bool contains(uint64_t x) const { |
| switch (_type) { |
| case EMPTY: |
| return false; |
| case SINGLE: |
| return _sv == x; |
| case BITMAP: |
| return _bitmap->contains(x); |
| case SET: |
| return _set.contains(x); |
| } |
| return false; |
| } |
| |
| // true if contains a value that belongs to the range [left, right]. |
| bool contains_any(uint64_t left, uint64_t right) const; |
| |
| uint64_t cardinality() const { |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| return 1; |
| case BITMAP: |
| return _bitmap->cardinality(); |
| case SET: |
| return _set.size(); |
| } |
| return 0; |
| } |
| |
| uint64_t and_cardinality(const BitmapValue& rhs) const { |
| switch (rhs._type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| return _sv == rhs._sv; |
| case BITMAP: |
| return _bitmap->contains(rhs._sv); |
| case SET: |
| return _set.contains(rhs._sv); |
| } |
| break; |
| case BITMAP: |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| return rhs._bitmap->contains(_sv); |
| case BITMAP: |
| return _bitmap->andCardinality(*rhs._bitmap); |
| case SET: { |
| uint64_t cardinality = 0; |
| for (auto v : _set) { |
| if (rhs._bitmap->contains(v)) { |
| ++cardinality; |
| } |
| } |
| return cardinality; |
| } |
| } |
| break; |
| case SET: |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| return rhs._set.contains(_sv); |
| case BITMAP: { |
| uint64_t cardinality = 0; |
| for (auto v : rhs._set) { |
| if (_bitmap->contains(v)) { |
| ++cardinality; |
| } |
| } |
| return cardinality; |
| } |
| case SET: { |
| uint64_t cardinality = 0; |
| for (auto v : _set) { |
| if (rhs._set.contains(v)) { |
| ++cardinality; |
| } |
| } |
| return cardinality; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| uint64_t or_cardinality(const BitmapValue& rhs) const { |
| switch (rhs._type) { |
| case EMPTY: |
| return cardinality(); |
| case SINGLE: |
| switch (_type) { |
| case EMPTY: |
| return 1; |
| case SINGLE: |
| return 1 + (_sv != rhs._sv); |
| case BITMAP: |
| return cardinality() + !_bitmap->contains(rhs._sv); |
| case SET: |
| return _set.size() + !_set.contains(rhs._sv); |
| } |
| break; |
| case BITMAP: |
| switch (_type) { |
| case EMPTY: |
| return rhs.cardinality(); |
| case SINGLE: |
| return rhs.cardinality() + !rhs._bitmap->contains(_sv); |
| case BITMAP: |
| return _bitmap->orCardinality(*rhs._bitmap); |
| case SET: { |
| uint64_t cardinality = rhs._bitmap->cardinality(); |
| for (auto v : _set) { |
| if (!rhs._bitmap->contains(v)) { |
| ++cardinality; |
| } |
| } |
| return cardinality; |
| } |
| } |
| break; |
| case SET: |
| switch (_type) { |
| case EMPTY: |
| return rhs.cardinality(); |
| case SINGLE: |
| return rhs.cardinality() + !rhs._set.contains(_sv); |
| case BITMAP: { |
| uint64_t cardinality = _bitmap->cardinality(); |
| for (auto v : rhs._set) { |
| if (!_bitmap->contains(v)) { |
| ++cardinality; |
| } |
| } |
| return cardinality; |
| } |
| case SET: { |
| uint64_t cardinality = _set.size(); |
| for (auto v : _set) { |
| if (!rhs._set.contains(v)) { |
| ++cardinality; |
| } |
| } |
| return cardinality; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| uint64_t andnot_cardinality(const BitmapValue& rhs) const { |
| switch (rhs._type) { |
| case EMPTY: |
| return cardinality(); |
| case SINGLE: |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| return 1 - (_sv == rhs._sv); |
| case BITMAP: |
| return cardinality() - _bitmap->contains(rhs._sv); |
| case SET: |
| return cardinality() - _set.contains(rhs._sv); |
| } |
| break; |
| case BITMAP: |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| return !rhs._bitmap->contains(_sv); |
| case BITMAP: |
| return _bitmap->andnotCardinality(*rhs._bitmap); |
| case SET: { |
| uint64_t cardinality = _set.size(); |
| for (auto v : _set) { |
| if (rhs._bitmap->contains(v)) { |
| cardinality -= 1; |
| } |
| } |
| return cardinality; |
| } |
| } |
| break; |
| case SET: |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: |
| return !rhs._set.contains(_sv); |
| case BITMAP: { |
| uint64_t cardinality = _bitmap->cardinality(); |
| for (auto v : rhs._set) { |
| if (_bitmap->contains(v)) { |
| cardinality -= 1; |
| } |
| } |
| return cardinality; |
| } |
| case SET: { |
| uint64_t cardinality = _set.size(); |
| for (auto v : rhs._set) { |
| if (_set.contains(v)) { |
| cardinality -= 1; |
| } |
| } |
| return cardinality; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| // Return how many bytes are required to serialize this bitmap. |
| // See BitmapTypeCode for the serialized format. |
| size_t getSizeInBytes() { |
| size_t res = 0; |
| switch (_type) { |
| case EMPTY: |
| res = 1; |
| break; |
| case SINGLE: |
| if (_sv <= std::numeric_limits<uint32_t>::max()) { |
| res = 1 + sizeof(uint32_t); |
| } else { |
| res = 1 + sizeof(uint64_t); |
| } |
| break; |
| case BITMAP: |
| _prepare_bitmap_for_write(); |
| _bitmap->runOptimize(); |
| _bitmap->shrinkToFit(); |
| res = _bitmap->getSizeInBytes(config::bitmap_serialize_version); |
| break; |
| case SET: |
| /// 1 byte for type, 1 byte for count |
| res = 2 + sizeof(uint64_t) * _set.size(); |
| break; |
| } |
| return res; |
| } |
| |
| // Serialize the bitmap value to dst, which should be large enough. |
| // Client should call `getSizeInBytes` first to get the serialized size. |
| void write_to(char* dst) const { |
| switch (_type) { |
| case EMPTY: |
| *dst = BitmapTypeCode::EMPTY; |
| break; |
| case SINGLE: |
| if (_sv <= std::numeric_limits<uint32_t>::max()) { |
| *(dst++) = BitmapTypeCode::SINGLE32; |
| encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv)); |
| } else { |
| *(dst++) = BitmapTypeCode::SINGLE64; |
| encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv); |
| } |
| break; |
| case SET: |
| DCHECK(config::enable_set_in_bitmap_value); |
| *dst = BitmapTypeCode::SET; |
| ++dst; |
| *dst = static_cast<uint8_t>(_set.size()); |
| ++dst; |
| for (auto v : _set) { |
| encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v); |
| dst += sizeof(uint64_t); |
| } |
| break; |
| case BITMAP: |
| _bitmap->write(dst, config::bitmap_serialize_version); |
| break; |
| } |
| } |
| |
| // Deserialize a bitmap value from `src`. |
| // Return false if `src` begins with unknown type code, true otherwise. |
| bool deserialize(const char* src) { |
| switch (*src) { |
| case BitmapTypeCode::EMPTY: |
| _type = EMPTY; |
| break; |
| case BitmapTypeCode::SINGLE32: |
| _type = SINGLE; |
| _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1)); |
| if (config::enable_set_in_bitmap_value) { |
| _type = SET; |
| _set.insert(_sv); |
| } |
| break; |
| case BitmapTypeCode::SINGLE64: |
| _type = SINGLE; |
| _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1)); |
| if (config::enable_set_in_bitmap_value) { |
| _type = SET; |
| _set.insert(_sv); |
| } |
| break; |
| case BitmapTypeCode::BITMAP32: |
| case BitmapTypeCode::BITMAP64: |
| case BitmapTypeCode::BITMAP32_V2: |
| case BitmapTypeCode::BITMAP64_V2: |
| _type = BITMAP; |
| _is_shared = false; |
| _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src)); |
| break; |
| case BitmapTypeCode::SET: { |
| _type = SET; |
| ++src; |
| uint8_t count = *src; |
| ++src; |
| CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count"; |
| for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) { |
| _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src))); |
| } |
| CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count"; |
| |
| if (!config::enable_set_in_bitmap_value) { |
| _prepare_bitmap_for_write(); |
| for (auto v : _set) { |
| _bitmap->add(v); |
| } |
| _type = BITMAP; |
| _set.clear(); |
| } |
| break; |
| } |
| case BitmapTypeCode::SET_V2: { |
| uint32_t size = 0; |
| memcpy(&size, src + 1, sizeof(uint32_t)); |
| src += sizeof(uint32_t) + 1; |
| |
| if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) { |
| _type = BITMAP; |
| _prepare_bitmap_for_write(); |
| |
| for (int i = 0; i < size; ++i) { |
| uint64_t key {}; |
| memcpy(&key, src, sizeof(uint64_t)); |
| _bitmap->add(key); |
| src += sizeof(uint64_t); |
| } |
| } else { |
| _type = SET; |
| _set.reserve(size); |
| |
| for (int i = 0; i < size; ++i) { |
| uint64_t key {}; |
| memcpy(&key, src, sizeof(uint64_t)); |
| _set.insert(key); |
| src += sizeof(uint64_t); |
| } |
| } |
| break; |
| } |
| default: |
| LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY |
| << " and " << BitmapTypeCode::BITMAP64 << " actual is " |
| << static_cast<int>(*src); |
| return false; |
| } |
| return true; |
| } |
| |
| int64_t minimum() const { |
| switch (_type) { |
| case SINGLE: |
| return _sv; |
| case BITMAP: |
| return _bitmap->minimum(); |
| case SET: |
| return _min_in_set(); |
| default: |
| return 0; |
| } |
| } |
| |
| // TODO limit string size to avoid OOM |
| std::string to_string() const { |
| std::stringstream ss; |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: |
| ss << _sv; |
| break; |
| case BITMAP: { |
| struct IterCtx { |
| std::stringstream* ss = nullptr; |
| bool first = true; |
| } iter_ctx; |
| iter_ctx.ss = &ss; |
| |
| _bitmap->iterate( |
| [](uint64_t value, void* c) -> bool { |
| auto ctx = reinterpret_cast<IterCtx*>(c); |
| if (ctx->first) { |
| ctx->first = false; |
| } else { |
| (*ctx->ss) << ","; |
| } |
| (*ctx->ss) << value; |
| return true; |
| }, |
| &iter_ctx); |
| break; |
| } |
| case SET: { |
| struct IterCtx { |
| std::stringstream* ss = nullptr; |
| bool first = true; |
| } iter_ctx; |
| iter_ctx.ss = &ss; |
| |
| std::vector<uint64_t> values(_set.begin(), _set.end()); |
| std::sort(values.begin(), values.end()); |
| |
| for (auto v : values) { |
| if (iter_ctx.first) { |
| iter_ctx.first = false; |
| } else { |
| (*iter_ctx.ss) << ","; |
| } |
| (*iter_ctx.ss) << v; |
| } |
| break; |
| } |
| } |
| return ss.str(); |
| } |
| |
| int64_t maximum() const { |
| switch (_type) { |
| case SINGLE: |
| return _sv; |
| case BITMAP: |
| return _bitmap->maximum(); |
| case SET: |
| return _max_in_set(); |
| default: |
| return 0; |
| } |
| } |
| |
| uint64_t max(bool* empty) const { |
| return min_or_max(empty, [&]() { return maximum(); }); |
| } |
| |
| uint64_t min(bool* empty) const { |
| return min_or_max(empty, [&]() { return minimum(); }); |
| } |
| |
| uint64_t _min_in_set() const { |
| DCHECK_EQ(_type, SET); |
| return *std::min_element(_set.begin(), _set.end()); |
| } |
| |
| uint64_t _max_in_set() const { |
| DCHECK_EQ(_type, SET); |
| return *std::max_element(_set.begin(), _set.end()); |
| } |
| |
| bool empty() const { return _type == EMPTY; } |
| |
| /** |
| * Return new set with specified range (not include the range_end) |
| */ |
| int64_t sub_range(const int64_t& range_start, const int64_t& range_end, |
| BitmapValue* ret_bitmap) { |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: { |
| //only single value, so _sv must in [range_start,range_end) |
| if (range_start <= _sv && _sv < range_end) { |
| ret_bitmap->add(_sv); |
| return 1; |
| } else { |
| return 0; |
| } |
| } |
| case BITMAP: { |
| int64_t count = 0; |
| for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) { |
| if (*it < range_start) { |
| continue; |
| } |
| if (*it < range_end) { |
| ret_bitmap->add(*it); |
| ++count; |
| } else { |
| break; |
| } |
| } |
| return count; |
| } |
| case SET: { |
| int64_t count = 0; |
| std::vector<uint64_t> values(_set.begin(), _set.end()); |
| std::sort(values.begin(), values.end()); |
| for (auto it = values.begin(); it != values.end(); ++it) { |
| if (*it < range_start || *it >= range_end) { |
| continue; |
| } |
| ret_bitmap->add(*it); |
| ++count; |
| } |
| return count; |
| } |
| } |
| return 0; |
| } |
| |
| /** |
| * Return new set with specified start and limit |
| * @param range_start the start value for the range |
| * @param cardinality_limit the length of the subset |
| * @return the real count for subset, maybe less than cardinality_limit |
| */ |
| int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit, |
| BitmapValue* ret_bitmap) { |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: { |
| //only single value, so range_start must less than _sv |
| if (range_start > _sv) { |
| return 0; |
| } else { |
| ret_bitmap->add(_sv); |
| return 1; |
| } |
| } |
| case BITMAP: { |
| int64_t count = 0; |
| for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) { |
| if (*it < range_start) { |
| continue; |
| } |
| if (count < cardinality_limit) { |
| ret_bitmap->add(*it); |
| ++count; |
| } else { |
| break; |
| } |
| } |
| return count; |
| } |
| case SET: { |
| int64_t count = 0; |
| |
| std::vector<uint64_t> values(_set.begin(), _set.end()); |
| std::sort(values.begin(), values.end()); |
| for (auto it = values.begin(); it != values.end(); ++it) { |
| if (*it < range_start) { |
| continue; |
| } |
| if (count < cardinality_limit) { |
| ret_bitmap->add(*it); |
| ++count; |
| } else { |
| break; |
| } |
| } |
| return count; |
| } |
| } |
| return 0; |
| } |
| |
| /** |
| * Returns the bitmap elements, starting from the offset position. |
| * The number of returned elements is limited by the cardinality_limit parameter. |
| * Analog of the substring string function, but for bitmap. |
| */ |
| int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) { |
| switch (_type) { |
| case EMPTY: |
| return 0; |
| case SINGLE: { |
| //only single value, so offset must start 0 |
| if (offset == 0) { |
| ret_bitmap->add(_sv); |
| return 1; |
| } else { |
| return 0; |
| } |
| } |
| default: |
| break; |
| } |
| if (_type == BITMAP) { |
| if (std::abs(offset) >= _bitmap->cardinality()) { |
| return 0; |
| } |
| int64_t abs_offset = offset; |
| if (offset < 0) { |
| abs_offset = _bitmap->cardinality() + offset; |
| } |
| |
| int64_t count = 0; |
| int64_t offset_count = 0; |
| auto it = _bitmap->begin(); |
| for (; it != _bitmap->end() && offset_count < abs_offset; ++it) { |
| ++offset_count; |
| } |
| for (; it != _bitmap->end() && count < limit; ++it, ++count) { |
| ret_bitmap->add(*it); |
| } |
| return count; |
| } else { |
| if (std::abs(offset) > _set.size()) { |
| return 0; |
| } |
| |
| int64_t abs_offset = offset; |
| if (offset < 0) { |
| abs_offset = _set.size() + offset; |
| } |
| |
| std::vector<uint64_t> values(_set.begin(), _set.end()); |
| std::sort(values.begin(), values.end()); |
| |
| int64_t count = 0; |
| size_t index = 0; |
| for (auto v : values) { |
| if (index < abs_offset) { |
| ++index; |
| continue; |
| } |
| if (count == limit || index == values.size()) { |
| break; |
| } |
| ++count; |
| ++index; |
| ret_bitmap->add(v); |
| } |
| return count; |
| } |
| } |
| |
| //for function bitmap_to_array |
| void to_array(vectorized::PaddedPODArray<int64_t>& data) const { |
| switch (_type) { |
| case EMPTY: |
| break; |
| case SINGLE: { |
| data.emplace_back(_sv); |
| break; |
| } |
| case BITMAP: { |
| for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) { |
| data.emplace_back(*it); |
| } |
| break; |
| } |
| case SET: { |
| std::vector<uint64_t> values(_set.begin(), _set.end()); |
| std::sort(values.begin(), values.end()); |
| for (auto v : values) { |
| data.emplace_back(v); |
| } |
| break; |
| } |
| } |
| } |
| |
| void reset() { |
| _type = EMPTY; |
| _sv = 0; |
| _set.clear(); |
| _is_shared = false; |
| _bitmap = nullptr; |
| } |
| // Implement an iterator for convenience |
| friend class BitmapValueIterator; |
| typedef BitmapValueIterator b_iterator; |
| |
| b_iterator begin() const; |
| b_iterator end() const; |
| b_iterator lower_bound(uint64_t val) const; |
| |
| private: |
| void _convert_to_smaller_type() { |
| if (_type == BITMAP) { |
| uint64_t c = _bitmap->cardinality(); |
| if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) { |
| return; |
| } else if (c > 1) { |
| return; |
| } |
| if (c == 0) { |
| _type = EMPTY; |
| } else if (c == 1 && !config::enable_set_in_bitmap_value) { |
| _type = SINGLE; |
| _sv = _bitmap->minimum(); |
| } else { |
| _type = SET; |
| for (auto v : *_bitmap) { |
| _set.insert(v); |
| } |
| } |
| _bitmap.reset(); |
| } else if (_type == SET) { |
| if (_set.size() == 1 && !config::enable_set_in_bitmap_value) { |
| _type = SINGLE; |
| _sv = *_set.begin(); |
| _set.clear(); |
| } |
| } |
| } |
| |
| uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const { |
| bool is_empty = false; |
| uint64_t result = 0; |
| switch (_type) { |
| case SINGLE: |
| result = _sv; |
| break; |
| case BITMAP: |
| case SET: |
| result = func(); |
| break; |
| default: |
| is_empty = true; |
| } |
| if (empty) { |
| *empty = is_empty; |
| } |
| return result; |
| } |
| |
| void _prepare_bitmap_for_write() { |
| if (!_bitmap) { |
| _bitmap = std::make_shared<detail::Roaring64Map>(); |
| _is_shared = false; |
| return; |
| } |
| |
| if (!_is_shared) { |
| // the state is not shared, not need to check use count any more |
| return; |
| } |
| |
| if (_bitmap.use_count() > 1) { |
| auto new_one = std::make_shared<detail::Roaring64Map>(); |
| *new_one = *_bitmap; |
| _bitmap = new_one; |
| } |
| _is_shared = false; |
| } |
| |
| void _convert_to_bitmap_if_need() { |
| if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) { |
| return; |
| } |
| _prepare_bitmap_for_write(); |
| for (auto v : _set) { |
| _bitmap->add(v); |
| } |
| _type = BITMAP; |
| _set.clear(); |
| } |
| |
| enum BitmapDataType { |
| EMPTY = 0, |
| SINGLE = 1, // single element |
| BITMAP = 2, // more than one elements |
| SET = 3 // elements count less or equal than 32 |
| }; |
| uint64_t _sv = 0; // store the single value when _type == SINGLE |
| std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP |
| SetContainer<uint64_t> _set; |
| BitmapDataType _type {EMPTY}; |
| // Indicate whether the state is shared among multi BitmapValue object |
| bool _is_shared = true; |
| static constexpr uint64_t SET_TYPE_THRESHOLD = 32; |
| }; |
| |
| // A simple implement of bitmap value iterator(Read only) |
| // Usage: |
| // BitmapValueIterator iter = bitmap_value.begin(); |
| // BitmapValueIterator end = bitmap_value.end(); |
| // for (; iter != end(); ++iter) { |
| // uint64_t v = *iter; |
| // ... do something with "v" ... |
| // } |
| class BitmapValueIterator { |
| public: |
| BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) { |
| switch (_bitmap._type) { |
| case BitmapValue::BitmapDataType::EMPTY: |
| _end = true; |
| break; |
| case BitmapValue::BitmapDataType::SINGLE: |
| _sv = _bitmap._sv; |
| break; |
| case BitmapValue::BitmapDataType::BITMAP: |
| _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end); |
| break; |
| case BitmapValue::BitmapDataType::SET: |
| _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin(); |
| break; |
| default: |
| CHECK(false) << _bitmap._type; |
| } |
| } |
| |
| BitmapValueIterator(const BitmapValueIterator& other) |
| : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) { |
| _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr; |
| if (_bitmap._type == BitmapValue::BitmapDataType::SET) { |
| _set_iter = other._set_iter; |
| } |
| } |
| |
| ~BitmapValueIterator() { |
| if (_iter != nullptr) { |
| delete _iter; |
| _iter = nullptr; |
| } |
| } |
| |
| uint64_t operator*() const { |
| CHECK(!_end) << "should not get value of end iterator"; |
| switch (_bitmap._type) { |
| case BitmapValue::BitmapDataType::SINGLE: |
| return _sv; |
| case BitmapValue::BitmapDataType::BITMAP: |
| return *(*_iter); |
| case BitmapValue::BitmapDataType::SET: { |
| return *_set_iter; |
| } |
| default: |
| CHECK(false) << _bitmap._type; |
| } |
| return 0; |
| } |
| |
| BitmapValueIterator& operator++() { // ++i, must returned inc. value |
| CHECK(!_end) << "should not forward when iterator ends"; |
| switch (_bitmap._type) { |
| case BitmapValue::BitmapDataType::SINGLE: |
| _end = true; |
| break; |
| case BitmapValue::BitmapDataType::BITMAP: |
| ++(*_iter); |
| break; |
| case BitmapValue::BitmapDataType::SET: |
| ++_set_iter; |
| break; |
| default: |
| CHECK(false) << _bitmap._type; |
| } |
| return *this; |
| } |
| |
| BitmapValueIterator operator++(int) { // i++, must return orig. value |
| CHECK(!_end) << "should not forward when iterator ends"; |
| BitmapValueIterator orig(*this); |
| switch (_bitmap._type) { |
| case BitmapValue::BitmapDataType::SINGLE: |
| _end = true; |
| break; |
| case BitmapValue::BitmapDataType::BITMAP: |
| ++(*_iter); |
| break; |
| case BitmapValue::BitmapDataType::SET: |
| ++_set_iter; |
| break; |
| default: |
| CHECK(false) << _bitmap._type; |
| } |
| return orig; |
| } |
| |
| bool operator==(const BitmapValueIterator& other) const { |
| if (_end && other._end) { |
| return true; |
| } |
| |
| switch (_bitmap._type) { |
| case BitmapValue::BitmapDataType::EMPTY: |
| return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY; |
| case BitmapValue::BitmapDataType::SINGLE: |
| return _end == other._end && _sv == other._sv; |
| case BitmapValue::BitmapDataType::BITMAP: |
| return *_iter == *(other._iter); |
| case BitmapValue::BitmapDataType::SET: |
| return _set_iter == other._set_iter; |
| default: |
| CHECK(false) << _bitmap._type; |
| } |
| return false; |
| } |
| |
| bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); } |
| |
| /** |
| * Move the iterator to the first value >= `val`. |
| */ |
| BitmapValueIterator& move(uint64_t val) { |
| switch (_bitmap._type) { |
| case BitmapValue::BitmapDataType::SINGLE: |
| if (_sv < val) { |
| _end = true; |
| } |
| break; |
| case BitmapValue::BitmapDataType::BITMAP: |
| if (!_iter->move(val)) { |
| _end = true; |
| } |
| break; |
| case BitmapValue::BitmapDataType::SET: { |
| throw Exception(Status::FatalError("BitmapValue with set do not support move")); |
| } |
| default: |
| break; |
| } |
| return *this; |
| } |
| |
| private: |
| const BitmapValue& _bitmap; |
| detail::Roaring64MapSetBitForwardIterator* _iter = nullptr; |
| BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter; |
| uint64_t _sv = 0; |
| bool _end = false; |
| }; |
| |
| inline BitmapValueIterator BitmapValue::begin() const { |
| return {*this}; |
| } |
| |
| inline BitmapValueIterator BitmapValue::end() const { |
| return {*this, true}; |
| } |
| |
| inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const { |
| return BitmapValueIterator(*this).move(val); |
| } |
| |
| inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const { |
| if (left > right) { |
| return false; |
| } |
| |
| if (_type == SET) { |
| for (auto v : _set) { |
| if (v >= left && v <= right) { |
| return true; |
| } |
| } |
| return false; |
| } |
| auto it = lower_bound(left); |
| return it != end() && *it <= right; |
| } |
| |
| } // namespace doris |