| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <algorithm> |
| #include <cstdint> |
| #include <limits> |
| #include <memory> |
| |
| #include "arrow/buffer.h" |
| #include "arrow/status.h" |
| #include "arrow/util/bit_util.h" |
| #include "arrow/util/endian.h" |
| #include "arrow/util/macros.h" |
| #include "arrow/util/ubsan.h" |
| #include "arrow/util/visibility.h" |
| |
| namespace arrow { |
| namespace internal { |
| namespace detail { |
| |
| inline uint64_t LoadWord(const uint8_t* bytes) { |
| return BitUtil::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes)); |
| } |
| |
| inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) { |
| if (shift == 0) { |
| return current; |
| } |
| return (current >> shift) | (next << (64 - shift)); |
| } |
| |
| // These templates are here to help with unit tests |
| |
| template <typename T> |
| struct BitBlockAnd { |
| static T Call(T left, T right) { return left & right; } |
| }; |
| |
| template <> |
| struct BitBlockAnd<bool> { |
| static bool Call(bool left, bool right) { return left && right; } |
| }; |
| |
| template <typename T> |
| struct BitBlockOr { |
| static T Call(T left, T right) { return left | right; } |
| }; |
| |
| template <> |
| struct BitBlockOr<bool> { |
| static bool Call(bool left, bool right) { return left || right; } |
| }; |
| |
| template <typename T> |
| struct BitBlockOrNot { |
| static T Call(T left, T right) { return left | ~right; } |
| }; |
| |
| template <> |
| struct BitBlockOrNot<bool> { |
| static bool Call(bool left, bool right) { return left || !right; } |
| }; |
| |
| } // namespace detail |
| |
| /// \brief Return value from bit block counters: the total number of bits and |
| /// the number of set bits. |
| struct BitBlockCount { |
| int16_t length; |
| int16_t popcount; |
| |
| bool NoneSet() const { return this->popcount == 0; } |
| bool AllSet() const { return this->length == this->popcount; } |
| }; |
| |
| /// \brief A class that scans through a true/false bitmap to compute popcounts |
| /// 64 or 256 bits at a time. This is used to accelerate processing of |
| /// mostly-not-null array data. |
| class ARROW_EXPORT BitBlockCounter { |
| public: |
| BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length) |
| : bitmap_(bitmap + start_offset / 8), |
| bits_remaining_(length), |
| offset_(start_offset % 8) {} |
| |
| /// \brief The bit size of each word run |
| static constexpr int64_t kWordBits = 64; |
| |
| /// \brief The bit size of four words run |
| static constexpr int64_t kFourWordsBits = kWordBits * 4; |
| |
| /// \brief Return the next run of available bits, usually 256. The returned |
| /// pair contains the size of run and the number of true values. The last |
| /// block will have a length less than 256 if the bitmap length is not a |
| /// multiple of 256, and will return 0-length blocks in subsequent |
| /// invocations. |
| BitBlockCount NextFourWords() { |
| using detail::LoadWord; |
| using detail::ShiftWord; |
| |
| if (!bits_remaining_) { |
| return {0, 0}; |
| } |
| int64_t total_popcount = 0; |
| if (offset_ == 0) { |
| if (bits_remaining_ < kFourWordsBits) { |
| return GetBlockSlow(kFourWordsBits); |
| } |
| total_popcount += BitUtil::PopCount(LoadWord(bitmap_)); |
| total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 8)); |
| total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 16)); |
| total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 24)); |
| } else { |
| // When the offset is > 0, we need there to be a word beyond the last |
| // aligned word in the bitmap for the bit shifting logic. |
| if (bits_remaining_ < 5 * kFourWordsBits - offset_) { |
| return GetBlockSlow(kFourWordsBits); |
| } |
| auto current = LoadWord(bitmap_); |
| auto next = LoadWord(bitmap_ + 8); |
| total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); |
| current = next; |
| next = LoadWord(bitmap_ + 16); |
| total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); |
| current = next; |
| next = LoadWord(bitmap_ + 24); |
| total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); |
| current = next; |
| next = LoadWord(bitmap_ + 32); |
| total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); |
| } |
| bitmap_ += BitUtil::BytesForBits(kFourWordsBits); |
| bits_remaining_ -= kFourWordsBits; |
| return {256, static_cast<int16_t>(total_popcount)}; |
| } |
| |
| /// \brief Return the next run of available bits, usually 64. The returned |
| /// pair contains the size of run and the number of true values. The last |
| /// block will have a length less than 64 if the bitmap length is not a |
| /// multiple of 64, and will return 0-length blocks in subsequent |
| /// invocations. |
| BitBlockCount NextWord() { |
| using detail::LoadWord; |
| using detail::ShiftWord; |
| |
| if (!bits_remaining_) { |
| return {0, 0}; |
| } |
| int64_t popcount = 0; |
| if (offset_ == 0) { |
| if (bits_remaining_ < kWordBits) { |
| return GetBlockSlow(kWordBits); |
| } |
| popcount = BitUtil::PopCount(LoadWord(bitmap_)); |
| } else { |
| // When the offset is > 0, we need there to be a word beyond the last |
| // aligned word in the bitmap for the bit shifting logic. |
| if (bits_remaining_ < 2 * kWordBits - offset_) { |
| return GetBlockSlow(kWordBits); |
| } |
| popcount = |
| BitUtil::PopCount(ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_)); |
| } |
| bitmap_ += kWordBits / 8; |
| bits_remaining_ -= kWordBits; |
| return {64, static_cast<int16_t>(popcount)}; |
| } |
| |
| private: |
| /// \brief Return block with the requested size when doing word-wise |
| /// computation is not possible due to inadequate bits remaining. |
| BitBlockCount GetBlockSlow(int64_t block_size) noexcept; |
| |
| const uint8_t* bitmap_; |
| int64_t bits_remaining_; |
| int64_t offset_; |
| }; |
| |
| /// \brief A tool to iterate through a possibly non-existent validity bitmap, |
| /// to allow us to write one code path for both the with-nulls and no-nulls |
| /// cases without giving up a lot of performance. |
| class ARROW_EXPORT OptionalBitBlockCounter { |
| public: |
| // validity_bitmap may be NULLPTR |
| OptionalBitBlockCounter(const uint8_t* validity_bitmap, int64_t offset, int64_t length); |
| |
| // validity_bitmap may be null |
| OptionalBitBlockCounter(const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset, |
| int64_t length); |
| |
| /// Return block count for next word when the bitmap is available otherwise |
| /// return a block with length up to INT16_MAX when there is no validity |
| /// bitmap (so all the referenced values are not null). |
| BitBlockCount NextBlock() { |
| static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max(); |
| if (has_bitmap_) { |
| BitBlockCount block = counter_.NextWord(); |
| position_ += block.length; |
| return block; |
| } else { |
| int16_t block_size = |
| static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_)); |
| position_ += block_size; |
| // All values are non-null |
| return {block_size, block_size}; |
| } |
| } |
| |
| // Like NextBlock, but returns a word-sized block even when there is no |
| // validity bitmap |
| BitBlockCount NextWord() { |
| static constexpr int64_t kWordSize = 64; |
| if (has_bitmap_) { |
| BitBlockCount block = counter_.NextWord(); |
| position_ += block.length; |
| return block; |
| } else { |
| int16_t block_size = static_cast<int16_t>(std::min(kWordSize, length_ - position_)); |
| position_ += block_size; |
| // All values are non-null |
| return {block_size, block_size}; |
| } |
| } |
| |
| private: |
| const bool has_bitmap_; |
| int64_t position_; |
| int64_t length_; |
| BitBlockCounter counter_; |
| }; |
| |
| /// \brief A class that computes popcounts on the result of bitwise operations |
| /// between two bitmaps, 64 bits at a time. A 64-bit word is loaded from each |
| /// bitmap, then the popcount is computed on e.g. the bitwise-and of the two |
| /// words. |
| class ARROW_EXPORT BinaryBitBlockCounter { |
| public: |
| BinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset, |
| const uint8_t* right_bitmap, int64_t right_offset, int64_t length) |
| : left_bitmap_(left_bitmap + left_offset / 8), |
| left_offset_(left_offset % 8), |
| right_bitmap_(right_bitmap + right_offset / 8), |
| right_offset_(right_offset % 8), |
| bits_remaining_(length) {} |
| |
| /// \brief Return the popcount of the bitwise-and of the next run of |
| /// available bits, up to 64. The returned pair contains the size of run and |
| /// the number of true values. The last block will have a length less than 64 |
| /// if the bitmap length is not a multiple of 64, and will return 0-length |
| /// blocks in subsequent invocations. |
| BitBlockCount NextAndWord() { return NextWord<detail::BitBlockAnd>(); } |
| |
| /// \brief Computes "x | y" block for each available run of bits. |
| BitBlockCount NextOrWord() { return NextWord<detail::BitBlockOr>(); } |
| |
| /// \brief Computes "x | ~y" block for each available run of bits. |
| BitBlockCount NextOrNotWord() { return NextWord<detail::BitBlockOrNot>(); } |
| |
| private: |
| template <template <typename T> class Op> |
| BitBlockCount NextWord() { |
| using detail::LoadWord; |
| using detail::ShiftWord; |
| |
| if (!bits_remaining_) { |
| return {0, 0}; |
| } |
| // When the offset is > 0, we need there to be a word beyond the last aligned |
| // word in the bitmap for the bit shifting logic. |
| constexpr int64_t kWordBits = BitBlockCounter::kWordBits; |
| const int64_t bits_required_to_use_words = |
| std::max(left_offset_ == 0 ? 64 : 64 + (64 - left_offset_), |
| right_offset_ == 0 ? 64 : 64 + (64 - right_offset_)); |
| if (bits_remaining_ < bits_required_to_use_words) { |
| const int16_t run_length = |
| static_cast<int16_t>(std::min(bits_remaining_, kWordBits)); |
| int16_t popcount = 0; |
| for (int64_t i = 0; i < run_length; ++i) { |
| if (Op<bool>::Call(BitUtil::GetBit(left_bitmap_, left_offset_ + i), |
| BitUtil::GetBit(right_bitmap_, right_offset_ + i))) { |
| ++popcount; |
| } |
| } |
| // This code path should trigger _at most_ 2 times. In the "two times" |
| // case, the first time the run length will be a multiple of 8. |
| left_bitmap_ += run_length / 8; |
| right_bitmap_ += run_length / 8; |
| bits_remaining_ -= run_length; |
| return {run_length, popcount}; |
| } |
| |
| int64_t popcount = 0; |
| if (left_offset_ == 0 && right_offset_ == 0) { |
| popcount = BitUtil::PopCount( |
| Op<uint64_t>::Call(LoadWord(left_bitmap_), LoadWord(right_bitmap_))); |
| } else { |
| auto left_word = |
| ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), left_offset_); |
| auto right_word = |
| ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), right_offset_); |
| popcount = BitUtil::PopCount(Op<uint64_t>::Call(left_word, right_word)); |
| } |
| left_bitmap_ += kWordBits / 8; |
| right_bitmap_ += kWordBits / 8; |
| bits_remaining_ -= kWordBits; |
| return {64, static_cast<int16_t>(popcount)}; |
| } |
| |
| const uint8_t* left_bitmap_; |
| int64_t left_offset_; |
| const uint8_t* right_bitmap_; |
| int64_t right_offset_; |
| int64_t bits_remaining_; |
| }; |
| |
| class ARROW_EXPORT OptionalBinaryBitBlockCounter { |
| public: |
| // Any bitmap may be NULLPTR |
| OptionalBinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset, |
| const uint8_t* right_bitmap, int64_t right_offset, |
| int64_t length); |
| |
| // Any bitmap may be null |
| OptionalBinaryBitBlockCounter(const std::shared_ptr<Buffer>& left_bitmap, |
| int64_t left_offset, |
| const std::shared_ptr<Buffer>& right_bitmap, |
| int64_t right_offset, int64_t length); |
| |
| BitBlockCount NextAndBlock() { |
| static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max(); |
| switch (has_bitmap_) { |
| case HasBitmap::BOTH: { |
| BitBlockCount block = binary_counter_.NextAndWord(); |
| position_ += block.length; |
| return block; |
| } |
| case HasBitmap::ONE: { |
| BitBlockCount block = unary_counter_.NextWord(); |
| position_ += block.length; |
| return block; |
| } |
| case HasBitmap::NONE: |
| default: { |
| const int16_t block_size = |
| static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_)); |
| position_ += block_size; |
| // All values are non-null |
| return {block_size, block_size}; |
| } |
| } |
| } |
| |
| BitBlockCount NextOrNotBlock() { |
| static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max(); |
| switch (has_bitmap_) { |
| case HasBitmap::BOTH: { |
| BitBlockCount block = binary_counter_.NextOrNotWord(); |
| position_ += block.length; |
| return block; |
| } |
| case HasBitmap::ONE: { |
| BitBlockCount block = unary_counter_.NextWord(); |
| position_ += block.length; |
| return block; |
| } |
| case HasBitmap::NONE: |
| default: { |
| const int16_t block_size = |
| static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_)); |
| position_ += block_size; |
| // All values are non-null |
| return {block_size, block_size}; |
| } |
| } |
| } |
| |
| private: |
| enum class HasBitmap : int { BOTH, ONE, NONE }; |
| |
| const HasBitmap has_bitmap_; |
| int64_t position_; |
| int64_t length_; |
| BitBlockCounter unary_counter_; |
| BinaryBitBlockCounter binary_counter_; |
| |
| static HasBitmap HasBitmapFromBitmaps(bool has_left, bool has_right) { |
| switch (static_cast<int>(has_left) + static_cast<int>(has_right)) { |
| case 0: |
| return HasBitmap::NONE; |
| case 1: |
| return HasBitmap::ONE; |
| default: // 2 |
| return HasBitmap::BOTH; |
| } |
| } |
| }; |
| |
| // Functional-style bit block visitors. |
| |
| template <typename VisitNotNull, typename VisitNull> |
| static Status VisitBitBlocks(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset, |
| int64_t length, VisitNotNull&& visit_not_null, |
| VisitNull&& visit_null) { |
| const uint8_t* bitmap = NULLPTR; |
| if (bitmap_buf != NULLPTR) { |
| bitmap = bitmap_buf->data(); |
| } |
| internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length); |
| int64_t position = 0; |
| while (position < length) { |
| internal::BitBlockCount block = bit_counter.NextBlock(); |
| if (block.AllSet()) { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| ARROW_RETURN_NOT_OK(visit_not_null(position)); |
| } |
| } else if (block.NoneSet()) { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| ARROW_RETURN_NOT_OK(visit_null()); |
| } |
| } else { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| if (BitUtil::GetBit(bitmap, offset + position)) { |
| ARROW_RETURN_NOT_OK(visit_not_null(position)); |
| } else { |
| ARROW_RETURN_NOT_OK(visit_null()); |
| } |
| } |
| } |
| } |
| return Status::OK(); |
| } |
| |
| template <typename VisitNotNull, typename VisitNull> |
| static void VisitBitBlocksVoid(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset, |
| int64_t length, VisitNotNull&& visit_not_null, |
| VisitNull&& visit_null) { |
| const uint8_t* bitmap = NULLPTR; |
| if (bitmap_buf != NULLPTR) { |
| bitmap = bitmap_buf->data(); |
| } |
| internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length); |
| int64_t position = 0; |
| while (position < length) { |
| internal::BitBlockCount block = bit_counter.NextBlock(); |
| if (block.AllSet()) { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| visit_not_null(position); |
| } |
| } else if (block.NoneSet()) { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| visit_null(); |
| } |
| } else { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| if (BitUtil::GetBit(bitmap, offset + position)) { |
| visit_not_null(position); |
| } else { |
| visit_null(); |
| } |
| } |
| } |
| } |
| } |
| |
| template <typename VisitNotNull, typename VisitNull> |
| static void VisitTwoBitBlocksVoid(const std::shared_ptr<Buffer>& left_bitmap_buf, |
| int64_t left_offset, |
| const std::shared_ptr<Buffer>& right_bitmap_buf, |
| int64_t right_offset, int64_t length, |
| VisitNotNull&& visit_not_null, VisitNull&& visit_null) { |
| if (left_bitmap_buf == NULLPTR || right_bitmap_buf == NULLPTR) { |
| // At most one bitmap is present |
| if (left_bitmap_buf == NULLPTR) { |
| return VisitBitBlocksVoid(right_bitmap_buf, right_offset, length, |
| std::forward<VisitNotNull>(visit_not_null), |
| std::forward<VisitNull>(visit_null)); |
| } else { |
| return VisitBitBlocksVoid(left_bitmap_buf, left_offset, length, |
| std::forward<VisitNotNull>(visit_not_null), |
| std::forward<VisitNull>(visit_null)); |
| } |
| } |
| // Both bitmaps are present |
| const uint8_t* left_bitmap = left_bitmap_buf->data(); |
| const uint8_t* right_bitmap = right_bitmap_buf->data(); |
| BinaryBitBlockCounter bit_counter(left_bitmap, left_offset, right_bitmap, right_offset, |
| length); |
| int64_t position = 0; |
| while (position < length) { |
| BitBlockCount block = bit_counter.NextAndWord(); |
| if (block.AllSet()) { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| visit_not_null(position); |
| } |
| } else if (block.NoneSet()) { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| visit_null(); |
| } |
| } else { |
| for (int64_t i = 0; i < block.length; ++i, ++position) { |
| if (BitUtil::GetBit(left_bitmap, left_offset + position) && |
| BitUtil::GetBit(right_bitmap, right_offset + position)) { |
| visit_not_null(position); |
| } else { |
| visit_null(); |
| } |
| } |
| } |
| } |
| } |
| |
| } // namespace internal |
| } // namespace arrow |