| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <algorithm> |
| #include <cstdint> |
| #include <cstring> |
| #include <memory> |
| |
| #include <gtest/gtest.h> |
| |
| #include "arrow/buffer.h" |
| #include "arrow/memory_pool.h" |
| #include "arrow/result.h" |
| #include "arrow/testing/gtest_common.h" |
| #include "arrow/testing/gtest_util.h" |
| #include "arrow/testing/util.h" |
| #include "arrow/util/bit_block_counter.h" |
| #include "arrow/util/bit_util.h" |
| #include "arrow/util/bitmap_ops.h" |
| |
| namespace arrow { |
| namespace internal { |
| |
| class TestBitBlockCounter : public ::testing::Test { |
| public: |
| void Create(int64_t nbytes, int64_t offset, int64_t length) { |
| ASSERT_OK_AND_ASSIGN(buf_, AllocateBuffer(nbytes)); |
| // Start with data zeroed out |
| std::memset(buf_->mutable_data(), 0, nbytes); |
| counter_.reset(new BitBlockCounter(buf_->data(), offset, length)); |
| } |
| |
| protected: |
| std::shared_ptr<Buffer> buf_; |
| std::unique_ptr<BitBlockCounter> counter_; |
| }; |
| |
| static constexpr int64_t kWordSize = 64; |
| |
| TEST_F(TestBitBlockCounter, OneWordBasics) { |
| const int64_t nbytes = 1024; |
| |
| Create(nbytes, 0, nbytes * 8); |
| |
| int64_t bits_scanned = 0; |
| for (int64_t i = 0; i < nbytes / 8; ++i) { |
| BitBlockCount block = counter_->NextWord(); |
| ASSERT_EQ(block.length, kWordSize); |
| ASSERT_EQ(block.popcount, 0); |
| bits_scanned += block.length; |
| } |
| ASSERT_EQ(bits_scanned, 1024 * 8); |
| |
| auto block = counter_->NextWord(); |
| ASSERT_EQ(block.length, 0); |
| ASSERT_EQ(block.popcount, 0); |
| } |
| |
| TEST_F(TestBitBlockCounter, FourWordsBasics) { |
| const int64_t nbytes = 1024; |
| |
| Create(nbytes, 0, nbytes * 8); |
| |
| int64_t bits_scanned = 0; |
| for (int64_t i = 0; i < nbytes / 32; ++i) { |
| BitBlockCount block = counter_->NextFourWords(); |
| ASSERT_EQ(block.length, 4 * kWordSize); |
| ASSERT_EQ(block.popcount, 0); |
| bits_scanned += block.length; |
| } |
| ASSERT_EQ(bits_scanned, 1024 * 8); |
| |
| auto block = counter_->NextFourWords(); |
| ASSERT_EQ(block.length, 0); |
| ASSERT_EQ(block.popcount, 0); |
| } |
| |
| TEST_F(TestBitBlockCounter, OneWordWithOffsets) { |
| auto CheckWithOffset = [&](int64_t offset) { |
| const int64_t nwords = 4; |
| |
| const int64_t total_bytes = nwords * 8 + 1; |
| // Trim a bit from the end of the bitmap so we can check the remainder bits |
| // behavior |
| Create(total_bytes, offset, nwords * kWordSize - offset - 1); |
| |
| // Start with data all set |
| std::memset(buf_->mutable_data(), 0xFF, total_bytes); |
| |
| BitBlockCount block = counter_->NextWord(); |
| ASSERT_EQ(kWordSize, block.length); |
| ASSERT_EQ(block.popcount, 64); |
| |
| // Add a false value to the next word |
| BitUtil::SetBitTo(buf_->mutable_data(), kWordSize + offset, false); |
| block = counter_->NextWord(); |
| ASSERT_EQ(block.length, 64); |
| ASSERT_EQ(block.popcount, 63); |
| |
| // Set the next word to all false |
| BitUtil::SetBitsTo(buf_->mutable_data(), 2 * kWordSize + offset, kWordSize, false); |
| |
| block = counter_->NextWord(); |
| ASSERT_EQ(block.length, 64); |
| ASSERT_EQ(block.popcount, 0); |
| |
| block = counter_->NextWord(); |
| ASSERT_EQ(block.length, kWordSize - offset - 1); |
| ASSERT_EQ(block.length, block.popcount); |
| |
| // We can keep calling NextWord safely |
| block = counter_->NextWord(); |
| ASSERT_EQ(block.length, 0); |
| ASSERT_EQ(block.popcount, 0); |
| }; |
| |
| for (int64_t offset_i = 0; offset_i < 8; ++offset_i) { |
| CheckWithOffset(offset_i); |
| } |
| } |
| |
| TEST_F(TestBitBlockCounter, FourWordsWithOffsets) { |
| auto CheckWithOffset = [&](int64_t offset) { |
| const int64_t nwords = 17; |
| |
| const int64_t total_bytes = nwords * 8 + 1; |
| // Trim a bit from the end of the bitmap so we can check the remainder bits |
| // behavior |
| Create(total_bytes, offset, nwords * kWordSize - offset - 1); |
| |
| // Start with data all set |
| std::memset(buf_->mutable_data(), 0xFF, total_bytes); |
| |
| BitBlockCount block = counter_->NextFourWords(); |
| ASSERT_EQ(block.length, 4 * kWordSize); |
| ASSERT_EQ(block.popcount, block.length); |
| |
| // Add some false values to the next 3 shifted words |
| BitUtil::SetBitTo(buf_->mutable_data(), 4 * kWordSize + offset, false); |
| BitUtil::SetBitTo(buf_->mutable_data(), 5 * kWordSize + offset, false); |
| BitUtil::SetBitTo(buf_->mutable_data(), 6 * kWordSize + offset, false); |
| block = counter_->NextFourWords(); |
| |
| ASSERT_EQ(block.length, 4 * kWordSize); |
| ASSERT_EQ(block.popcount, 253); |
| |
| // Set the next two words to all false |
| BitUtil::SetBitsTo(buf_->mutable_data(), 8 * kWordSize + offset, 2 * kWordSize, |
| false); |
| |
| // Block is half set |
| block = counter_->NextFourWords(); |
| ASSERT_EQ(block.length, 4 * kWordSize); |
| ASSERT_EQ(block.popcount, 128); |
| |
| // Last full block whether offset or no |
| block = counter_->NextFourWords(); |
| ASSERT_EQ(block.length, 4 * kWordSize); |
| ASSERT_EQ(block.length, block.popcount); |
| |
| // Partial block |
| block = counter_->NextFourWords(); |
| ASSERT_EQ(block.length, kWordSize - offset - 1); |
| ASSERT_EQ(block.length, block.popcount); |
| |
| // We can keep calling NextFourWords safely |
| block = counter_->NextFourWords(); |
| ASSERT_EQ(block.length, 0); |
| ASSERT_EQ(block.popcount, 0); |
| }; |
| |
| for (int64_t offset_i = 0; offset_i < 8; ++offset_i) { |
| CheckWithOffset(offset_i); |
| } |
| } |
| |
| TEST_F(TestBitBlockCounter, FourWordsRandomData) { |
| const int64_t nbytes = 1024; |
| auto buffer = *AllocateBuffer(nbytes); |
| random_bytes(nbytes, 0, buffer->mutable_data()); |
| |
| auto CheckWithOffset = [&](int64_t offset) { |
| BitBlockCounter counter(buffer->data(), offset, nbytes * 8 - offset); |
| for (int64_t i = 0; i < nbytes / 32; ++i) { |
| BitBlockCount block = counter.NextFourWords(); |
| ASSERT_EQ(block.popcount, |
| CountSetBits(buffer->data(), i * 256 + offset, block.length)); |
| } |
| }; |
| for (int64_t offset_i = 0; offset_i < 8; ++offset_i) { |
| CheckWithOffset(offset_i); |
| } |
| } |
| |
| template <template <typename T> class Op, typename NextWordFunc> |
| void CheckBinaryBitBlockOp(NextWordFunc&& get_next_word) { |
| const int64_t nbytes = 1024; |
| auto left = *AllocateBuffer(nbytes); |
| auto right = *AllocateBuffer(nbytes); |
| random_bytes(nbytes, 0, left->mutable_data()); |
| random_bytes(nbytes, 0, right->mutable_data()); |
| |
| auto CheckWithOffsets = [&](int left_offset, int right_offset) { |
| int64_t overlap_length = nbytes * 8 - std::max(left_offset, right_offset); |
| BinaryBitBlockCounter counter(left->data(), left_offset, right->data(), right_offset, |
| overlap_length); |
| int64_t position = 0; |
| do { |
| BitBlockCount block = get_next_word(&counter); |
| int expected_popcount = 0; |
| for (int j = 0; j < block.length; ++j) { |
| expected_popcount += static_cast<int>( |
| Op<bool>::Call(BitUtil::GetBit(left->data(), position + left_offset + j), |
| BitUtil::GetBit(right->data(), position + right_offset + j))); |
| } |
| ASSERT_EQ(block.popcount, expected_popcount); |
| position += block.length; |
| } while (position < overlap_length); |
| // We made it through all the data |
| ASSERT_EQ(position, overlap_length); |
| |
| BitBlockCount block = get_next_word(&counter); |
| ASSERT_EQ(block.length, 0); |
| ASSERT_EQ(block.popcount, 0); |
| }; |
| |
| for (int left_i = 0; left_i < 8; ++left_i) { |
| for (int right_i = 0; right_i < 8; ++right_i) { |
| CheckWithOffsets(left_i, right_i); |
| } |
| } |
| } |
| |
| TEST(TestBinaryBitBlockCounter, NextAndWord) { |
| CheckBinaryBitBlockOp<detail::BitBlockAnd>( |
| [](BinaryBitBlockCounter* counter) { return counter->NextAndWord(); }); |
| } |
| |
| TEST(TestBinaryBitBlockCounter, NextOrWord) { |
| CheckBinaryBitBlockOp<detail::BitBlockOr>( |
| [](BinaryBitBlockCounter* counter) { return counter->NextOrWord(); }); |
| } |
| |
| TEST(TestBinaryBitBlockCounter, NextOrNotWord) { |
| CheckBinaryBitBlockOp<detail::BitBlockOrNot>( |
| [](BinaryBitBlockCounter* counter) { return counter->NextOrNotWord(); }); |
| } |
| |
| TEST(TestOptionalBitBlockCounter, NextBlock) { |
| const int64_t nbytes = 5000; |
| auto bitmap = *AllocateBitmap(nbytes * 8); |
| random_bytes(nbytes, 0, bitmap->mutable_data()); |
| |
| OptionalBitBlockCounter optional_counter(bitmap, 0, nbytes * 8); |
| BitBlockCounter bit_counter(bitmap->data(), 0, nbytes * 8); |
| |
| while (true) { |
| BitBlockCount block = bit_counter.NextWord(); |
| BitBlockCount optional_block = optional_counter.NextBlock(); |
| ASSERT_EQ(optional_block.length, block.length); |
| ASSERT_EQ(optional_block.popcount, block.popcount); |
| if (block.length == 0) { |
| break; |
| } |
| } |
| |
| BitBlockCount optional_block = optional_counter.NextBlock(); |
| ASSERT_EQ(optional_block.length, 0); |
| ASSERT_EQ(optional_block.popcount, 0); |
| |
| OptionalBitBlockCounter optional_counter_no_bitmap(nullptr, 0, nbytes * 8); |
| BitBlockCount no_bitmap_block = optional_counter_no_bitmap.NextBlock(); |
| |
| int16_t max_length = std::numeric_limits<int16_t>::max(); |
| ASSERT_EQ(no_bitmap_block.length, max_length); |
| ASSERT_EQ(no_bitmap_block.popcount, max_length); |
| no_bitmap_block = optional_counter_no_bitmap.NextBlock(); |
| ASSERT_EQ(no_bitmap_block.length, nbytes * 8 - max_length); |
| ASSERT_EQ(no_bitmap_block.popcount, no_bitmap_block.length); |
| } |
| |
| TEST(TestOptionalBitBlockCounter, NextWord) { |
| const int64_t nbytes = 5000; |
| auto bitmap = *AllocateBitmap(nbytes * 8); |
| random_bytes(nbytes, 0, bitmap->mutable_data()); |
| |
| OptionalBitBlockCounter optional_counter(bitmap, 0, nbytes * 8); |
| OptionalBitBlockCounter optional_counter_no_bitmap(nullptr, 0, nbytes * 8); |
| BitBlockCounter bit_counter(bitmap->data(), 0, nbytes * 8); |
| |
| while (true) { |
| BitBlockCount block = bit_counter.NextWord(); |
| BitBlockCount no_bitmap_block = optional_counter_no_bitmap.NextWord(); |
| BitBlockCount optional_block = optional_counter.NextWord(); |
| ASSERT_EQ(optional_block.length, block.length); |
| ASSERT_EQ(optional_block.popcount, block.popcount); |
| |
| ASSERT_EQ(no_bitmap_block.length, block.length); |
| ASSERT_EQ(no_bitmap_block.popcount, block.length); |
| if (block.length == 0) { |
| break; |
| } |
| } |
| |
| BitBlockCount optional_block = optional_counter.NextWord(); |
| ASSERT_EQ(optional_block.length, 0); |
| ASSERT_EQ(optional_block.popcount, 0); |
| } |
| |
| class TestOptionalBinaryBitBlockCounter : public ::testing::Test { |
| public: |
| void SetUp() { |
| const int64_t nbytes = 5000; |
| ASSERT_OK_AND_ASSIGN(left_bitmap_, AllocateBitmap(nbytes * 8)); |
| ASSERT_OK_AND_ASSIGN(right_bitmap_, AllocateBitmap(nbytes * 8)); |
| random_bytes(nbytes, 0, left_bitmap_->mutable_data()); |
| random_bytes(nbytes, 0, right_bitmap_->mutable_data()); |
| |
| left_offset_ = 12; |
| right_offset_ = 23; |
| length_ = nbytes * 8 - std::max(left_offset_, right_offset_); |
| } |
| |
| protected: |
| std::shared_ptr<Buffer> left_bitmap_, right_bitmap_; |
| int64_t left_offset_; |
| int64_t right_offset_; |
| int64_t length_; |
| }; |
| |
| TEST_F(TestOptionalBinaryBitBlockCounter, NextBlockBothBitmaps) { |
| // Both bitmaps present |
| OptionalBinaryBitBlockCounter optional_counter(left_bitmap_, left_offset_, |
| right_bitmap_, right_offset_, length_); |
| BinaryBitBlockCounter bit_counter(left_bitmap_->data(), left_offset_, |
| right_bitmap_->data(), right_offset_, length_); |
| |
| while (true) { |
| BitBlockCount block = bit_counter.NextAndWord(); |
| BitBlockCount optional_block = optional_counter.NextAndBlock(); |
| ASSERT_EQ(optional_block.length, block.length); |
| ASSERT_EQ(optional_block.popcount, block.popcount); |
| if (block.length == 0) { |
| break; |
| } |
| } |
| } |
| |
| TEST_F(TestOptionalBinaryBitBlockCounter, NextBlockLeftBitmap) { |
| // Left bitmap present |
| OptionalBinaryBitBlockCounter optional_counter(left_bitmap_, left_offset_, nullptr, |
| right_offset_, length_); |
| BitBlockCounter bit_counter(left_bitmap_->data(), left_offset_, length_); |
| |
| while (true) { |
| BitBlockCount block = bit_counter.NextWord(); |
| BitBlockCount optional_block = optional_counter.NextAndBlock(); |
| ASSERT_EQ(optional_block.length, block.length); |
| ASSERT_EQ(optional_block.popcount, block.popcount); |
| if (block.length == 0) { |
| break; |
| } |
| } |
| } |
| |
| TEST_F(TestOptionalBinaryBitBlockCounter, NextBlockRightBitmap) { |
| // Right bitmap present |
| OptionalBinaryBitBlockCounter optional_counter(nullptr, left_offset_, right_bitmap_, |
| right_offset_, length_); |
| BitBlockCounter bit_counter(right_bitmap_->data(), right_offset_, length_); |
| |
| while (true) { |
| BitBlockCount block = bit_counter.NextWord(); |
| BitBlockCount optional_block = optional_counter.NextAndBlock(); |
| ASSERT_EQ(optional_block.length, block.length); |
| ASSERT_EQ(optional_block.popcount, block.popcount); |
| if (block.length == 0) { |
| break; |
| } |
| } |
| } |
| |
| TEST_F(TestOptionalBinaryBitBlockCounter, NextBlockNoBitmap) { |
| // No bitmap present |
| OptionalBinaryBitBlockCounter optional_counter(nullptr, left_offset_, nullptr, |
| right_offset_, length_); |
| |
| BitBlockCount block = optional_counter.NextAndBlock(); |
| ASSERT_EQ(block.length, std::numeric_limits<int16_t>::max()); |
| ASSERT_EQ(block.popcount, block.length); |
| |
| const int64_t remaining_length = length_ - block.length; |
| block = optional_counter.NextAndBlock(); |
| ASSERT_EQ(block.length, remaining_length); |
| ASSERT_EQ(block.popcount, block.length); |
| |
| block = optional_counter.NextAndBlock(); |
| ASSERT_EQ(block.length, 0); |
| ASSERT_EQ(block.popcount, 0); |
| } |
| |
| } // namespace internal |
| } // namespace arrow |