blob: e5a5e4c39bef5a958152083f2654fa5b45f89759 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <array>
#include <climits>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_common.h"
#include "arrow/testing/gtest_compat.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/type_fwd.h"
#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bitmap.h"
#include "arrow/util/bitmap_generate.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/bitmap_visit.h"
#include "arrow/util/bitmap_writer.h"
#include "arrow/util/bitset_stack.h"
#include "arrow/util/endian.h"
namespace arrow {
using internal::BitmapAnd;
using internal::BitmapAndNot;
using internal::BitmapOr;
using internal::BitmapXor;
using internal::BitsetStack;
using internal::CopyBitmap;
using internal::CountSetBits;
using internal::InvertBitmap;
using ::testing::ElementsAreArray;
namespace internal {
void PrintTo(const BitRun& run, std::ostream* os) { *os << run.ToString(); }
void PrintTo(const SetBitRun& run, std::ostream* os) { *os << run.ToString(); }
} // namespace internal
template <class BitmapWriter>
void WriteVectorToWriter(BitmapWriter& writer, const std::vector<int> values) {
for (const auto& value : values) {
if (value) {
writer.Set();
} else {
writer.Clear();
}
writer.Next();
}
writer.Finish();
}
void BitmapFromVector(const std::vector<int>& values, int64_t bit_offset,
std::shared_ptr<Buffer>* out_buffer, int64_t* out_length) {
const int64_t length = values.size();
*out_length = length;
ASSERT_OK_AND_ASSIGN(*out_buffer, AllocateEmptyBitmap(length + bit_offset));
auto writer = internal::BitmapWriter((*out_buffer)->mutable_data(), bit_offset, length);
WriteVectorToWriter(writer, values);
}
std::shared_ptr<Buffer> BitmapFromString(const std::string& s) {
TypedBufferBuilder<bool> builder;
ABORT_NOT_OK(builder.Reserve(s.size()));
for (const char c : s) {
switch (c) {
case '0':
builder.UnsafeAppend(false);
break;
case '1':
builder.UnsafeAppend(true);
break;
case ' ':
case '\t':
case '\n':
case '\r':
break;
default:
ARROW_LOG(FATAL) << "Unexpected character in bitmap string";
}
}
std::shared_ptr<Buffer> buffer;
ABORT_NOT_OK(builder.Finish(&buffer));
return buffer;
}
#define ASSERT_READER_SET(reader) \
do { \
ASSERT_TRUE(reader.IsSet()); \
ASSERT_FALSE(reader.IsNotSet()); \
reader.Next(); \
} while (false)
#define ASSERT_READER_NOT_SET(reader) \
do { \
ASSERT_FALSE(reader.IsSet()); \
ASSERT_TRUE(reader.IsNotSet()); \
reader.Next(); \
} while (false)
// Assert that a BitmapReader yields the given bit values
void ASSERT_READER_VALUES(internal::BitmapReader& reader, std::vector<int> values) {
for (const auto& value : values) {
if (value) {
ASSERT_READER_SET(reader);
} else {
ASSERT_READER_NOT_SET(reader);
}
}
}
// Assert equal contents of a memory area and a vector of bytes
void ASSERT_BYTES_EQ(const uint8_t* left, const std::vector<uint8_t>& right) {
auto left_array = std::vector<uint8_t>(left, left + right.size());
ASSERT_EQ(left_array, right);
}
TEST(BitUtilTests, TestIsMultipleOf64) {
using BitUtil::IsMultipleOf64;
EXPECT_TRUE(IsMultipleOf64(64));
EXPECT_TRUE(IsMultipleOf64(0));
EXPECT_TRUE(IsMultipleOf64(128));
EXPECT_TRUE(IsMultipleOf64(192));
EXPECT_FALSE(IsMultipleOf64(23));
EXPECT_FALSE(IsMultipleOf64(32));
}
TEST(BitUtilTests, TestNextPower2) {
using BitUtil::NextPower2;
ASSERT_EQ(8, NextPower2(6));
ASSERT_EQ(8, NextPower2(8));
ASSERT_EQ(1, NextPower2(1));
ASSERT_EQ(256, NextPower2(131));
ASSERT_EQ(1024, NextPower2(1000));
ASSERT_EQ(4096, NextPower2(4000));
ASSERT_EQ(65536, NextPower2(64000));
ASSERT_EQ(1LL << 32, NextPower2((1LL << 32) - 1));
ASSERT_EQ(1LL << 31, NextPower2((1LL << 31) - 1));
ASSERT_EQ(1LL << 62, NextPower2((1LL << 62) - 1));
}
TEST(BitUtilTests, BytesForBits) {
using BitUtil::BytesForBits;
ASSERT_EQ(BytesForBits(0), 0);
ASSERT_EQ(BytesForBits(1), 1);
ASSERT_EQ(BytesForBits(7), 1);
ASSERT_EQ(BytesForBits(8), 1);
ASSERT_EQ(BytesForBits(9), 2);
ASSERT_EQ(BytesForBits(0xffff), 8192);
ASSERT_EQ(BytesForBits(0x10000), 8192);
ASSERT_EQ(BytesForBits(0x10001), 8193);
ASSERT_EQ(BytesForBits(0x7ffffffffffffff8ll), 0x0fffffffffffffffll);
ASSERT_EQ(BytesForBits(0x7ffffffffffffff9ll), 0x1000000000000000ll);
ASSERT_EQ(BytesForBits(0x7fffffffffffffffll), 0x1000000000000000ll);
}
TEST(BitmapReader, NormalOperation) {
std::shared_ptr<Buffer> buffer;
int64_t length;
for (int64_t offset : {0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120}) {
BitmapFromVector({0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset, &buffer,
&length);
ASSERT_EQ(length, 14);
auto reader = internal::BitmapReader(buffer->mutable_data(), offset, length);
ASSERT_READER_VALUES(reader, {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1});
}
}
TEST(BitmapReader, DoesNotReadOutOfBounds) {
uint8_t bitmap[16] = {0};
const int length = 128;
internal::BitmapReader r1(bitmap, 0, length);
// If this were to read out of bounds, valgrind would tell us
for (int i = 0; i < length; ++i) {
ASSERT_TRUE(r1.IsNotSet());
r1.Next();
}
internal::BitmapReader r2(bitmap, 5, length - 5);
for (int i = 0; i < (length - 5); ++i) {
ASSERT_TRUE(r2.IsNotSet());
r2.Next();
}
// Does not access invalid memory
internal::BitmapReader r3(nullptr, 0, 0);
}
class TestBitmapUInt64Reader : public ::testing::Test {
public:
void AssertWords(const Buffer& buffer, int64_t start_offset, int64_t length,
const std::vector<uint64_t>& expected) {
internal::BitmapUInt64Reader reader(buffer.data(), start_offset, length);
ASSERT_EQ(reader.position(), 0);
ASSERT_EQ(reader.length(), length);
for (const uint64_t word : expected) {
ASSERT_EQ(reader.NextWord(), word);
}
ASSERT_EQ(reader.position(), length);
}
void Check(const Buffer& buffer, int64_t start_offset, int64_t length) {
internal::BitmapUInt64Reader reader(buffer.data(), start_offset, length);
for (int64_t i = 0; i < length; i += 64) {
ASSERT_EQ(reader.position(), i);
const auto nbits = std::min<int64_t>(64, length - i);
uint64_t word = reader.NextWord();
for (int64_t j = 0; j < nbits; ++j) {
ASSERT_EQ(word & 1, BitUtil::GetBit(buffer.data(), start_offset + i + j));
word >>= 1;
}
}
ASSERT_EQ(reader.position(), length);
}
void CheckExtensive(const Buffer& buffer) {
for (const int64_t offset : kTestOffsets) {
for (int64_t length : kTestOffsets) {
if (offset + length <= buffer.size()) {
Check(buffer, offset, length);
length = buffer.size() - offset - length;
if (offset + length <= buffer.size()) {
Check(buffer, offset, length);
}
}
}
}
}
protected:
const std::vector<int64_t> kTestOffsets = {0, 1, 6, 7, 8, 33, 62, 63, 64, 65};
};
TEST_F(TestBitmapUInt64Reader, Empty) {
for (const int64_t offset : kTestOffsets) {
// Does not access invalid memory
internal::BitmapUInt64Reader reader(nullptr, offset, 0);
ASSERT_EQ(reader.position(), 0);
ASSERT_EQ(reader.length(), 0);
}
}
TEST_F(TestBitmapUInt64Reader, Small) {
auto buffer = BitmapFromString(
"11111111 10000000 00000000 00000000 00000000 00000000 00000001 11111111"
"11111111 10000000 00000000 00000000 00000000 00000000 00000001 11111111"
"11111111 10000000 00000000 00000000 00000000 00000000 00000001 11111111"
"11111111 10000000 00000000 00000000 00000000 00000000 00000001 11111111");
// One word
AssertWords(*buffer, 0, 9, {0x1ff});
AssertWords(*buffer, 1, 9, {0xff});
AssertWords(*buffer, 7, 9, {0x3});
AssertWords(*buffer, 8, 9, {0x1});
AssertWords(*buffer, 9, 9, {0x0});
AssertWords(*buffer, 54, 10, {0x3fe});
AssertWords(*buffer, 54, 9, {0x1fe});
AssertWords(*buffer, 54, 8, {0xfe});
AssertWords(*buffer, 55, 9, {0x1ff});
AssertWords(*buffer, 56, 8, {0xff});
AssertWords(*buffer, 57, 7, {0x7f});
AssertWords(*buffer, 63, 1, {0x1});
AssertWords(*buffer, 0, 64, {0xff800000000001ffULL});
// One straddling word
AssertWords(*buffer, 54, 12, {0xffe});
AssertWords(*buffer, 63, 2, {0x3});
// One word (start_offset >= 64)
AssertWords(*buffer, 96, 64, {0x000001ffff800000ULL});
// Two words
AssertWords(*buffer, 0, 128, {0xff800000000001ffULL, 0xff800000000001ffULL});
AssertWords(*buffer, 0, 127, {0xff800000000001ffULL, 0x7f800000000001ffULL});
AssertWords(*buffer, 1, 127, {0xffc00000000000ffULL, 0x7fc00000000000ffULL});
AssertWords(*buffer, 1, 128, {0xffc00000000000ffULL, 0xffc00000000000ffULL});
AssertWords(*buffer, 63, 128, {0xff000000000003ffULL, 0xff000000000003ffULL});
AssertWords(*buffer, 63, 65, {0xff000000000003ffULL, 0x1});
// More than two words
AssertWords(*buffer, 0, 256,
{0xff800000000001ffULL, 0xff800000000001ffULL, 0xff800000000001ffULL,
0xff800000000001ffULL});
AssertWords(*buffer, 1, 255,
{0xffc00000000000ffULL, 0xffc00000000000ffULL, 0xffc00000000000ffULL,
0x7fc00000000000ffULL});
AssertWords(*buffer, 63, 193,
{0xff000000000003ffULL, 0xff000000000003ffULL, 0xff000000000003ffULL, 0x1});
AssertWords(*buffer, 63, 192,
{0xff000000000003ffULL, 0xff000000000003ffULL, 0xff000000000003ffULL});
CheckExtensive(*buffer);
}
TEST_F(TestBitmapUInt64Reader, Random) {
random::RandomArrayGenerator rng(42);
auto buffer = rng.NullBitmap(500, 0.5);
CheckExtensive(*buffer);
}
class TestSetBitRunReader : public ::testing::Test {
public:
std::vector<internal::SetBitRun> ReferenceBitRuns(const uint8_t* data,
int64_t start_offset,
int64_t length) {
std::vector<internal::SetBitRun> runs;
internal::BitRunReaderLinear reader(data, start_offset, length);
int64_t position = 0;
while (position < length) {
const auto br = reader.NextRun();
if (br.set) {
runs.push_back({position, br.length});
}
position += br.length;
}
return runs;
}
template <typename SetBitRunReaderType>
std::vector<internal::SetBitRun> AllBitRuns(SetBitRunReaderType* reader) {
std::vector<internal::SetBitRun> runs;
auto run = reader->NextRun();
while (!run.AtEnd()) {
runs.push_back(run);
run = reader->NextRun();
}
return runs;
}
template <typename SetBitRunReaderType>
void AssertBitRuns(SetBitRunReaderType* reader,
const std::vector<internal::SetBitRun>& expected) {
ASSERT_EQ(AllBitRuns(reader), expected);
}
void AssertBitRuns(const uint8_t* data, int64_t start_offset, int64_t length,
const std::vector<internal::SetBitRun>& expected) {
{
internal::SetBitRunReader reader(data, start_offset, length);
AssertBitRuns(&reader, expected);
}
{
internal::ReverseSetBitRunReader reader(data, start_offset, length);
auto reversed_expected = expected;
std::reverse(reversed_expected.begin(), reversed_expected.end());
AssertBitRuns(&reader, reversed_expected);
}
}
void AssertBitRuns(const Buffer& buffer, int64_t start_offset, int64_t length,
const std::vector<internal::SetBitRun>& expected) {
AssertBitRuns(buffer.data(), start_offset, length, expected);
}
void CheckAgainstReference(const Buffer& buffer, int64_t start_offset, int64_t length) {
const auto expected = ReferenceBitRuns(buffer.data(), start_offset, length);
AssertBitRuns(buffer.data(), start_offset, length, expected);
}
struct Range {
int64_t offset;
int64_t length;
int64_t end_offset() const { return offset + length; }
};
std::vector<Range> BufferTestRanges(const Buffer& buffer) {
const int64_t buffer_size = buffer.size() * 8; // in bits
std::vector<Range> ranges;
for (const int64_t offset : kTestOffsets) {
for (const int64_t length_adjust : kTestOffsets) {
int64_t length = std::min(buffer_size - offset, length_adjust);
EXPECT_GE(length, 0);
ranges.push_back({offset, length});
length = std::min(buffer_size - offset, buffer_size - length_adjust);
EXPECT_GE(length, 0);
ranges.push_back({offset, length});
}
}
return ranges;
}
protected:
const std::vector<int64_t> kTestOffsets = {0, 1, 6, 7, 8, 33, 63, 64, 65, 71};
};
TEST_F(TestSetBitRunReader, Empty) {
for (const int64_t offset : kTestOffsets) {
// Does not access invalid memory
AssertBitRuns(nullptr, offset, 0, {});
}
}
TEST_F(TestSetBitRunReader, OneByte) {
auto buffer = BitmapFromString("01101101");
AssertBitRuns(*buffer, 0, 8, {{1, 2}, {4, 2}, {7, 1}});
for (const char* bitmap_string : {"01101101", "10110110", "00000000", "11111111"}) {
auto buffer = BitmapFromString(bitmap_string);
for (int64_t offset = 0; offset < 8; ++offset) {
for (int64_t length = 0; length <= 8 - offset; ++length) {
CheckAgainstReference(*buffer, offset, length);
}
}
}
}
TEST_F(TestSetBitRunReader, Tiny) {
auto buffer = BitmapFromString("11100011 10001110 00111000 11100011 10001110 00111000");
AssertBitRuns(*buffer, 0, 48,
{{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3}});
AssertBitRuns(*buffer, 0, 46,
{{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3}});
AssertBitRuns(*buffer, 0, 45,
{{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3}});
AssertBitRuns(*buffer, 0, 42,
{{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}});
AssertBitRuns(*buffer, 3, 45,
{{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3}});
AssertBitRuns(*buffer, 3, 43,
{{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3}});
AssertBitRuns(*buffer, 3, 42,
{{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3}});
AssertBitRuns(*buffer, 3, 39, {{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}});
}
TEST_F(TestSetBitRunReader, AllZeros) {
const int64_t kBufferSize = 256;
ASSERT_OK_AND_ASSIGN(auto buffer, AllocateEmptyBitmap(kBufferSize));
for (const auto range : BufferTestRanges(*buffer)) {
AssertBitRuns(*buffer, range.offset, range.length, {});
}
}
TEST_F(TestSetBitRunReader, AllOnes) {
const int64_t kBufferSize = 256;
ASSERT_OK_AND_ASSIGN(auto buffer, AllocateEmptyBitmap(kBufferSize));
BitUtil::SetBitsTo(buffer->mutable_data(), 0, kBufferSize, true);
for (const auto range : BufferTestRanges(*buffer)) {
if (range.length > 0) {
AssertBitRuns(*buffer, range.offset, range.length, {{0, range.length}});
} else {
AssertBitRuns(*buffer, range.offset, range.length, {});
}
}
}
TEST_F(TestSetBitRunReader, Small) {
// Ones then zeros then ones
const int64_t kBufferSize = 256;
const int64_t kOnesLength = 64;
const int64_t kSecondOnesStart = kBufferSize - kOnesLength;
ASSERT_OK_AND_ASSIGN(auto buffer, AllocateEmptyBitmap(kBufferSize));
BitUtil::SetBitsTo(buffer->mutable_data(), 0, kBufferSize, false);
BitUtil::SetBitsTo(buffer->mutable_data(), 0, kOnesLength, true);
BitUtil::SetBitsTo(buffer->mutable_data(), kSecondOnesStart, kOnesLength, true);
for (const auto range : BufferTestRanges(*buffer)) {
std::vector<internal::SetBitRun> expected;
if (range.offset < kOnesLength && range.length > 0) {
expected.push_back({0, std::min(kOnesLength - range.offset, range.length)});
}
if (range.offset + range.length > kSecondOnesStart) {
expected.push_back({kSecondOnesStart - range.offset,
range.length + range.offset - kSecondOnesStart});
}
AssertBitRuns(*buffer, range.offset, range.length, expected);
}
}
TEST_F(TestSetBitRunReader, SingleRun) {
// One single run of ones, at varying places in the buffer
const int64_t kBufferSize = 512;
ASSERT_OK_AND_ASSIGN(auto buffer, AllocateEmptyBitmap(kBufferSize));
for (const auto ones_range : BufferTestRanges(*buffer)) {
BitUtil::SetBitsTo(buffer->mutable_data(), 0, kBufferSize, false);
BitUtil::SetBitsTo(buffer->mutable_data(), ones_range.offset, ones_range.length,
true);
for (const auto range : BufferTestRanges(*buffer)) {
std::vector<internal::SetBitRun> expected;
if (range.length && ones_range.length && range.offset < ones_range.end_offset() &&
ones_range.offset < range.end_offset()) {
// The two ranges intersect
const int64_t intersect_start = std::max(range.offset, ones_range.offset);
const int64_t intersect_stop =
std::min(range.end_offset(), ones_range.end_offset());
expected.push_back(
{intersect_start - range.offset, intersect_stop - intersect_start});
}
AssertBitRuns(*buffer, range.offset, range.length, expected);
}
}
}
TEST_F(TestSetBitRunReader, Random) {
const int64_t kBufferSize = 4096;
arrow::random::RandomArrayGenerator rng(42);
for (const double set_probability : {0.003, 0.01, 0.1, 0.5, 0.9, 0.99, 0.997}) {
auto arr = rng.Boolean(kBufferSize, set_probability);
auto buffer = arr->data()->buffers[1];
for (const auto range : BufferTestRanges(*buffer)) {
CheckAgainstReference(*buffer, range.offset, range.length);
}
}
}
TEST(BitRunReader, ZeroLength) {
internal::BitRunReader reader(nullptr, /*start_offset=*/0, /*length=*/0);
EXPECT_EQ(reader.NextRun().length, 0);
}
TEST(BitRunReader, NormalOperation) {
std::vector<int> bm_vector = {1, 0, 1}; // size: 3
bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); // size: 8
bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); // size: 15
bm_vector.insert(bm_vector.end(), /*n=*/3, /*val=*/0); // size: 18
bm_vector.insert(bm_vector.end(), /*n=*/25, /*val=*/1); // size: 43
bm_vector.insert(bm_vector.end(), /*n=*/21, /*val=*/0); // size: 64
bm_vector.insert(bm_vector.end(), /*n=*/26, /*val=*/1); // size: 90
bm_vector.insert(bm_vector.end(), /*n=*/130, /*val=*/0); // size: 220
bm_vector.insert(bm_vector.end(), /*n=*/65, /*val=*/1); // size: 285
std::shared_ptr<Buffer> bitmap;
int64_t length;
BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length);
internal::BitRunReader reader(bitmap->data(), /*start_offset=*/0, /*length=*/length);
std::vector<internal::BitRun> results;
internal::BitRun rl;
do {
rl = reader.NextRun();
results.push_back(rl);
} while (rl.length != 0);
EXPECT_EQ(results.back().length, 0);
results.pop_back();
EXPECT_THAT(results, ElementsAreArray(
std::vector<internal::BitRun>{{/*length=*/1, /*set=*/true},
{/*length=*/1, /*set=*/false},
{/*length=*/1, /*set=*/true},
{/*length=*/5, /*set=*/false},
{/*length=*/7, /*set=*/true},
{/*length=*/3, /*set=*/false},
{/*length=*/25, /*set=*/true},
{/*length=*/21, /*set=*/false},
{/*length=*/26, /*set=*/true},
{/*length=*/130, /*set=*/false},
{/*length=*/65, /*set=*/true}}));
}
TEST(BitRunReader, AllFirstByteCombos) {
for (int offset = 0; offset < 8; offset++) {
for (int64_t x = 0; x < (1 << 8) - 1; x++) {
int64_t bits = BitUtil::ToLittleEndian(x);
internal::BitRunReader reader(reinterpret_cast<uint8_t*>(&bits),
/*start_offset=*/offset,
/*length=*/8 - offset);
std::vector<internal::BitRun> results;
internal::BitRun rl;
do {
rl = reader.NextRun();
results.push_back(rl);
} while (rl.length != 0);
EXPECT_EQ(results.back().length, 0);
results.pop_back();
int64_t sum = 0;
for (const auto& result : results) {
sum += result.length;
}
ASSERT_EQ(sum, 8 - offset);
}
}
}
TEST(BitRunReader, TruncatedAtWord) {
std::vector<int> bm_vector;
bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1);
bm_vector.insert(bm_vector.end(), /*n=*/58, /*val=*/0);
std::shared_ptr<Buffer> bitmap;
int64_t length;
BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length);
internal::BitRunReader reader(bitmap->data(), /*start_offset=*/1,
/*length=*/63);
std::vector<internal::BitRun> results;
internal::BitRun rl;
do {
rl = reader.NextRun();
results.push_back(rl);
} while (rl.length != 0);
EXPECT_EQ(results.back().length, 0);
results.pop_back();
EXPECT_THAT(results,
ElementsAreArray(std::vector<internal::BitRun>{
{/*length=*/6, /*set=*/true}, {/*length=*/57, /*set=*/false}}));
}
TEST(BitRunReader, ScalarComparison) {
::arrow::random::RandomArrayGenerator rag(/*seed=*/23);
constexpr int64_t kNumBits = 1000000;
std::shared_ptr<Buffer> buffer =
rag.Boolean(kNumBits, /*set_probability=*/.4)->data()->buffers[1];
const uint8_t* bitmap = buffer->data();
internal::BitRunReader reader(bitmap, 0, kNumBits);
internal::BitRunReaderLinear scalar_reader(bitmap, 0, kNumBits);
internal::BitRun br, brs;
int64_t br_bits = 0;
int64_t brs_bits = 0;
do {
br = reader.NextRun();
brs = scalar_reader.NextRun();
br_bits += br.length;
brs_bits += brs.length;
EXPECT_EQ(br.length, brs.length);
if (br.length > 0) {
EXPECT_EQ(br, brs) << internal::Bitmap(bitmap, 0, kNumBits).ToString() << br_bits
<< " " << brs_bits;
}
} while (brs.length != 0);
EXPECT_EQ(br_bits, brs_bits);
}
TEST(BitRunReader, TruncatedWithinWordMultipleOf8Bits) {
std::vector<int> bm_vector;
bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1);
bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0);
std::shared_ptr<Buffer> bitmap;
int64_t length;
BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length);
internal::BitRunReader reader(bitmap->data(), /*start_offset=*/1,
/*length=*/7);
std::vector<internal::BitRun> results;
internal::BitRun rl;
do {
rl = reader.NextRun();
results.push_back(rl);
} while (rl.length != 0);
EXPECT_EQ(results.back().length, 0);
results.pop_back();
EXPECT_THAT(results, ElementsAreArray(std::vector<internal::BitRun>{
{/*length=*/6, /*set=*/true}, {/*length=*/1, /*set=*/false}}));
}
TEST(BitRunReader, TruncatedWithinWord) {
std::vector<int> bm_vector;
bm_vector.insert(bm_vector.end(), /*n=*/37 + 40, /*val=*/0);
bm_vector.insert(bm_vector.end(), /*n=*/23, /*val=*/1);
std::shared_ptr<Buffer> bitmap;
int64_t length;
BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length);
constexpr int64_t kOffset = 37;
internal::BitRunReader reader(bitmap->data(), /*start_offset=*/kOffset,
/*length=*/53);
std::vector<internal::BitRun> results;
internal::BitRun rl;
do {
rl = reader.NextRun();
results.push_back(rl);
} while (rl.length != 0);
EXPECT_EQ(results.back().length, 0);
results.pop_back();
EXPECT_THAT(results,
ElementsAreArray(std::vector<internal::BitRun>{
{/*length=*/40, /*set=*/false}, {/*length=*/13, /*set=*/true}}));
}
TEST(BitRunReader, TruncatedMultipleWords) {
std::vector<int> bm_vector = {1, 0, 1}; // size: 3
bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); // size: 8
bm_vector.insert(bm_vector.end(), /*n=*/30, /*val=*/1); // size: 38
bm_vector.insert(bm_vector.end(), /*n=*/95, /*val=*/0); // size: 133
std::shared_ptr<Buffer> bitmap;
int64_t length;
BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length);
constexpr int64_t kOffset = 5;
internal::BitRunReader reader(bitmap->data(), /*start_offset=*/kOffset,
/*length=*/length - (kOffset + 3));
std::vector<internal::BitRun> results;
internal::BitRun rl;
do {
rl = reader.NextRun();
results.push_back(rl);
} while (rl.length != 0);
EXPECT_EQ(results.back().length, 0);
results.pop_back();
EXPECT_THAT(results, ElementsAreArray(std::vector<internal::BitRun>{
{/*length=*/3, /*set=*/false},
{/*length=*/30, /*set=*/true},
{/*length=*/92, /*set=*/false}}));
}
TEST(BitmapWriter, NormalOperation) {
for (const auto fill_byte_int : {0x00, 0xff}) {
const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
{
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
auto writer = internal::BitmapWriter(bitmap, 0, 12);
WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
// {0b00110110, 0b....1010, ........, ........}
ASSERT_BYTES_EQ(bitmap, {0x36, static_cast<uint8_t>(0x0a | (fill_byte & 0xf0)),
fill_byte, fill_byte});
}
{
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
auto writer = internal::BitmapWriter(bitmap, 3, 12);
WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
// {0b10110..., 0b.1010001, ........, ........}
ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0xb0 | (fill_byte & 0x07)),
static_cast<uint8_t>(0x51 | (fill_byte & 0x80)), fill_byte,
fill_byte});
}
{
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
auto writer = internal::BitmapWriter(bitmap, 20, 12);
WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
// {........, ........, 0b0110...., 0b10100011}
ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte,
static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
}
// 0-length writes
for (int64_t pos = 0; pos < 32; ++pos) {
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
auto writer = internal::BitmapWriter(bitmap, pos, 0);
WriteVectorToWriter(writer, {});
ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte, fill_byte, fill_byte});
}
}
}
TEST(BitmapWriter, DoesNotWriteOutOfBounds) {
uint8_t bitmap[16] = {0};
const int length = 128;
int64_t num_values = 0;
internal::BitmapWriter r1(bitmap, 0, length);
// If this were to write out of bounds, valgrind would tell us
for (int i = 0; i < length; ++i) {
r1.Set();
r1.Clear();
r1.Next();
}
r1.Finish();
num_values = r1.position();
ASSERT_EQ(length, num_values);
internal::BitmapWriter r2(bitmap, 5, length - 5);
for (int i = 0; i < (length - 5); ++i) {
r2.Set();
r2.Clear();
r2.Next();
}
r2.Finish();
num_values = r2.position();
ASSERT_EQ((length - 5), num_values);
}
TEST(FirstTimeBitmapWriter, NormalOperation) {
for (const auto fill_byte_int : {0x00, 0xff}) {
const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
{
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
auto writer = internal::FirstTimeBitmapWriter(bitmap, 0, 12);
WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
// {0b00110110, 0b1010, 0, 0}
ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a});
}
{
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 12);
WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
// {0b00110110, 0b1010, 0, 0}
ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
}
// Consecutive write chunks
{
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 0, 6);
WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1});
}
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 6, 3);
WriteVectorToWriter(writer, {0, 0, 0});
}
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 9, 3);
WriteVectorToWriter(writer, {1, 0, 1});
}
ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a});
}
{
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 0);
WriteVectorToWriter(writer, {});
}
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 6);
WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1});
}
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 10, 3);
WriteVectorToWriter(writer, {0, 0, 0});
}
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 0);
WriteVectorToWriter(writer, {});
}
{
auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 3);
WriteVectorToWriter(writer, {1, 0, 1});
}
ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
}
}
}
std::string BitmapToString(const uint8_t* bitmap, int64_t bit_count) {
return arrow::internal::Bitmap(bitmap, /*offset*/ 0, /*length=*/bit_count).ToString();
}
std::string BitmapToString(const std::vector<uint8_t>& bitmap, int64_t bit_count) {
return BitmapToString(bitmap.data(), bit_count);
}
TEST(FirstTimeBitmapWriter, AppendWordOffsetOverwritesCorrectBitsOnExistingByte) {
auto check_append = [](const std::string& expected_bits, int64_t offset) {
std::vector<uint8_t> valid_bits = {0x00};
constexpr int64_t kBitsAfterAppend = 8;
internal::FirstTimeBitmapWriter writer(valid_bits.data(), offset,
/*length=*/(8 * valid_bits.size()) - offset);
writer.AppendWord(/*word=*/0xFF, /*number_of_bits=*/kBitsAfterAppend - offset);
writer.Finish();
EXPECT_EQ(BitmapToString(valid_bits, kBitsAfterAppend), expected_bits);
};
check_append("11111111", 0);
check_append("01111111", 1);
check_append("00111111", 2);
check_append("00011111", 3);
check_append("00001111", 4);
check_append("00000111", 5);
check_append("00000011", 6);
check_append("00000001", 7);
auto check_with_set = [](const std::string& expected_bits, int64_t offset) {
std::vector<uint8_t> valid_bits = {0x1};
constexpr int64_t kBitsAfterAppend = 8;
internal::FirstTimeBitmapWriter writer(valid_bits.data(), offset,
/*length=*/(8 * valid_bits.size()) - offset);
writer.AppendWord(/*word=*/0xFF, /*number_of_bits=*/kBitsAfterAppend - offset);
writer.Finish();
EXPECT_EQ(BitmapToString(valid_bits, kBitsAfterAppend), expected_bits);
};
// 0ffset zero would not be a valid mask.
check_with_set("11111111", 1);
check_with_set("10111111", 2);
check_with_set("10011111", 3);
check_with_set("10001111", 4);
check_with_set("10000111", 5);
check_with_set("10000011", 6);
check_with_set("10000001", 7);
auto check_with_preceding = [](const std::string& expected_bits, int64_t offset) {
std::vector<uint8_t> valid_bits = {0xFF};
constexpr int64_t kBitsAfterAppend = 8;
internal::FirstTimeBitmapWriter writer(valid_bits.data(), offset,
/*length=*/(8 * valid_bits.size()) - offset);
writer.AppendWord(/*word=*/0xFF, /*number_of_bits=*/kBitsAfterAppend - offset);
writer.Finish();
EXPECT_EQ(BitmapToString(valid_bits, kBitsAfterAppend), expected_bits);
};
check_with_preceding("11111111", 0);
check_with_preceding("11111111", 1);
check_with_preceding("11111111", 2);
check_with_preceding("11111111", 3);
check_with_preceding("11111111", 4);
check_with_preceding("11111111", 5);
check_with_preceding("11111111", 6);
check_with_preceding("11111111", 7);
}
TEST(FirstTimeBitmapWriter, AppendZeroBitsHasNoImpact) {
std::vector<uint8_t> valid_bits(/*count=*/1, 0);
internal::FirstTimeBitmapWriter writer(valid_bits.data(), /*start_offset=*/1,
/*length=*/valid_bits.size() * 8);
writer.AppendWord(/*word=*/0xFF, /*number_of_bits=*/0);
writer.AppendWord(/*word=*/0xFF, /*number_of_bits=*/0);
writer.AppendWord(/*word=*/0x01, /*number_of_bits=*/1);
writer.Finish();
EXPECT_EQ(valid_bits[0], 0x2);
}
TEST(FirstTimeBitmapWriter, AppendLessThanByte) {
{
std::vector<uint8_t> valid_bits(/*count*/ 8, 0);
internal::FirstTimeBitmapWriter writer(valid_bits.data(), /*start_offset=*/1,
/*length=*/8);
writer.AppendWord(0xB, 4);
writer.Finish();
EXPECT_EQ(BitmapToString(valid_bits, /*bit_count=*/8), "01101000");
}
{
// Test with all bits initially set.
std::vector<uint8_t> valid_bits(/*count*/ 8, 0xFF);
internal::FirstTimeBitmapWriter writer(valid_bits.data(), /*start_offset=*/1,
/*length=*/8);
writer.AppendWord(0xB, 4);
writer.Finish();
EXPECT_EQ(BitmapToString(valid_bits, /*bit_count=*/8), "11101000");
}
}
TEST(FirstTimeBitmapWriter, AppendByteThenMore) {
{
std::vector<uint8_t> valid_bits(/*count*/ 8, 0);
internal::FirstTimeBitmapWriter writer(valid_bits.data(), /*start_offset=*/0,
/*length=*/9);
writer.AppendWord(0xC3, 8);
writer.AppendWord(0x01, 1);
writer.Finish();
EXPECT_EQ(BitmapToString(valid_bits, /*bit_count=*/9), "11000011 1");
}
{
std::vector<uint8_t> valid_bits(/*count*/ 8, 0xFF);
internal::FirstTimeBitmapWriter writer(valid_bits.data(), /*start_offset=*/0,
/*length=*/9);
writer.AppendWord(0xC3, 8);
writer.AppendWord(0x01, 1);
writer.Finish();
EXPECT_EQ(BitmapToString(valid_bits, /*bit_count=*/9), "11000011 1");
}
}
TEST(FirstTimeBitmapWriter, AppendWordShiftsBitsCorrectly) {
constexpr uint64_t kPattern = 0x9A9A9A9A9A9A9A9A;
auto check_append = [&](const std::string& leading_bits, const std::string& middle_bits,
const std::string& trailing_bits, int64_t offset,
bool preset_buffer_bits = false) {
ASSERT_GE(offset, 8);
std::vector<uint8_t> valid_bits(/*count=*/10, preset_buffer_bits ? 0xFF : 0);
valid_bits[0] = 0x99;
internal::FirstTimeBitmapWriter writer(valid_bits.data(), offset,
/*length=*/(9 * sizeof(kPattern)) - offset);
writer.AppendWord(/*word=*/kPattern, /*number_of_bits=*/64);
writer.Finish();
EXPECT_EQ(valid_bits[0], 0x99); // shouldn't get changed.
EXPECT_EQ(BitmapToString(valid_bits.data() + 1, /*num_bits=*/8), leading_bits);
for (int x = 2; x < 9; x++) {
EXPECT_EQ(BitmapToString(valid_bits.data() + x, /*num_bits=*/8), middle_bits)
<< "x: " << x << " " << offset << " " << BitmapToString(valid_bits.data(), 80);
}
EXPECT_EQ(BitmapToString(valid_bits.data() + 9, /*num_bits=*/8), trailing_bits);
};
// Original Pattern = "01011001"
check_append(/*leading_bits= */ "01011001", /*middle_bits=*/"01011001",
/*trailing_bits=*/"00000000", /*offset=*/8);
check_append("00101100", "10101100", "10000000", 9);
check_append("00010110", "01010110", "01000000", 10);
check_append("00001011", "00101011", "00100000", 11);
check_append("00000101", "10010101", "10010000", 12);
check_append("00000010", "11001010", "11001000", 13);
check_append("00000001", "01100101", "01100100", 14);
check_append("00000000", "10110010", "10110010", 15);
check_append(/*leading_bits= */ "01011001", /*middle_bits=*/"01011001",
/*trailing_bits=*/"11111111", /*offset=*/8, /*preset_buffer_bits=*/true);
check_append("10101100", "10101100", "10000000", 9, true);
check_append("11010110", "01010110", "01000000", 10, true);
check_append("11101011", "00101011", "00100000", 11, true);
check_append("11110101", "10010101", "10010000", 12, true);
check_append("11111010", "11001010", "11001000", 13, true);
check_append("11111101", "01100101", "01100100", 14, true);
check_append("11111110", "10110010", "10110010", 15, true);
}
TEST(TestAppendBitmap, AppendWordOnlyAppropriateBytesWritten) {
std::vector<uint8_t> valid_bits = {0x00, 0x00};
uint64_t bitmap = 0x1FF;
{
internal::FirstTimeBitmapWriter writer(valid_bits.data(), /*start_offset=*/1,
/*length=*/(8 * valid_bits.size()) - 1);
writer.AppendWord(bitmap, /*number_of_bits*/ 7);
writer.Finish();
EXPECT_THAT(valid_bits, ElementsAreArray(std::vector<uint8_t>{0xFE, 0x00}));
}
{
internal::FirstTimeBitmapWriter writer(valid_bits.data(), /*start_offset=*/1,
/*length=*/(8 * valid_bits.size()) - 1);
writer.AppendWord(bitmap, /*number_of_bits*/ 8);
writer.Finish();
EXPECT_THAT(valid_bits, ElementsAreArray(std::vector<uint8_t>{0xFE, 0x03}));
}
}
// Tests for GenerateBits and GenerateBitsUnrolled
struct GenerateBitsFunctor {
template <class Generator>
void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
return internal::GenerateBits(bitmap, start_offset, length, g);
}
};
struct GenerateBitsUnrolledFunctor {
template <class Generator>
void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
return internal::GenerateBitsUnrolled(bitmap, start_offset, length, g);
}
};
template <typename T>
class TestGenerateBits : public ::testing::Test {};
typedef ::testing::Types<GenerateBitsFunctor, GenerateBitsUnrolledFunctor>
GenerateBitsTypes;
TYPED_TEST_SUITE(TestGenerateBits, GenerateBitsTypes);
TYPED_TEST(TestGenerateBits, NormalOperation) {
const int kSourceSize = 256;
uint8_t source[kSourceSize];
random_bytes(kSourceSize, 0, source);
const int64_t start_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 31, 32};
const int64_t lengths[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 16,
17, 21, 31, 32, 100, 201, 202, 203, 204, 205, 206, 207};
const uint8_t fill_bytes[] = {0x00, 0xff};
for (const int64_t start_offset : start_offsets) {
for (const int64_t length : lengths) {
for (const uint8_t fill_byte : fill_bytes) {
uint8_t bitmap[kSourceSize + 1];
memset(bitmap, fill_byte, kSourceSize + 1);
// First call GenerateBits
{
int64_t ncalled = 0;
internal::BitmapReader reader(source, 0, length);
TypeParam()(bitmap, start_offset, length, [&]() -> bool {
bool b = reader.IsSet();
reader.Next();
++ncalled;
return b;
});
ASSERT_EQ(ncalled, length);
}
// Then check generated contents
{
internal::BitmapReader source_reader(source, 0, length);
internal::BitmapReader result_reader(bitmap, start_offset, length);
for (int64_t i = 0; i < length; ++i) {
ASSERT_EQ(source_reader.IsSet(), result_reader.IsSet())
<< "mismatch at bit #" << i;
source_reader.Next();
result_reader.Next();
}
}
// Check bits preceding generated contents weren't clobbered
{
internal::BitmapReader reader_before(bitmap, 0, start_offset);
for (int64_t i = 0; i < start_offset; ++i) {
ASSERT_EQ(reader_before.IsSet(), fill_byte == 0xff)
<< "mismatch at preceding bit #" << start_offset - i;
}
}
// Check the byte following generated contents wasn't clobbered
auto byte_after = bitmap[BitUtil::CeilDiv(start_offset + length, 8)];
ASSERT_EQ(byte_after, fill_byte);
}
}
}
}
// Tests for VisitBits and VisitBitsUnrolled. Based on the tests for GenerateBits and
// GenerateBitsUnrolled.
struct VisitBitsFunctor {
void operator()(const uint8_t* bitmap, int64_t start_offset, int64_t length,
bool* destination) {
auto writer = [&](const bool& bit_value) { *destination++ = bit_value; };
return internal::VisitBits(bitmap, start_offset, length, writer);
}
};
struct VisitBitsUnrolledFunctor {
void operator()(const uint8_t* bitmap, int64_t start_offset, int64_t length,
bool* destination) {
auto writer = [&](const bool& bit_value) { *destination++ = bit_value; };
return internal::VisitBitsUnrolled(bitmap, start_offset, length, writer);
}
};
/* Define a typed test class with some utility members. */
template <typename T>
class TestVisitBits : public ::testing::Test {
protected:
// The bitmap size that will be used throughout the VisitBits tests.
static const int64_t kBitmapSizeInBytes = 32;
// Typedefs for the source and expected destination types in this test.
using PackedBitmapType = std::array<uint8_t, kBitmapSizeInBytes>;
using UnpackedBitmapType = std::array<bool, 8 * kBitmapSizeInBytes>;
// Helper functions to generate the source bitmap and expected destination
// arrays.
static PackedBitmapType generate_packed_bitmap() {
PackedBitmapType bitmap;
// Assign random values into the source array.
random_bytes(kBitmapSizeInBytes, 0, bitmap.data());
return bitmap;
}
static UnpackedBitmapType generate_unpacked_bitmap(PackedBitmapType bitmap) {
// Use a BitmapReader (tested earlier) to populate the expected
// unpacked bitmap.
UnpackedBitmapType result;
internal::BitmapReader reader(bitmap.data(), 0, 8 * kBitmapSizeInBytes);
for (int64_t index = 0; index < 8 * kBitmapSizeInBytes; ++index) {
result[index] = reader.IsSet();
reader.Next();
}
return result;
}
// A pre-defined packed bitmap for use in test cases.
const PackedBitmapType packed_bitmap_;
// The expected unpacked bitmap that would be generated if each bit in
// the entire source bitmap was correctly unpacked to bytes.
const UnpackedBitmapType expected_unpacked_bitmap_;
// Define a test constructor that populates the packed bitmap and the expected
// unpacked bitmap.
TestVisitBits()
: packed_bitmap_(generate_packed_bitmap()),
expected_unpacked_bitmap_(generate_unpacked_bitmap(packed_bitmap_)) {}
};
using VisitBitsTestTypes = ::testing::Types<VisitBitsFunctor, VisitBitsUnrolledFunctor>;
TYPED_TEST_SUITE(TestVisitBits, VisitBitsTestTypes);
/* Test bit-unpacking when reading less than eight bits from the input */
TYPED_TEST(TestVisitBits, NormalOperation) {
typename TestFixture::UnpackedBitmapType unpacked_bitmap;
const int64_t start_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 31, 32};
const int64_t lengths[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 16,
17, 21, 31, 32, 100, 201, 202, 203, 204, 205, 206, 207};
const bool fill_values[] = {false, true};
for (const bool fill_value : fill_values) {
auto is_unmodified = [=](bool value) -> bool { return value == fill_value; };
for (const int64_t start_offset : start_offsets) {
for (const int64_t length : lengths) {
std::string failure_info = std::string("fill value: ") +
std::to_string(fill_value) +
", start offset: " + std::to_string(start_offset) +
", length: " + std::to_string(length);
// Pre-fill the unpacked_bitmap array.
unpacked_bitmap.fill(fill_value);
// Attempt to read bits from the input bitmap into the unpacked_bitmap bitmap.
using VisitBitsFunctor = TypeParam;
VisitBitsFunctor()(this->packed_bitmap_.data(), start_offset, length,
unpacked_bitmap.data() + start_offset);
// Verify that the correct values have been written in the [start_offset,
// start_offset+length) range.
EXPECT_TRUE(std::equal(unpacked_bitmap.begin() + start_offset,
unpacked_bitmap.begin() + start_offset + length,
this->expected_unpacked_bitmap_.begin() + start_offset))
<< "Invalid bytes unpacked when using " << failure_info;
// Verify that the unpacked_bitmap array has not changed before or after
// the [start_offset, start_offset+length) range.
EXPECT_TRUE(std::all_of(unpacked_bitmap.begin(),
unpacked_bitmap.begin() + start_offset, is_unmodified))
<< "Unexpected modification to unpacked_bitmap array before written range "
"when using "
<< failure_info;
EXPECT_TRUE(std::all_of(unpacked_bitmap.begin() + start_offset + length,
unpacked_bitmap.end(), is_unmodified))
<< "Unexpected modification to unpacked_bitmap array after written range "
"when using "
<< failure_info;
}
}
}
}
struct BitmapOperation {
virtual Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset) const = 0;
virtual Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out_buffer) const = 0;
virtual ~BitmapOperation() = default;
};
struct BitmapAndOp : public BitmapOperation {
Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset) const override {
return BitmapAnd(pool, left, left_offset, right, right_offset, length, out_offset);
}
Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out_buffer) const override {
BitmapAnd(left, left_offset, right, right_offset, length, out_offset, out_buffer);
return Status::OK();
}
};
struct BitmapOrOp : public BitmapOperation {
Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset) const override {
return BitmapOr(pool, left, left_offset, right, right_offset, length, out_offset);
}
Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out_buffer) const override {
BitmapOr(left, left_offset, right, right_offset, length, out_offset, out_buffer);
return Status::OK();
}
};
struct BitmapXorOp : public BitmapOperation {
Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset) const override {
return BitmapXor(pool, left, left_offset, right, right_offset, length, out_offset);
}
Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out_buffer) const override {
BitmapXor(left, left_offset, right, right_offset, length, out_offset, out_buffer);
return Status::OK();
}
};
struct BitmapAndNotOp : public BitmapOperation {
Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset) const override {
return BitmapAndNot(pool, left, left_offset, right, right_offset, length, out_offset);
}
Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out_buffer) const override {
BitmapAndNot(left, left_offset, right, right_offset, length, out_offset, out_buffer);
return Status::OK();
}
};
class BitmapOp : public TestBase {
public:
void TestAligned(const BitmapOperation& op, const std::vector<int>& left_bits,
const std::vector<int>& right_bits,
const std::vector<int>& result_bits) {
std::shared_ptr<Buffer> left, right, out;
int64_t length;
for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536}) {
BitmapFromVector(left_bits, left_offset, &left, &length);
for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) {
BitmapFromVector(right_bits, right_offset, &right, &length);
for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) {
ASSERT_OK_AND_ASSIGN(
out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
right->mutable_data(), right_offset, length, out_offset));
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
right_offset, length, out_offset, out->mutable_data()));
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
}
}
}
}
void TestUnaligned(const BitmapOperation& op, const std::vector<int>& left_bits,
const std::vector<int>& right_bits,
const std::vector<int>& result_bits) {
std::shared_ptr<Buffer> left, right, out;
int64_t length;
auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536};
for (int64_t left_offset : offset_values) {
BitmapFromVector(left_bits, left_offset, &left, &length);
for (int64_t right_offset : offset_values) {
BitmapFromVector(right_bits, right_offset, &right, &length);
for (int64_t out_offset : offset_values) {
ASSERT_OK_AND_ASSIGN(
out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
right->mutable_data(), right_offset, length, out_offset));
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
right_offset, length, out_offset, out->mutable_data()));
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
}
}
}
}
};
TEST_F(BitmapOp, And) {
BitmapAndOp op;
std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
std::vector<int> result = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
TestAligned(op, left, right, result);
TestUnaligned(op, left, right, result);
}
TEST_F(BitmapOp, Or) {
BitmapOrOp op;
std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0};
std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
std::vector<int> result = {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0};
TestAligned(op, left, right, result);
TestUnaligned(op, left, right, result);
}
TEST_F(BitmapOp, Xor) {
BitmapXorOp op;
std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
std::vector<int> result = {0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
TestAligned(op, left, right, result);
TestUnaligned(op, left, right, result);
}
TEST_F(BitmapOp, AndNot) {
BitmapAndNotOp op;
std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
std::vector<int> result = {0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1};
TestAligned(op, left, right, result);
TestUnaligned(op, left, right, result);
}
TEST_F(BitmapOp, RandomXor) {
const int kBitCount = 1000;
uint8_t buffer[kBitCount * 2] = {0};
random_bytes(kBitCount * 2, 0, buffer);
std::vector<int> left(kBitCount);
std::vector<int> right(kBitCount);
std::vector<int> result(kBitCount);
for (int i = 0; i < kBitCount; ++i) {
left[i] = buffer[i] & 1;
right[i] = buffer[i + kBitCount] & 1;
result[i] = left[i] ^ right[i];
}
BitmapXorOp op;
for (int i = 0; i < 3; ++i) {
TestAligned(op, left, right, result);
TestUnaligned(op, left, right, result);
left.resize(left.size() * 5 / 11);
right.resize(left.size());
result.resize(left.size());
}
}
static inline int64_t SlowCountBits(const uint8_t* data, int64_t bit_offset,
int64_t length) {
int64_t count = 0;
for (int64_t i = bit_offset; i < bit_offset + length; ++i) {
if (BitUtil::GetBit(data, i)) {
++count;
}
}
return count;
}
TEST(BitUtilTests, TestCountSetBits) {
const int kBufferSize = 1000;
alignas(8) uint8_t buffer[kBufferSize] = {0};
const int buffer_bits = kBufferSize * 8;
random_bytes(kBufferSize, 0, buffer);
// Check start addresses with 64-bit alignment and without
for (const uint8_t* data : {buffer, buffer + 1, buffer + 7}) {
for (const int num_bits : {buffer_bits - 96, buffer_bits - 101, buffer_bits - 127}) {
std::vector<int64_t> offsets = {
0, 12, 16, 32, 37, 63, 64, 128, num_bits - 30, num_bits - 64};
for (const int64_t offset : offsets) {
int64_t result = CountSetBits(data, offset, num_bits - offset);
int64_t expected = SlowCountBits(data, offset, num_bits - offset);
ASSERT_EQ(expected, result);
}
}
}
}
TEST(BitUtilTests, TestSetBitsTo) {
using BitUtil::SetBitsTo;
for (const auto fill_byte_int : {0x00, 0xff}) {
const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
{
// test set within a byte
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
SetBitsTo(bitmap, 2, 2, true);
SetBitsTo(bitmap, 4, 2, false);
ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & ~0x3C) | 0xC)});
}
{
// test straddling a single byte boundary
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
SetBitsTo(bitmap, 4, 7, true);
SetBitsTo(bitmap, 11, 7, false);
ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x7,
static_cast<uint8_t>(fill_byte & ~0x3)});
}
{
// test byte aligned end
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
SetBitsTo(bitmap, 4, 4, true);
SetBitsTo(bitmap, 8, 8, false);
ASSERT_BYTES_EQ(bitmap,
{static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x00, fill_byte});
}
{
// test byte aligned end, multiple bytes
uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
SetBitsTo(bitmap, 0, 24, false);
uint8_t false_byte = static_cast<uint8_t>(0);
ASSERT_BYTES_EQ(bitmap, {false_byte, false_byte, false_byte, fill_byte});
}
}
}
TEST(BitUtilTests, TestCopyBitmap) {
const int kBufferSize = 1000;
ASSERT_OK_AND_ASSIGN(auto buffer, AllocateBuffer(kBufferSize));
memset(buffer->mutable_data(), 0, kBufferSize);
random_bytes(kBufferSize, 0, buffer->mutable_data());
const uint8_t* src = buffer->data();
std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8};
std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
for (int64_t num_bits : lengths) {
for (int64_t offset : offsets) {
const int64_t copy_length = num_bits - offset;
std::shared_ptr<Buffer> copy;
ASSERT_OK_AND_ASSIGN(copy,
CopyBitmap(default_memory_pool(), src, offset, copy_length));
for (int64_t i = 0; i < copy_length; ++i) {
ASSERT_EQ(BitUtil::GetBit(src, i + offset), BitUtil::GetBit(copy->data(), i));
}
}
}
}
TEST(BitUtilTests, TestCopyBitmapPreAllocated) {
const int kBufferSize = 1000;
std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8};
std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
ASSERT_OK_AND_ASSIGN(auto buffer, AllocateBuffer(kBufferSize));
memset(buffer->mutable_data(), 0, kBufferSize);
random_bytes(kBufferSize, 0, buffer->mutable_data());
const uint8_t* src = buffer->data();
// Add 16 byte padding on both sides
ASSERT_OK_AND_ASSIGN(auto other_buffer, AllocateBuffer(kBufferSize + 32));
memset(other_buffer->mutable_data(), 0, kBufferSize + 32);
random_bytes(kBufferSize + 32, 0, other_buffer->mutable_data());
const uint8_t* other = other_buffer->data();
for (int64_t num_bits : lengths) {
for (int64_t offset : offsets) {
for (int64_t dest_offset : offsets) {
const int64_t copy_length = num_bits - offset;
ASSERT_OK_AND_ASSIGN(auto copy, AllocateBuffer(other_buffer->size()));
memcpy(copy->mutable_data(), other_buffer->data(), other_buffer->size());
CopyBitmap(src, offset, copy_length, copy->mutable_data(), dest_offset);
for (int64_t i = 0; i < dest_offset; ++i) {
ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
}
for (int64_t i = 0; i < copy_length; ++i) {
ASSERT_EQ(BitUtil::GetBit(src, i + offset),
BitUtil::GetBit(copy->data(), i + dest_offset));
}
for (int64_t i = dest_offset + copy_length; i < (other_buffer->size() * 8); ++i) {
ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
}
}
}
}
}
TEST(BitUtilTests, TestCopyAndInvertBitmapPreAllocated) {
const int kBufferSize = 1000;
std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8};
std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
ASSERT_OK_AND_ASSIGN(auto buffer, AllocateBuffer(kBufferSize));
memset(buffer->mutable_data(), 0, kBufferSize);
random_bytes(kBufferSize, 0, buffer->mutable_data());
const uint8_t* src = buffer->data();
// Add 16 byte padding on both sides
ASSERT_OK_AND_ASSIGN(auto other_buffer, AllocateBuffer(kBufferSize + 32));
memset(other_buffer->mutable_data(), 0, kBufferSize + 32);
random_bytes(kBufferSize + 32, 0, other_buffer->mutable_data());
const uint8_t* other = other_buffer->data();
for (int64_t num_bits : lengths) {
for (int64_t offset : offsets) {
for (int64_t dest_offset : offsets) {
const int64_t copy_length = num_bits - offset;
ASSERT_OK_AND_ASSIGN(auto copy, AllocateBuffer(other_buffer->size()));
memcpy(copy->mutable_data(), other_buffer->data(), other_buffer->size());
InvertBitmap(src, offset, copy_length, copy->mutable_data(), dest_offset);
for (int64_t i = 0; i < dest_offset; ++i) {
ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
}
for (int64_t i = 0; i < copy_length; ++i) {
ASSERT_EQ(BitUtil::GetBit(src, i + offset),
!BitUtil::GetBit(copy->data(), i + dest_offset));
}
for (int64_t i = dest_offset + copy_length; i < (other_buffer->size() * 8); ++i) {
ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
}
}
}
}
}
TEST(BitUtilTests, TestBitmapEquals) {
const int srcBufferSize = 1000;
ASSERT_OK_AND_ASSIGN(auto src_buffer, AllocateBuffer(srcBufferSize));
memset(src_buffer->mutable_data(), 0, srcBufferSize);
random_bytes(srcBufferSize, 0, src_buffer->mutable_data());
const uint8_t* src = src_buffer->data();
std::vector<int64_t> lengths = {srcBufferSize * 8 - 4, srcBufferSize * 8};
std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
const auto dstBufferSize = srcBufferSize + BitUtil::BytesForBits(*std::max_element(
offsets.cbegin(), offsets.cend()));
ASSERT_OK_AND_ASSIGN(auto dst_buffer, AllocateBuffer(dstBufferSize))
uint8_t* dst = dst_buffer->mutable_data();
for (int64_t num_bits : lengths) {
for (int64_t offset_src : offsets) {
for (int64_t offset_dst : offsets) {
const auto bit_length = num_bits - offset_src;
internal::CopyBitmap(src, offset_src, bit_length, dst, offset_dst);
ASSERT_TRUE(internal::BitmapEquals(src, offset_src, dst, offset_dst, bit_length));
// test negative cases by flip some bit at head and tail
for (int64_t offset_flip : offsets) {
const auto offset_flip_head = offset_dst + offset_flip;
dst[offset_flip_head / 8] ^= 1 << (offset_flip_head % 8);
ASSERT_FALSE(
internal::BitmapEquals(src, offset_src, dst, offset_dst, bit_length));
dst[offset_flip_head / 8] ^= 1 << (offset_flip_head % 8);
const auto offset_flip_tail = offset_dst + bit_length - offset_flip - 1;
dst[offset_flip_tail / 8] ^= 1 << (offset_flip_tail % 8);
ASSERT_FALSE(
internal::BitmapEquals(src, offset_src, dst, offset_dst, bit_length));
dst[offset_flip_tail / 8] ^= 1 << (offset_flip_tail % 8);
}
}
}
}
}
TEST(BitUtil, CeilDiv) {
EXPECT_EQ(BitUtil::CeilDiv(0, 1), 0);
EXPECT_EQ(BitUtil::CeilDiv(1, 1), 1);
EXPECT_EQ(BitUtil::CeilDiv(1, 2), 1);
EXPECT_EQ(BitUtil::CeilDiv(0, 8), 0);
EXPECT_EQ(BitUtil::CeilDiv(1, 8), 1);
EXPECT_EQ(BitUtil::CeilDiv(7, 8), 1);
EXPECT_EQ(BitUtil::CeilDiv(8, 8), 1);
EXPECT_EQ(BitUtil::CeilDiv(9, 8), 2);
EXPECT_EQ(BitUtil::CeilDiv(9, 9), 1);
EXPECT_EQ(BitUtil::CeilDiv(10000000000, 10), 1000000000);
EXPECT_EQ(BitUtil::CeilDiv(10, 10000000000), 1);
EXPECT_EQ(BitUtil::CeilDiv(100000000000, 10000000000), 10);
// test overflow
int64_t value = std::numeric_limits<int64_t>::max() - 1;
int64_t divisor = std::numeric_limits<int64_t>::max();
EXPECT_EQ(BitUtil::CeilDiv(value, divisor), 1);
value = std::numeric_limits<int64_t>::max();
EXPECT_EQ(BitUtil::CeilDiv(value, divisor), 1);
}
TEST(BitUtil, RoundUp) {
EXPECT_EQ(BitUtil::RoundUp(0, 1), 0);
EXPECT_EQ(BitUtil::RoundUp(1, 1), 1);
EXPECT_EQ(BitUtil::RoundUp(1, 2), 2);
EXPECT_EQ(BitUtil::RoundUp(6, 2), 6);
EXPECT_EQ(BitUtil::RoundUp(0, 3), 0);
EXPECT_EQ(BitUtil::RoundUp(7, 3), 9);
EXPECT_EQ(BitUtil::RoundUp(9, 9), 9);
EXPECT_EQ(BitUtil::RoundUp(10000000001, 10), 10000000010);
EXPECT_EQ(BitUtil::RoundUp(10, 10000000000), 10000000000);
EXPECT_EQ(BitUtil::RoundUp(100000000000, 10000000000), 100000000000);
// test overflow
int64_t value = std::numeric_limits<int64_t>::max() - 1;
int64_t divisor = std::numeric_limits<int64_t>::max();
EXPECT_EQ(BitUtil::RoundUp(value, divisor), divisor);
value = std::numeric_limits<int64_t>::max();
EXPECT_EQ(BitUtil::RoundUp(value, divisor), divisor);
}
TEST(BitUtil, RoundDown) {
EXPECT_EQ(BitUtil::RoundDown(0, 1), 0);
EXPECT_EQ(BitUtil::RoundDown(1, 1), 1);
EXPECT_EQ(BitUtil::RoundDown(1, 2), 0);
EXPECT_EQ(BitUtil::RoundDown(6, 2), 6);
EXPECT_EQ(BitUtil::RoundDown(5, 7), 0);
EXPECT_EQ(BitUtil::RoundDown(10, 7), 7);
EXPECT_EQ(BitUtil::RoundDown(7, 3), 6);
EXPECT_EQ(BitUtil::RoundDown(9, 9), 9);
EXPECT_EQ(BitUtil::RoundDown(10000000001, 10), 10000000000);
EXPECT_EQ(BitUtil::RoundDown(10, 10000000000), 0);
EXPECT_EQ(BitUtil::RoundDown(100000000000, 10000000000), 100000000000);
for (int i = 0; i < 100; i++) {
for (int j = 1; j < 100; j++) {
EXPECT_EQ(BitUtil::RoundDown(i, j), i - (i % j));
}
}
}
TEST(BitUtil, CoveringBytes) {
EXPECT_EQ(BitUtil::CoveringBytes(0, 8), 1);
EXPECT_EQ(BitUtil::CoveringBytes(0, 9), 2);
EXPECT_EQ(BitUtil::CoveringBytes(1, 7), 1);
EXPECT_EQ(BitUtil::CoveringBytes(1, 8), 2);
EXPECT_EQ(BitUtil::CoveringBytes(2, 19), 3);
EXPECT_EQ(BitUtil::CoveringBytes(7, 18), 4);
}
TEST(BitUtil, TrailingBits) {
EXPECT_EQ(BitUtil::TrailingBits(0xFF, 0), 0);
EXPECT_EQ(BitUtil::TrailingBits(0xFF, 1), 1);
EXPECT_EQ(BitUtil::TrailingBits(0xFF, 64), 0xFF);
EXPECT_EQ(BitUtil::TrailingBits(0xFF, 100), 0xFF);
EXPECT_EQ(BitUtil::TrailingBits(0, 1), 0);
EXPECT_EQ(BitUtil::TrailingBits(0, 64), 0);
EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 0), 0);
EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 63), 0);
EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 64), 1LL << 63);
}
TEST(BitUtil, ByteSwap) {
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0x11223344)), 0x44332211);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0x11223344)), 0x44332211);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint64_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint64_t>(0x1122334455667788)),
0x8877665544332211);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int64_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int64_t>(0x1122334455667788)),
0x8877665544332211);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0x1122)), 0x2211);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0x1122)), 0x2211);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int8_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<int8_t>(0x11)), 0x11);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint8_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint8_t>(0x11)), 0x11);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<float>(0)), 0);
uint32_t srci32 = 0xaabbccdd, expectedi32 = 0xddccbbaa;
EXPECT_EQ(BitUtil::ByteSwap(*reinterpret_cast<float*>(&srci32)),
*reinterpret_cast<float*>(&expectedi32));
uint64_t srci64 = 0xaabb11223344ccdd, expectedi64 = 0xddcc44332211bbaa;
EXPECT_EQ(BitUtil::ByteSwap(*reinterpret_cast<double*>(&srci64)),
*reinterpret_cast<double*>(&expectedi64));
}
TEST(BitUtil, Log2) {
EXPECT_EQ(BitUtil::Log2(1), 0);
EXPECT_EQ(BitUtil::Log2(2), 1);
EXPECT_EQ(BitUtil::Log2(3), 2);
EXPECT_EQ(BitUtil::Log2(4), 2);
EXPECT_EQ(BitUtil::Log2(5), 3);
EXPECT_EQ(BitUtil::Log2(8), 3);
EXPECT_EQ(BitUtil::Log2(9), 4);
EXPECT_EQ(BitUtil::Log2(INT_MAX), 31);
EXPECT_EQ(BitUtil::Log2(UINT_MAX), 32);
EXPECT_EQ(BitUtil::Log2(ULLONG_MAX), 64);
}
TEST(BitUtil, NumRequiredBits) {
EXPECT_EQ(BitUtil::NumRequiredBits(0), 0);
EXPECT_EQ(BitUtil::NumRequiredBits(1), 1);
EXPECT_EQ(BitUtil::NumRequiredBits(2), 2);
EXPECT_EQ(BitUtil::NumRequiredBits(3), 2);
EXPECT_EQ(BitUtil::NumRequiredBits(4), 3);
EXPECT_EQ(BitUtil::NumRequiredBits(5), 3);
EXPECT_EQ(BitUtil::NumRequiredBits(7), 3);
EXPECT_EQ(BitUtil::NumRequiredBits(8), 4);
EXPECT_EQ(BitUtil::NumRequiredBits(9), 4);
EXPECT_EQ(BitUtil::NumRequiredBits(UINT_MAX - 1), 32);
EXPECT_EQ(BitUtil::NumRequiredBits(UINT_MAX), 32);
EXPECT_EQ(BitUtil::NumRequiredBits(static_cast<uint64_t>(UINT_MAX) + 1), 33);
EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX / 2), 63);
EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX / 2 + 1), 64);
EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX - 1), 64);
EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX), 64);
}
#define U32(x) static_cast<uint32_t>(x)
#define U64(x) static_cast<uint64_t>(x)
#define S64(x) static_cast<int64_t>(x)
TEST(BitUtil, CountLeadingZeros) {
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(0)), 32);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(1)), 31);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(2)), 30);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(3)), 30);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(4)), 29);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(7)), 29);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(8)), 28);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX / 2)), 1);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX / 2 + 1)), 0);
EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX)), 0);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(0)), 64);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(1)), 63);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(2)), 62);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(3)), 62);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(4)), 61);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(7)), 61);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(8)), 60);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(UINT_MAX)), 32);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(UINT_MAX) + 1), 31);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX / 2)), 1);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX / 2 + 1)), 0);
EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX)), 0);
}
TEST(BitUtil, CountTrailingZeros) {
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(0)), 32);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 31), 31);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 30), 30);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 29), 29);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 28), 28);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(8)), 3);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(4)), 2);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(2)), 1);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1)), 0);
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(ULONG_MAX)), 0);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(0)), 64);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 63), 63);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 62), 62);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 61), 61);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 60), 60);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(8)), 3);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(4)), 2);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(2)), 1);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1)), 0);
EXPECT_EQ(BitUtil::CountTrailingZeros(U64(ULLONG_MAX)), 0);
}
TEST(BitUtil, RoundUpToPowerOf2) {
EXPECT_EQ(BitUtil::RoundUpToPowerOf2(S64(7), 8), 8);
EXPECT_EQ(BitUtil::RoundUpToPowerOf2(S64(8), 8), 8);
EXPECT_EQ(BitUtil::RoundUpToPowerOf2(S64(9), 8), 16);
EXPECT_EQ(BitUtil::RoundUpToPowerOf2(U64(7), 8), 8);
EXPECT_EQ(BitUtil::RoundUpToPowerOf2(U64(8), 8), 8);
EXPECT_EQ(BitUtil::RoundUpToPowerOf2(U64(9), 8), 16);
}
#undef U32
#undef U64
#undef S64
static void TestZigZag(int32_t v) {
uint8_t buffer[BitUtil::BitReader::kMaxVlqByteLength] = {};
BitUtil::BitWriter writer(buffer, sizeof(buffer));
BitUtil::BitReader reader(buffer, sizeof(buffer));
writer.PutZigZagVlqInt(v);
int32_t result;
EXPECT_TRUE(reader.GetZigZagVlqInt(&result));
EXPECT_EQ(v, result);
}
TEST(BitStreamUtil, ZigZag) {
TestZigZag(0);
TestZigZag(1);
TestZigZag(1234);
TestZigZag(-1);
TestZigZag(-1234);
TestZigZag(std::numeric_limits<int32_t>::max());
TestZigZag(-std::numeric_limits<int32_t>::max());
}
TEST(BitUtil, RoundTripLittleEndianTest) {
uint64_t value = 0xFF;
#if ARROW_LITTLE_ENDIAN
uint64_t expected = value;
#else
uint64_t expected = std::numeric_limits<uint64_t>::max() << 56;
#endif
uint64_t little_endian_result = BitUtil::ToLittleEndian(value);
ASSERT_EQ(expected, little_endian_result);
uint64_t from_little_endian = BitUtil::FromLittleEndian(little_endian_result);
ASSERT_EQ(value, from_little_endian);
}
TEST(BitUtil, RoundTripBigEndianTest) {
uint64_t value = 0xFF;
#if ARROW_LITTLE_ENDIAN
uint64_t expected = std::numeric_limits<uint64_t>::max() << 56;
#else
uint64_t expected = value;
#endif
uint64_t big_endian_result = BitUtil::ToBigEndian(value);
ASSERT_EQ(expected, big_endian_result);
uint64_t from_big_endian = BitUtil::FromBigEndian(big_endian_result);
ASSERT_EQ(value, from_big_endian);
}
TEST(BitUtil, BitsetStack) {
BitsetStack stack;
ASSERT_EQ(stack.TopSize(), 0);
stack.Push(3, false);
ASSERT_EQ(stack.TopSize(), 3);
stack[1] = true;
stack.Push(5, true);
ASSERT_EQ(stack.TopSize(), 5);
stack[1] = false;
for (int i = 0; i != 5; ++i) {
ASSERT_EQ(stack[i], i != 1);
}
stack.Pop();
ASSERT_EQ(stack.TopSize(), 3);
for (int i = 0; i != 3; ++i) {
ASSERT_EQ(stack[i], i == 1);
}
stack.Pop();
ASSERT_EQ(stack.TopSize(), 0);
}
// test the basic assumption of word level Bitmap::Visit
TEST(Bitmap, ShiftingWordsOptimization) {
// single word
{
uint64_t word;
auto bytes = reinterpret_cast<uint8_t*>(&word);
constexpr size_t kBitWidth = sizeof(word) * 8;
for (int seed = 0; seed < 64; ++seed) {
random_bytes(sizeof(word), seed, bytes);
uint64_t native_word = BitUtil::FromLittleEndian(word);
// bits are accessible through simple bit shifting of the word
for (size_t i = 0; i < kBitWidth; ++i) {
ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((native_word >> i) & 1));
}
// bit offset can therefore be accommodated by shifting the word
for (size_t offset = 0; offset < (kBitWidth * 3) / 4; ++offset) {
uint64_t shifted_word = arrow::BitUtil::ToLittleEndian(native_word >> offset);
auto shifted_bytes = reinterpret_cast<uint8_t*>(&shifted_word);
ASSERT_TRUE(
internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset));
}
}
}
// two words
{
uint64_t words[2];
auto bytes = reinterpret_cast<uint8_t*>(words);
constexpr size_t kBitWidth = sizeof(words[0]) * 8;
for (int seed = 0; seed < 64; ++seed) {
random_bytes(sizeof(words), seed, bytes);
uint64_t native_words0 = BitUtil::FromLittleEndian(words[0]);
uint64_t native_words1 = BitUtil::FromLittleEndian(words[1]);
// bits are accessible through simple bit shifting of a word
for (size_t i = 0; i < kBitWidth; ++i) {
ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((native_words0 >> i) & 1));
}
for (size_t i = 0; i < kBitWidth; ++i) {
ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((native_words1 >> i) & 1));
}
// bit offset can therefore be accommodated by shifting the word
for (size_t offset = 1; offset < (kBitWidth * 3) / 4; offset += 3) {
uint64_t shifted_words[2];
shifted_words[0] = arrow::BitUtil::ToLittleEndian(
native_words0 >> offset | (native_words1 << (kBitWidth - offset)));
shifted_words[1] = arrow::BitUtil::ToLittleEndian(native_words1 >> offset);
auto shifted_bytes = reinterpret_cast<uint8_t*>(shifted_words);
// from offset to unshifted word boundary
ASSERT_TRUE(
internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset));
// from unshifted word boundary to shifted word boundary
ASSERT_TRUE(internal::BitmapEquals(bytes, kBitWidth, shifted_bytes,
kBitWidth - offset, offset));
// from shifted word boundary to end
ASSERT_TRUE(internal::BitmapEquals(bytes, kBitWidth + offset, shifted_bytes,
kBitWidth, kBitWidth - offset));
}
}
}
}
namespace internal {
static Bitmap Copy(const Bitmap& bitmap, std::shared_ptr<Buffer> storage) {
int64_t i = 0;
Bitmap bitmaps[] = {bitmap};
auto min_offset = Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 1> uint64s) {
reinterpret_cast<uint64_t*>(storage->mutable_data())[i++] = uint64s[0];
});
return Bitmap(std::move(storage), min_offset, bitmap.length());
}
// reconstruct a bitmap from a word-wise visit
TEST(Bitmap, VisitWords) {
constexpr int64_t nbytes = 1 << 10;
std::shared_ptr<Buffer> buffer, actual_buffer;
for (std::shared_ptr<Buffer>* b : {&buffer, &actual_buffer}) {
ASSERT_OK_AND_ASSIGN(*b, AllocateBuffer(nbytes));
memset((*b)->mutable_data(), 0, nbytes);
}
random_bytes(nbytes, 0, buffer->mutable_data());
constexpr int64_t kBitWidth = 8 * sizeof(uint64_t);
for (int64_t offset : {0, 1, 2, 5, 17}) {
for (int64_t num_bits :
{int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1,
nbytes * 8 - offset, nbytes * 6, nbytes * 4}) {
Bitmap actual = Copy({buffer, offset, num_bits}, actual_buffer);
ASSERT_EQ(actual, Bitmap(buffer->data(), offset, num_bits))
<< "offset:" << offset << " bits:" << num_bits << std::endl
<< Bitmap(actual_buffer, 0, num_bits).Diff({buffer, offset, num_bits});
}
}
}
#ifndef ARROW_VALGRIND
// This test reads uninitialized memory
TEST(Bitmap, VisitPartialWords) {
uint64_t words[2];
constexpr auto nbytes = sizeof(words);
constexpr auto nbits = nbytes * 8;
auto buffer = Buffer::Wrap(words, 2);
Bitmap bitmap(buffer, 0, nbits);
ASSERT_OK_AND_ASSIGN(std::shared_ptr<Buffer> storage, AllocateBuffer(nbytes));
// words partially outside the buffer are not accessible, but they are loaded bitwise
auto first_byte_was_missing = Bitmap(SliceBuffer(buffer, 1), 0, nbits - 8);
ASSERT_EQ(Copy(first_byte_was_missing, storage), bitmap.Slice(8));
auto last_byte_was_missing = Bitmap(SliceBuffer(buffer, 0, nbytes - 1), 0, nbits - 8);
ASSERT_EQ(Copy(last_byte_was_missing, storage), bitmap.Slice(0, nbits - 8));
}
#endif // ARROW_VALGRIND
TEST(Bitmap, ToString) {
uint8_t bitmap[8] = {0xAC, 0xCA, 0, 0, 0, 0, 0, 0};
EXPECT_EQ(Bitmap(bitmap, /*bit_offset*/ 0, /*length=*/34).ToString(),
"00110101 01010011 00000000 00000000 00");
EXPECT_EQ(Bitmap(bitmap, /*bit_offset*/ 0, /*length=*/16).ToString(),
"00110101 01010011");
EXPECT_EQ(Bitmap(bitmap, /*bit_offset*/ 0, /*length=*/11).ToString(), "00110101 010");
EXPECT_EQ(Bitmap(bitmap, /*bit_offset*/ 3, /*length=*/8).ToString(), "10101010");
}
// compute bitwise AND of bitmaps using word-wise visit
TEST(Bitmap, VisitWordsAnd) {
constexpr int64_t nbytes = 1 << 10;
std::shared_ptr<Buffer> buffer, actual_buffer, expected_buffer;
for (std::shared_ptr<Buffer>* b : {&buffer, &actual_buffer, &expected_buffer}) {
ASSERT_OK_AND_ASSIGN(*b, AllocateBuffer(nbytes));
memset((*b)->mutable_data(), 0, nbytes);
}
random_bytes(nbytes, 0, buffer->mutable_data());
constexpr int64_t kBitWidth = 8 * sizeof(uint64_t);
for (int64_t left_offset :
{0, 1, 2, 5, 17, int(kBitWidth - 1), int(kBitWidth + 1), int(kBitWidth + 17)}) {
for (int64_t right_offset = 0; right_offset < left_offset; ++right_offset) {
for (int64_t num_bits :
{int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1,
2 * kBitWidth - 1, 2 * kBitWidth, 2 * kBitWidth + 1, nbytes * 8 - left_offset,
3 * kBitWidth - 1, 3 * kBitWidth, 3 * kBitWidth + 1, nbytes * 6,
nbytes * 4}) {
Bitmap bitmaps[] = {{buffer, left_offset, num_bits},
{buffer, right_offset, num_bits}};
int64_t i = 0;
auto min_offset =
Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 2> uint64s) {
reinterpret_cast<uint64_t*>(actual_buffer->mutable_data())[i++] =
uint64s[0] & uint64s[1];
});
BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(),
bitmaps[1].buffer()->data(), bitmaps[1].offset(), bitmaps[0].length(),
0, expected_buffer->mutable_data());
ASSERT_TRUE(BitmapEquals(actual_buffer->data(), min_offset,
expected_buffer->data(), 0, num_bits))
<< "left_offset:" << left_offset << " bits:" << num_bits
<< " right_offset:" << right_offset << std::endl
<< Bitmap(actual_buffer, 0, num_bits).Diff({expected_buffer, 0, num_bits});
}
}
}
}
} // namespace internal
} // namespace arrow