blob: d4f02f37a416eeeac850e83040dda0e0138cf3a2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
class BitmapWriter {
// A sequential bitwise writer that preserves surrounding bit values.
public:
BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0), length_(length) {
byte_offset_ = start_offset / 8;
bit_mask_ = BitUtil::kBitmask[start_offset % 8];
if (length > 0) {
current_byte_ = bitmap[byte_offset_];
} else {
current_byte_ = 0;
}
}
void Set() { current_byte_ |= bit_mask_; }
void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
void Next() {
bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
++position_;
if (bit_mask_ == 0) {
// Finished this byte, need advancing
bit_mask_ = 0x01;
bitmap_[byte_offset_++] = current_byte_;
if (ARROW_PREDICT_TRUE(position_ < length_)) {
current_byte_ = bitmap_[byte_offset_];
}
}
}
void Finish() {
// Store current byte if we didn't went past bitmap storage
if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
bitmap_[byte_offset_] = current_byte_;
}
}
int64_t position() const { return position_; }
private:
uint8_t* bitmap_;
int64_t position_;
int64_t length_;
uint8_t current_byte_;
uint8_t bit_mask_;
int64_t byte_offset_;
};
class FirstTimeBitmapWriter {
// Like BitmapWriter, but any bit values *following* the bits written
// might be clobbered. It is hence faster than BitmapWriter, and can
// also avoid false positives with Valgrind.
public:
FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0), length_(length) {
current_byte_ = 0;
byte_offset_ = start_offset / 8;
bit_mask_ = BitUtil::kBitmask[start_offset % 8];
if (length > 0) {
current_byte_ = bitmap[byte_offset_] & BitUtil::kPrecedingBitmask[start_offset % 8];
} else {
current_byte_ = 0;
}
}
/// Appends number_of_bits from word to valid_bits and valid_bits_offset.
///
/// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed
/// to be unset (i.e. 0).
/// \param[in] number_of_bits The number of bits to append from word.
void AppendWord(uint64_t word, int64_t number_of_bits) {
if (ARROW_PREDICT_FALSE(number_of_bits == 0)) {
return;
}
// Location that the first byte needs to be written to.
uint8_t* append_position = bitmap_ + byte_offset_;
// Update state variables except for current_byte_ here.
position_ += number_of_bits;
int64_t bit_offset = BitUtil::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
bit_mask_ = BitUtil::kBitmask[(bit_offset + number_of_bits) % 8];
byte_offset_ += (bit_offset + number_of_bits) / 8;
if (bit_offset != 0) {
// We are in the middle of the byte. This code updates the byte and shifts
// bits appropriately within word so it can be memcpy'd below.
int64_t bits_to_carry = 8 - bit_offset;
// Carry over bits from word to current_byte_. We assume any extra bits in word
// unset so no additional accounting is needed for when number_of_bits <
// bits_to_carry.
current_byte_ |= (word & BitUtil::kPrecedingBitmask[bits_to_carry]) << bit_offset;
// Check if everything is transfered into current_byte_.
if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) {
return;
}
*append_position = current_byte_;
append_position++;
// Move the carry bits off of word.
word = word >> bits_to_carry;
number_of_bits -= bits_to_carry;
}
word = BitUtil::ToLittleEndian(word);
int64_t bytes_for_word = ::arrow::BitUtil::BytesForBits(number_of_bits);
std::memcpy(append_position, &word, bytes_for_word);
// At this point, the previous current_byte_ has been written to bitmap_.
// The new current_byte_ is either the last relevant byte in 'word'
// or cleared if the new position is byte aligned (i.e. a fresh byte).
if (bit_mask_ == 0x1) {
current_byte_ = 0;
} else {
current_byte_ = *(append_position + bytes_for_word - 1);
}
}
void Set() { current_byte_ |= bit_mask_; }
void Clear() {}
void Next() {
bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
++position_;
if (bit_mask_ == 0) {
// Finished this byte, need advancing
bit_mask_ = 0x01;
bitmap_[byte_offset_++] = current_byte_;
current_byte_ = 0;
}
}
void Finish() {
// Store current byte if we didn't went go bitmap storage
if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
bitmap_[byte_offset_] = current_byte_;
}
}
int64_t position() const { return position_; }
private:
uint8_t* bitmap_;
int64_t position_;
int64_t length_;
uint8_t current_byte_;
uint8_t bit_mask_;
int64_t byte_offset_;
};
} // namespace internal
} // namespace arrow