| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // This file is copied from |
| // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/bit-stream-utils.inline.h |
| // and modified by Doris |
| |
| #pragma once |
| |
| #include <algorithm> |
| |
| #include "glog/logging.h" |
| #include "util/alignment.h" |
| #include "util/bit_packing.inline.h" |
| #include "util/bit_stream_utils.h" |
| #include "util/bit_util.h" |
| |
| using doris::BitUtil; |
| |
| namespace doris { |
| #include "common/compile_check_begin.h" |
| inline void BitWriter::PutValue(uint64_t v, int num_bits) { |
| DCHECK_LE(num_bits, 64); |
| // Truncate the higher-order bits. This is necessary to |
| // support signed values. |
| v &= ~0ULL >> (64 - num_bits); |
| |
| buffered_values_ |= v << bit_offset_; |
| bit_offset_ += num_bits; |
| |
| if (bit_offset_ >= 64) [[unlikely]] { |
| // Flush buffered_values_ and write out bits of v that did not fit |
| buffer_->reserve(ALIGN_UP(byte_offset_ + 8, 8)); |
| buffer_->resize(byte_offset_ + 8); |
| DCHECK_LE(byte_offset_ + 8, buffer_->capacity()); |
| memcpy(buffer_->data() + byte_offset_, &buffered_values_, 8); |
| buffered_values_ = 0; |
| byte_offset_ += 8; |
| bit_offset_ -= 64; |
| buffered_values_ = BitUtil::ShiftRightZeroOnOverflow(v, (num_bits - bit_offset_)); |
| } |
| DCHECK_LT(bit_offset_, 64); |
| } |
| |
| inline void BitWriter::Flush(bool align) { |
| int num_bytes = BitUtil::Ceil(bit_offset_, 8); |
| buffer_->reserve(ALIGN_UP(byte_offset_ + num_bytes, 8)); |
| buffer_->resize(byte_offset_ + num_bytes); |
| DCHECK_LE(byte_offset_ + num_bytes, buffer_->capacity()); |
| memcpy(buffer_->data() + byte_offset_, &buffered_values_, num_bytes); |
| |
| if (align) { |
| buffered_values_ = 0; |
| byte_offset_ += num_bytes; |
| bit_offset_ = 0; |
| } |
| } |
| |
| inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) { |
| Flush(/* align */ true); |
| buffer_->reserve(ALIGN_UP(byte_offset_ + num_bytes, 8)); |
| buffer_->resize(byte_offset_ + num_bytes); |
| uint8_t* ptr = buffer_->data() + byte_offset_; |
| byte_offset_ += num_bytes; |
| DCHECK_LE(byte_offset_, buffer_->capacity()); |
| return ptr; |
| } |
| |
| template <typename T> |
| void BitWriter::PutAligned(T val, int num_bytes) { |
| DCHECK_LE(num_bytes, sizeof(T)); |
| uint8_t* ptr = GetNextBytePtr(num_bytes); |
| memcpy(ptr, &val, num_bytes); |
| } |
| |
| inline void BitWriter::PutVlqInt(int32_t v) { |
| while ((v & 0xFFFFFF80) != 0L) { |
| PutAligned<uint8_t>((v & 0x7F) | 0x80, 1); |
| v >>= 7; |
| } |
| PutAligned<uint8_t>(v & 0x7F, 1); |
| } |
| |
| inline BitReader::BitReader(const uint8_t* buffer, int buffer_len) |
| : buffer_(buffer), |
| max_bytes_(buffer_len), |
| buffered_values_(0), |
| byte_offset_(0), |
| bit_offset_(0) { |
| int num_bytes = std::min(8, max_bytes_); |
| memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes); |
| } |
| |
| inline void BitReader::BufferValues() { |
| int bytes_remaining = max_bytes_ - byte_offset_; |
| if (bytes_remaining >= 8) [[likely]] { |
| memcpy(&buffered_values_, buffer_ + byte_offset_, 8); |
| } else { |
| memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining); |
| } |
| } |
| |
| template <typename T> |
| bool BitReader::GetValue(int num_bits, T* v) { |
| DCHECK_LE(num_bits, 64); |
| DCHECK_LE(num_bits, sizeof(T) * 8); |
| |
| if (byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8) [[unlikely]] { |
| return false; |
| } |
| |
| *v = static_cast<T>(BitUtil::TrailingBits(buffered_values_, bit_offset_ + num_bits) >> |
| bit_offset_); |
| |
| bit_offset_ += num_bits; |
| if (bit_offset_ >= 64) { |
| byte_offset_ += 8; |
| bit_offset_ -= 64; |
| BufferValues(); |
| // Read bits of v that crossed into new buffered_values_ |
| *v |= BitUtil::ShiftLeftZeroOnOverflow(BitUtil::TrailingBits(buffered_values_, bit_offset_), |
| (num_bits - bit_offset_)); |
| } |
| DCHECK_LE(bit_offset_, 64); |
| return true; |
| } |
| |
| inline void BitReader::Rewind(int num_bits) { |
| bit_offset_ -= num_bits; |
| if (bit_offset_ >= 0) { |
| return; |
| } |
| while (bit_offset_ < 0) { |
| int seek_back = std::min(byte_offset_, 8); |
| byte_offset_ -= seek_back; |
| bit_offset_ += seek_back * 8; |
| } |
| // This should only be executed *if* rewinding by 'num_bits' |
| // make the existing buffered_values_ invalid |
| DCHECK_GE(byte_offset_, 0); // Check for underflow |
| memcpy(&buffered_values_, buffer_ + byte_offset_, 8); |
| } |
| |
| inline bool BitReader::Advance(int64_t num_bits) { |
| int64_t bits_required = bit_offset_ + num_bits; |
| int64_t bytes_required = (bits_required >> 3) + ((bits_required & 7) != 0); |
| if (bytes_required > max_bytes_ - byte_offset_) { |
| return false; |
| } |
| byte_offset_ += static_cast<int>(bits_required >> 3); |
| bit_offset_ = static_cast<int>(bits_required & 7); |
| BufferValues(); |
| return true; |
| } |
| |
| inline void BitReader::SeekToBit(unsigned int stream_position) { |
| DCHECK_LE(stream_position, max_bytes_ * 8); |
| |
| int delta = static_cast<int>(stream_position) - position(); |
| if (delta == 0) { |
| return; |
| } else if (delta < 0) { |
| Rewind(position() - stream_position); |
| } else { |
| bit_offset_ += delta; |
| while (bit_offset_ >= 64) { |
| byte_offset_ += 8; |
| bit_offset_ -= 64; |
| if (bit_offset_ < 64) { |
| // This should only be executed if seeking to |
| // 'stream_position' makes the existing buffered_values_ |
| // invalid. |
| BufferValues(); |
| } |
| } |
| } |
| } |
| |
| template <typename T> |
| bool BitReader::GetAligned(int num_bytes, T* v) { |
| DCHECK_LE(num_bytes, sizeof(T)); |
| int bytes_read = BitUtil::Ceil(bit_offset_, 8); |
| if (byte_offset_ + bytes_read + num_bytes > max_bytes_) [[unlikely]] { |
| return false; |
| } |
| |
| // Advance byte_offset to next unread byte and read num_bytes |
| byte_offset_ += bytes_read; |
| memcpy(v, buffer_ + byte_offset_, num_bytes); |
| byte_offset_ += num_bytes; |
| |
| // Reset buffered_values_ |
| bit_offset_ = 0; |
| int bytes_remaining = max_bytes_ - byte_offset_; |
| if (bytes_remaining >= 8) [[likely]] { |
| memcpy(&buffered_values_, buffer_ + byte_offset_, 8); |
| } else { |
| memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining); |
| } |
| return true; |
| } |
| |
| inline bool BitReader::GetVlqInt(uint32_t* v) { |
| uint32_t tmp = 0; |
| for (int num_bytes = 0; num_bytes < MAX_VLQ_BYTE_LEN; num_bytes++) { |
| uint8_t byte = 0; |
| if (!GetAligned<uint8_t>(1, &byte)) return false; |
| tmp |= static_cast<uint32_t>(byte & 0x7F) << (7 * num_bytes); |
| if ((byte & 0x80) == 0) { |
| *v = tmp; |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| inline bool BitReader::GetZigZagVlqInt(int32_t* v) { |
| uint32_t u; |
| if (!GetVlqInt(&u)) { |
| return false; |
| } |
| u = (u >> 1) ^ (~(u & 1) + 1); |
| // copy uint32_t to int32_t |
| std::memcpy(v, &u, sizeof(uint32_t)); |
| return true; |
| } |
| |
| inline bool BitReader::GetVlqInt(uint64_t* v) { |
| uint64_t tmp = 0; |
| for (int num_bytes = 0; num_bytes < MAX_VLQ_BYTE_LEN_FOR_INT64; num_bytes++) { |
| uint8_t byte = 0; |
| if (!GetAligned<uint8_t>(1, &byte)) return false; |
| tmp |= static_cast<uint64_t>(byte & 0x7F) << (7 * num_bytes); |
| if ((byte & 0x80) == 0) { |
| *v = tmp; |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| inline bool BitReader::GetZigZagVlqInt(int64_t* v) { |
| uint64_t u; |
| if (!GetVlqInt(&u)) { |
| return false; |
| } |
| u = (u >> 1) ^ (~(u & 1) + 1); |
| std::memcpy(v, &u, sizeof(uint64_t)); |
| return true; |
| } |
| |
| template <typename T> |
| int BatchedBitReader::UnpackBatch(int bit_width, int num_values, T* v) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GE(bit_width, 0); |
| DCHECK_LE(bit_width, MAX_BITWIDTH); |
| DCHECK_LE(bit_width, sizeof(T) * 8); |
| DCHECK_GE(num_values, 0); |
| |
| int64_t num_read; |
| std::tie(buffer_pos_, num_read) = |
| BitPacking::UnpackValues(bit_width, buffer_pos_, bytes_left(), num_values, v); |
| DCHECK_LE(buffer_pos_, buffer_end_); |
| DCHECK_LE(num_read, num_values); |
| return static_cast<int>(num_read); |
| } |
| |
| inline bool BatchedBitReader::SkipBatch(int bit_width, int num_values_to_skip) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GE(bit_width, 0); |
| DCHECK_LE(bit_width, MAX_BITWIDTH); |
| DCHECK_GE(num_values_to_skip, 0); |
| |
| int skip_bytes = BitUtil::RoundUpNumBytes(bit_width * num_values_to_skip); |
| if (skip_bytes > buffer_end_ - buffer_pos_) { |
| return false; |
| } |
| buffer_pos_ += skip_bytes; |
| return true; |
| } |
| |
| template <typename T> |
| int BatchedBitReader::UnpackAndDecodeBatch(int bit_width, T* dict, int64_t dict_len, int num_values, |
| T* v, int64_t stride) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GE(bit_width, 0); |
| DCHECK_LE(bit_width, MAX_BITWIDTH); |
| DCHECK_GE(num_values, 0); |
| |
| const uint8_t* new_buffer_pos; |
| int64_t num_read; |
| bool decode_error = false; |
| std::tie(new_buffer_pos, num_read) = |
| BitPacking::UnpackAndDecodeValues(bit_width, buffer_pos_, bytes_left(), dict, dict_len, |
| num_values, v, stride, &decode_error); |
| if (UNLIKELY(decode_error)) return -1; |
| buffer_pos_ = new_buffer_pos; |
| DCHECK_LE(buffer_pos_, buffer_end_); |
| DCHECK_LE(num_read, num_values); |
| return static_cast<int>(num_read); |
| } |
| |
| template <typename T> |
| bool BatchedBitReader::GetBytes(int num_bytes, T* v) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GE(num_bytes, 0); |
| DCHECK_LE(num_bytes, sizeof(T)); |
| if (UNLIKELY(buffer_pos_ + num_bytes > buffer_end_)) return false; |
| *v = 0; // Ensure unset bytes are initialized to zero. |
| memcpy(v, buffer_pos_, num_bytes); |
| buffer_pos_ += num_bytes; |
| return true; |
| } |
| |
| template <typename UINT_T> |
| bool BatchedBitReader::GetUleb128(UINT_T* v) { |
| static_assert(std::is_integral<UINT_T>::value, "Integral type required."); |
| static_assert(std::is_unsigned<UINT_T>::value, "Unsigned type required."); |
| static_assert(!std::is_same<UINT_T, bool>::value, "Bools are not supported."); |
| |
| *v = 0; |
| int shift = 0; |
| uint8_t byte = 0; |
| do { |
| if (UNLIKELY(shift >= max_vlq_byte_len<UINT_T>() * 7)) return false; |
| if (!GetBytes(1, &byte)) return false; |
| |
| /// We need to convert 'byte' to UINT_T so that the result of the bitwise and |
| /// operation is at least as long an integer as '*v', otherwise the shift may be too |
| /// big and lead to undefined behaviour. |
| const UINT_T byte_as_UINT_T = byte; |
| *v |= (byte_as_UINT_T & 0x7Fu) << shift; |
| shift += 7; |
| } while ((byte & 0x80u) != 0); |
| return true; |
| } |
| |
| #include "common/compile_check_end.h" |
| } // namespace doris |