| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #ifndef IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H |
| #define IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H |
| |
| #include "util/bit-stream-utils.h" |
| |
| #include "common/compiler-util.h" |
| |
| namespace impala { |
| |
| inline bool BitWriter::PutValue(uint64_t v, int num_bits) { |
| DCHECK_LE(num_bits, MAX_BITWIDTH); |
| DCHECK(num_bits == MAX_BITWIDTH || v >> num_bits == 0) |
| << "v = " << v << ", num_bits = " << num_bits; |
| |
| if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false; |
| |
| buffered_values_ |= v << bit_offset_; |
| bit_offset_ += num_bits; |
| |
| if (UNLIKELY(bit_offset_ >= 64)) { |
| // Flush buffered_values_ and write out bits of v that did not fit |
| memcpy(buffer_ + byte_offset_, &buffered_values_, 8); |
| byte_offset_ += 8; |
| bit_offset_ -= 64; |
| |
| // Shifting with the same or greater amount than the number of bits in the number is |
| // undefined behaviour. |
| int shift = num_bits - bit_offset_; |
| |
| if (LIKELY(shift < 64)) { |
| buffered_values_ = v >> shift; |
| } else { |
| buffered_values_ = 0; |
| } |
| } |
| |
| DCHECK_LT(bit_offset_, 64); |
| return true; |
| } |
| |
| inline void BitWriter::Flush(bool align) { |
| int num_bytes = BitUtil::Ceil(bit_offset_, 8); |
| DCHECK_LE(byte_offset_ + num_bytes, max_bytes_); |
| memcpy(buffer_ + byte_offset_, &buffered_values_, num_bytes); |
| |
| if (align) { |
| buffered_values_ = 0; |
| byte_offset_ += num_bytes; |
| bit_offset_ = 0; |
| } |
| } |
| |
| inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) { |
| Flush(/* align */ true); |
| DCHECK_LE(byte_offset_, max_bytes_); |
| if (byte_offset_ + num_bytes > max_bytes_) return NULL; |
| uint8_t* ptr = buffer_ + byte_offset_; |
| byte_offset_ += num_bytes; |
| return ptr; |
| } |
| |
| template<typename T> |
| inline bool BitWriter::PutAligned(T val, int num_bytes) { |
| uint8_t* ptr = GetNextBytePtr(num_bytes); |
| if (ptr == NULL) return false; |
| memcpy(ptr, &val, num_bytes); |
| return true; |
| } |
| |
| template<typename UINT_T> |
| inline bool BitWriter::PutUleb128(UINT_T v) { |
| static_assert(std::is_integral<UINT_T>::value, "Integral type required."); |
| static_assert(std::is_unsigned<UINT_T>::value, "Unsigned type required."); |
| static_assert(!std::is_same<UINT_T, bool>::value, "Bools are not supported."); |
| |
| constexpr UINT_T mask_lowest_7_bits = std::numeric_limits<UINT_T>::max() ^ 0x7Fu; |
| |
| bool result = true; |
| while ((v & mask_lowest_7_bits) != 0L) { |
| result &= PutAligned<uint8_t>((v & 0x7Fu) | 0x80u, 1); |
| v >>= 7; |
| } |
| result &= PutAligned<uint8_t>(v & 0x7Fu, 1); |
| return result; |
| } |
| |
| template<typename INT_T> |
| bool BitWriter::PutZigZagInteger(INT_T v) { |
| static_assert(std::is_integral<INT_T>::value, "Integral type required."); |
| static_assert(std::is_signed<INT_T>::value, "Signed type required."); |
| |
| using UINT_T = std::make_unsigned_t<INT_T>; |
| constexpr int bit_length_minus_one = sizeof(INT_T) * 8 - 1; |
| |
| /// For negative values, << results in undefined behaviour (prior to C++20), so we treat |
| /// `v` as unsigned. |
| const UINT_T v_unsigned = v; |
| |
| /// For the right shift, we rely on implementation defined behaviour as we expect it to |
| /// be an arithmetic right shift (bits equal to the MSB are shifted in). |
| return PutUleb128((v_unsigned << 1) ^ (v >> bit_length_minus_one)); |
| } |
| |
| template<typename T> |
| inline int BatchedBitReader::UnpackBatch(int bit_width, int num_values, T* v) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GE(bit_width, 0); |
| DCHECK_LE(bit_width, MAX_BITWIDTH); |
| DCHECK_LE(bit_width, sizeof(T) * 8); |
| DCHECK_GE(num_values, 0); |
| |
| int64_t num_read; |
| std::tie(buffer_pos_, num_read) = BitPacking::UnpackValues(bit_width, buffer_pos_, |
| bytes_left(), num_values, v); |
| DCHECK_LE(buffer_pos_, buffer_end_); |
| DCHECK_LE(num_read, num_values); |
| return static_cast<int>(num_read); |
| } |
| |
| inline bool BatchedBitReader::SkipBatch(int bit_width, int num_values_to_skip) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GT(bit_width, 0); |
| DCHECK_LE(bit_width, MAX_BITWIDTH); |
| DCHECK_GT(num_values_to_skip, 0); |
| |
| int skip_bytes = BitUtil::RoundUpNumBytes(bit_width * num_values_to_skip); |
| if (skip_bytes > buffer_end_ - buffer_pos_) return false; |
| buffer_pos_ += skip_bytes; |
| return true; |
| } |
| |
| template <typename T> |
| inline int BatchedBitReader::UnpackAndDecodeBatch( |
| int bit_width, T* dict, int64_t dict_len, int num_values, T* v, int64_t stride) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GE(bit_width, 0); |
| DCHECK_LE(bit_width, MAX_BITWIDTH); |
| DCHECK_GE(num_values, 0); |
| |
| const uint8_t* new_buffer_pos; |
| int64_t num_read; |
| bool decode_error = false; |
| std::tie(new_buffer_pos, num_read) = BitPacking::UnpackAndDecodeValues(bit_width, |
| buffer_pos_, bytes_left(), dict, dict_len, num_values, v, stride, &decode_error); |
| if (UNLIKELY(decode_error)) return -1; |
| buffer_pos_ = new_buffer_pos; |
| DCHECK_LE(buffer_pos_, buffer_end_); |
| DCHECK_LE(num_read, num_values); |
| return static_cast<int>(num_read); |
| } |
| |
| template<typename T> |
| inline bool BatchedBitReader::GetBytes(int num_bytes, T* v) { |
| DCHECK(buffer_pos_ != nullptr); |
| DCHECK_GE(num_bytes, 0); |
| DCHECK_LE(num_bytes, sizeof(T)); |
| if (UNLIKELY(buffer_pos_ + num_bytes > buffer_end_)) return false; |
| *v = 0; // Ensure unset bytes are initialized to zero. |
| memcpy(v, buffer_pos_, num_bytes); |
| buffer_pos_ += num_bytes; |
| return true; |
| } |
| |
| template<typename UINT_T> |
| inline bool BatchedBitReader::GetUleb128(UINT_T* v) { |
| static_assert(std::is_integral<UINT_T>::value, "Integral type required."); |
| static_assert(std::is_unsigned<UINT_T>::value, "Unsigned type required."); |
| static_assert(!std::is_same<UINT_T, bool>::value, "Bools are not supported."); |
| |
| *v = 0; |
| int shift = 0; |
| uint8_t byte = 0; |
| do { |
| if (UNLIKELY(shift >= max_vlq_byte_len<UINT_T>() * 7)) return false; |
| if (!GetBytes(1, &byte)) return false; |
| |
| /// We need to convert 'byte' to UINT_T so that the result of the bitwise and |
| /// operation is at least as long an integer as '*v', otherwise the shift may be too |
| /// big and lead to undefined behaviour. |
| const UINT_T byte_as_UINT_T = byte; |
| *v |= (byte_as_UINT_T & 0x7Fu) << shift; |
| shift += 7; |
| } while ((byte & 0x80u) != 0); |
| return true; |
| } |
| |
| template<typename INT_T> |
| bool BatchedBitReader::GetZigZagInteger(INT_T* v) { |
| static_assert(std::is_integral<INT_T>::value, "Integral type required."); |
| static_assert(std::is_signed<INT_T>::value, "Signed type required."); |
| |
| using UINT_T = std::make_unsigned_t<INT_T>; |
| |
| UINT_T v_unsigned; |
| if (UNLIKELY(!GetUleb128<UINT_T>(&v_unsigned))) return false; |
| |
| /// Here we rely on implementation defined behaviour in converting UINT_T to INT_T. |
| *v = (v_unsigned >> 1) ^ -(v_unsigned & 1u); |
| |
| return true; |
| } |
| } |
| |
| #endif |