// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| |
| |
| #ifndef IMPALA_UTIL_BIT_STREAM_UTILS_H |
| #define IMPALA_UTIL_BIT_STREAM_UTILS_H |
| |
#include <string.h>

#include <algorithm>

#include <boost/cstdint.hpp>

#include "impala/bit-util.h"
#include "impala/compiler-util.h"
#include "impala/logging.h"
| |
| namespace impala { |
| |
| // Utility class to write bit/byte streams. This class can write data to either be |
| // bit packed or byte aligned (and a single stream that has a mix of both). |
| // This class does not allocate memory. |
| class BitWriter { |
| public: |
| // buffer: buffer to write bits to. Buffer should be preallocated with |
| // 'buffer_len' bytes. |
| BitWriter(uint8_t* buffer, int buffer_len) : |
| buffer_(buffer), |
| max_bytes_(buffer_len) { |
| Clear(); |
| } |
| |
| void Clear() { |
| buffered_values_ = 0; |
| byte_offset_ = 0; |
| bit_offset_ = 0; |
| } |
| |
| // The number of current bytes written, including the current byte (i.e. may include a |
| // fraction of a byte). Includes buffered values. |
| int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); } |
| uint8_t* buffer() const { return buffer_; } |
| int buffer_len() const { return max_bytes_; } |
| |
| // Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit |
| // packed. Returns false if there was not enough space. num_bits must be <= 32. |
| bool PutValue(uint64_t v, int num_bits); |
| |
| // Writes v to the next aligned byte using num_bytes. If T is larger than num_bytes, the |
| // extra high-order bytes will be ignored. Returns false if there was not enough space. |
| template<typename T> |
| bool PutAligned(T v, int num_bytes); |
| |
| // Write a Vlq encoded int to the buffer. Returns false if there was not enough |
| // room. The value is written byte aligned. |
| // For more details on vlq: |
| // en.wikipedia.org/wiki/Variable-length_quantity |
| bool PutVlqInt(uint32_t v); |
| bool PutZigZagVlqInt(int32_t v); |
| |
| // Get a pointer to the next aligned byte and advance the underlying buffer |
| // by num_bytes. |
| // Returns NULL if there was not enough space. |
| uint8_t* GetNextBytePtr(int num_bytes = 1); |
| |
| // Flushes all buffered values to the buffer. Call this when done writing to the buffer. |
| // If 'align' is true, buffered_values_ is reset and any future writes will be written |
| // to the next byte boundary. |
| void Flush(bool align=false); |
| |
| private: |
| uint8_t* buffer_; |
| int max_bytes_; |
| |
| // Bit-packed values are initially written to this variable before being memcpy'd to |
| // buffer_. This is faster than writing values byte by byte directly to buffer_. |
| uint64_t buffered_values_; |
| |
| int byte_offset_; // Offset in buffer_ |
| int bit_offset_; // Offset in buffered_values_ |
| }; |
| |
| // Utility class to read bit/byte stream. This class can read bits or bytes |
| // that are either byte aligned or not. It also has utilities to read multiple |
| // bytes in one read (e.g. encoded int). |
| class BitReader { |
| public: |
| // 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'. |
| BitReader(const uint8_t* buffer, int buffer_len) : |
| buffer_(buffer), |
| max_bytes_(buffer_len), |
| byte_offset_(0), |
| bit_offset_(0) { |
| int num_bytes = std::min(8, max_bytes_ - byte_offset_); |
| memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes); |
| } |
| |
| BitReader() : buffer_(NULL), max_bytes_(0) {} |
| |
| // Gets the next value from the buffer. Returns true if 'v' could be read or false if |
| // there are not enough bytes left. num_bits must be <= 32. |
| template<typename T> |
| bool GetValue(int num_bits, T* v); |
| |
| // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a |
| // little-endian native type and big enough to store 'num_bytes'. The value is assumed |
| // to be byte-aligned so the stream will be advanced to the start of the next byte |
| // before 'v' is read. Returns false if there are not enough bytes left. |
| template<typename T> |
| bool GetAligned(int num_bytes, T* v); |
| |
| // Reads a vlq encoded int from the stream. The encoded int must start at the |
| // beginning of a byte. Return false if there were not enough bytes in the buffer. |
| bool GetVlqInt(uint64_t* v); |
| bool GetZigZagVlqInt(int64_t* v); |
| |
| // Returns the number of bytes left in the stream, not including the current byte (i.e., |
| // there may be an additional fraction of a byte). |
| int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); } |
| |
| const uint8_t* current_ptr() { return buffer_ + byte_offset_; } |
| void SkipBytes(int num_bytes) { |
| byte_offset_ += num_bytes; |
| num_bytes = std::min(8, max_bytes_ - byte_offset_); |
| memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes); |
| } |
| |
| // Maximum byte length of a vlq encoded int |
| static const int MAX_VLQ_BYTE_LEN = 5; |
| |
| private: |
| const uint8_t* buffer_; |
| int max_bytes_; |
| |
| // Bytes are memcpy'd from buffer_ and values are read from this variable. This is |
| // faster than reading values byte by byte directly from buffer_. |
| uint64_t buffered_values_; |
| |
| int byte_offset_; // Offset in buffer_ |
| int bit_offset_; // Offset in buffered_values_ |
| }; |
| |
| } |
| |
| #endif |