blob: c6aeb01f8c983c51d0fa5de6840158c51001abcb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_H
#define IMPALA_UTIL_BIT_STREAM_UTILS_H
#include "kudu/gutil/port.h"
#include "kudu/util/bit-util.h"
#include "kudu/util/faststring.h"
namespace kudu {
// Utility class to write bit/byte streams. This class can write data to either be
// bit packed or byte aligned (and a single stream that has a mix of both).
class BitWriter {
public:
// buffer: buffer to write bits to.
explicit BitWriter(faststring *buffer)
: buffer_(buffer) {
Clear();
}
void Clear() {
buffered_values_ = 0;
byte_offset_ = 0;
bit_offset_ = 0;
buffer_->clear();
}
// Returns a pointer to the underlying buffer
faststring *buffer() const { return buffer_; }
// The number of current bytes written, including the current byte (i.e. may include a
// fraction of a byte). Includes buffered values.
int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); }
// Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit
// packed. num_bits must be <= 32. If 'v' is larger than 'num_bits' bits, the higher
// bits are ignored.
void PutValue(uint64_t v, int num_bits);
// Writes v to the next aligned byte using num_bits. If T is larger than num_bits, the
// extra high-order bits will be ignored.
template<typename T>
void PutAligned(T v, int num_bits);
// Write a Vlq encoded int to the buffer. The value is written byte aligned.
// For more details on vlq: en.wikipedia.org/wiki/Variable-length_quantity
void PutVlqInt(int32_t v);
// Get the index to the next aligned byte and advance the underlying buffer by num_bytes.
size_t GetByteIndexAndAdvance(int num_bytes) {
uint8_t* ptr = GetNextBytePtr(num_bytes);
return ptr - buffer_->data();
}
// Get a pointer to the next aligned byte and advance the underlying buffer by num_bytes.
uint8_t* GetNextBytePtr(int num_bytes);
// Flushes all buffered values to the buffer. Call this when done writing to the buffer.
// If 'align' is true, buffered_values_ is reset and any future writes will be written
// to the next byte boundary.
void Flush(bool align = false);
private:
// Bit-packed values are initially written to this variable before being memcpy'd to
// buffer_. This is faster than writing values byte by byte directly to buffer_.
uint64_t buffered_values_;
faststring *buffer_;
int byte_offset_; // Offset in buffer_
int bit_offset_; // Offset in buffered_values_
};
// Utility class to read bit/byte stream. This class can read bits or bytes
// that are either byte aligned or not. It also has utilities to read multiple
// bytes in one read (e.g. encoded int).
class BitReader {
public:
// 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'.
BitReader(const uint8_t* buffer, int buffer_len);
BitReader() : buffer_(NULL), max_bytes_(0) {}
// Gets the next value from the buffer. Returns true if 'v' could be read or false if
// there are not enough bytes left. num_bits must be <= 32.
template<typename T>
bool GetValue(int num_bits, T* v);
// Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a
// little-endian native type and big enough to store 'num_bytes'. The value is assumed
// to be byte-aligned so the stream will be advanced to the start of the next byte
// before 'v' is read. Returns false if there are not enough bytes left.
template<typename T>
bool GetAligned(int num_bytes, T* v);
// Reads a vlq encoded int from the stream. The encoded int must start at the
// beginning of a byte. Return false if there were not enough bytes in the buffer.
bool GetVlqInt(int32_t* v);
// Returns the number of bytes left in the stream, not including the current byte (i.e.,
// there may be an additional fraction of a byte).
int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); }
// Current position in the stream, by bit.
int position() const { return byte_offset_ * 8 + bit_offset_; }
// Rewind the stream by 'num_bits' bits
void Rewind(int num_bits);
// Seek to a specific bit in the buffer
void SeekToBit(uint stream_position);
// Maximum byte length of a vlq encoded int
static const int MAX_VLQ_BYTE_LEN = 5;
bool is_initialized() const { return buffer_ != NULL; }
private:
// Used by SeekToBit() and GetValue() to fetch the
// the next word into buffer_.
void BufferValues();
const uint8_t* buffer_;
int max_bytes_;
// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
// faster than reading values byte by byte directly from buffer_.
uint64_t buffered_values_;
int byte_offset_; // Offset in buffer_
int bit_offset_; // Offset in buffered_values_
};
} // namespace kudu
#endif