blob: d169c73e6f0a5a4b0d5f5bb7970ff09b017f1c2f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef ENCODING_BITPACK_ENCODER_H
#define ENCODING_BITPACK_ENCODER_H
#include <vector>
#include "common/allocator/alloc_base.h"
#include "encoder.h"
#include "encoding/encode_utils.h"
#include "encoding/intpacker.h"
#include "utils/errno_define.h"
namespace storage {
class BitPackEncoder {
private:
int bitpacked_group_count_;
int num_buffered_values_;
int bit_width_;
IntPacker *packer_;
common::ByteStream byte_cache_;
std::vector<int64_t> values_; // all data tobe encoded
int64_t buffered_values_[8]; // encode each 8 values
std::vector<unsigned char> bytes_buffer_;
public:
// BitPackEncoder() :byte_cache_(1024,common::MOD_ENCODER_OBJ){}
BitPackEncoder()
: bitpacked_group_count_(0),
num_buffered_values_(0),
bit_width_(0),
packer_(nullptr),
byte_cache_(1024, common::MOD_ENCODER_OBJ) {}
~BitPackEncoder() { destroy(); }
void init() {
bitpacked_group_count_ = 0;
num_buffered_values_ = 0;
bit_width_ = 0;
packer_ = nullptr;
}
void destroy() { delete (packer_); }
void reset() {
num_buffered_values_ = 0;
bitpacked_group_count_ = 0;
bit_width_ = 0;
bytes_buffer_.clear();
byte_cache_.reset();
values_.clear();
delete (packer_);
packer_ = nullptr;
}
FORCE_INLINE void encode(int64_t value, common::ByteStream &out) {
values_.push_back(value);
int current_bit_width = 64 - number_of_leading_zeros(value);
if (current_bit_width > bit_width_) {
bit_width_ = current_bit_width;
}
}
void encode_flush(common::ByteStream &out) {
ASSERT(packer_ == nullptr);
packer_ = new IntPacker(bit_width_);
common::SerializationUtil::write_i8(bit_width_, byte_cache_);
for (size_t i = 0; i < values_.size(); i++) {
// encodeValue(value);
buffered_values_[num_buffered_values_] = values_[i];
num_buffered_values_++;
if (num_buffered_values_ == 8) {
write_or_append_bitpacked_run();
}
}
flush(out);
}
void write_or_append_bitpacked_run() {
if (bitpacked_group_count_ >= 63) {
// we've packed as many values as we can for this run,
// end it and start a new one
end_previous_bitpacked_run(8);
}
convert_buffer();
num_buffered_values_ = 0;
++bitpacked_group_count_;
}
void convert_buffer() {
// TODO: put the bytes on the stack instead on the heap
unsigned char *bytes = (unsigned char *)common::mem_alloc(
bit_width_, common::MOD_BITENCODE_OBJ);
int64_t tmp_buffer[8];
for (int i = 0; i < 8; i++) {
tmp_buffer[i] = (int64_t)buffered_values_[i];
}
packer_->pack_8values(tmp_buffer, 0, bytes);
// we'll not writer bit-packing group to OutputStream immediately
// we buffer them in list
for (int i = 0; i < bit_width_; i++) {
bytes_buffer_.push_back(bytes[i]);
}
common::mem_free(bytes);
}
void flush(common::ByteStream &out) {
int last_bitpacked_num = num_buffered_values_;
if (num_buffered_values_ > 0) {
clear_buffer();
write_or_append_bitpacked_run();
end_previous_bitpacked_run(last_bitpacked_num);
} else {
end_previous_bitpacked_run(8);
}
uint32_t b_length = byte_cache_.total_size();
common::SerializationUtil::write_var_uint(b_length, out);
merge_byte_stream(out, byte_cache_);
reset();
}
void clear_buffer() {
for (int i = num_buffered_values_; i < 8; i++) {
buffered_values_[i] = 0;
}
}
void end_previous_bitpacked_run(int last_bitpacked_num) {
unsigned char bitPackHeader =
(unsigned char)((bitpacked_group_count_ << 1) | 1);
common::SerializationUtil::write_ui8(bitPackHeader, byte_cache_);
common::SerializationUtil::write_ui8((uint8_t)last_bitpacked_num,
byte_cache_);
for (size_t i = 0; i < bytes_buffer_.size(); i++) {
common::SerializationUtil::write_ui8(bytes_buffer_[i], byte_cache_);
}
bytes_buffer_.clear();
bitpacked_group_count_ = 0;
}
int get_max_byte_size() {
if (values_.empty()) {
return 0;
}
int totalValues = values_.size();
int fullGroups = totalValues / 8;
int remainingValues = totalValues % 8;
int bytesPerGroup = (bit_width_ * 8 + 7) / 8;
int maxSize = 0;
maxSize += fullGroups * bytesPerGroup;
if (remainingValues > 0) {
maxSize += bytesPerGroup;
}
// Add additional bytes, because each bitpack group has a header of 1
// byte and a tail of 1 byte.
maxSize += fullGroups * (1 + 1) + (remainingValues > 0 ? (1 + 1) : 0);
return maxSize;
}
};
} // end namespace storage
#endif // ENCODING_BITPACK_ENCODER_H