blob: 5f2b1437ad98011d3bd33961113d67f3e25b0ffb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef ENCODING_DICTIONARY_ENCODER_H
#define ENCODING_DICTIONARY_ENCODER_H
#include <map>
#include <string>
#include <vector>
#include "common/allocator/byte_stream.h"
#include "encoder.h"
#include "encoding/bitpack_encoder.h"
namespace storage {
class DictionaryEncoder {
private:
std::map<std::string, int> entry_index_;
std::vector<std::string> index_entry_;
BitPackEncoder values_encoder_;
int map_size_;
public:
DictionaryEncoder() {}
~DictionaryEncoder() {}
void init() {
map_size_ = 0;
values_encoder_.init();
}
void reset() {
entry_index_.clear();
index_entry_.clear();
map_size_ = 0;
values_encoder_.reset();
}
void encode(std::string value, common::ByteStream &out) {
if (entry_index_.count(value) == 0) {
index_entry_.push_back(value);
map_size_ = map_size_ + value.length();
entry_index_[value] = entry_index_.size();
}
values_encoder_.encode(entry_index_[value], out);
}
int flush(common::ByteStream &out) {
int ret = common::E_OK;
ret = write_map(out);
if (ret != common::E_OK) {
return ret;
} else {
write_encoded_data(out);
}
if (ret != common::E_OK) {
return ret;
}
return common::E_OK;
}
int write_map(common::ByteStream &out) {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_var_int(
(int)index_entry_.size(), out))) {
return ret;
} else {
for (int i = 0; i < (int)index_entry_.size(); i++) {
if (RET_FAIL(common::SerializationUtil::write_str(
index_entry_[i], out))) {
return common::E_FILE_WRITE_ERR;
}
}
}
return common::E_OK;
}
void write_encoded_data(common::ByteStream &out) {
values_encoder_.encode_flush(out);
}
int get_max_byte_size() {
// 4 bytes for storing dictionary size
return 4 + map_size_ + values_encoder_.get_max_byte_size();
}
};
} // end namespace storage
#endif // ENCODING_DICTIONARY_ENCODER_H