blob: def0e3e0244b17c933bc3f00cef5f10cc628cabb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef KUDU_CFILE_GVINT_BLOCK_H
#define KUDU_CFILE_GVINT_BLOCK_H
#include <stdint.h>
#include <vector>
#include <gtest/gtest_prod.h>
#include "kudu/cfile/block_encodings.h"
namespace kudu {
namespace cfile {
struct WriterOptions;
typedef uint32_t IntType;
using std::vector;
// Builder for an encoded block of ints.
// The encoding is group-varint plus frame-of-reference:
//
// Header group (gvint): <num_elements, min_element, [unused], [unused]
// followed by enough group varints to represent the total number of
// elements, including padding 0s at the end. Each element is a delta
// from the min_element frame-of-reference.
//
// See AppendGroupVarInt32(...) for details on the varint
// encoding.
class GVIntBlockBuilder : public BlockBuilder {
public:
explicit GVIntBlockBuilder(const WriterOptions *options);
bool IsBlockFull(size_t limit) const OVERRIDE;
int Add(const uint8_t *vals, size_t count) OVERRIDE;
Slice Finish(rowid_t ordinal_pos) OVERRIDE;
void Reset() OVERRIDE;
size_t Count() const OVERRIDE;
// Return the first added key.
// key should be a uint32_t *
Status GetFirstKey(void *key) const OVERRIDE;
private:
// TODO: this currently does not do a good job of estimating
// when the ints are large but clustered together,
// since it doesn't take into account the delta coding relative
// to the min int. We could track the min int along the way
// but then we have extra branches in the add loop. Come back to this,
// probably the branches don't matter since this is write-side.
uint64_t EstimateEncodedSize() const {
return estimated_raw_size_ + (ints_.size() + 3) / 4
+ kEstimatedHeaderSizeBytes + kTrailerExtraPaddingBytes;
}
friend class TestEncoding;
FRIEND_TEST(TestEncoding, TestGroupVarInt);
FRIEND_TEST(TestEncoding, TestIntBlockEncoder);
vector<IntType> ints_;
faststring buffer_;
uint64_t estimated_raw_size_;
const WriterOptions *options_;
enum {
kEstimatedHeaderSizeBytes = 10,
// Up to 3 "0s" can be tacked on the end of the block to round out
// the groups of 4
kTrailerExtraPaddingBytes = 3
};
};
// Decoder for UINT32 type, GROUP_VARINT coding
class GVIntBlockDecoder : public BlockDecoder {
public:
explicit GVIntBlockDecoder(Slice slice);
Status ParseHeader() OVERRIDE;
void SeekToStart() {
SeekToPositionInBlock(0);
}
void SeekToPositionInBlock(uint pos) OVERRIDE;
Status SeekAtOrAfterValue(const void *value, bool *exact_match) OVERRIDE;
Status CopyNextValues(size_t *n, ColumnDataView *dst) OVERRIDE;
// Copy the integers to a temporary buffer, it is used by StringDictDecoder
// in its CopyNextValues() method.
Status CopyNextValuesToArray(size_t *n, uint8_t* array);
size_t GetCurrentIndex() const OVERRIDE {
DCHECK(parsed_) << "must parse header first";
return cur_idx_;
}
virtual rowid_t GetFirstRowId() const OVERRIDE {
return ordinal_pos_base_;
}
size_t Count() const OVERRIDE {
return num_elems_;
}
bool HasNext() const OVERRIDE {
return (num_elems_ - cur_idx_) > 0;
}
private:
friend class TestEncoding;
template<class IntSink>
Status DoGetNextValues(size_t *n, IntSink *sink);
Slice data_;
bool parsed_;
const uint8_t *ints_start_;
uint32_t num_elems_;
uint32_t min_elem_;
rowid_t ordinal_pos_base_;
const uint8_t *cur_pos_;
size_t cur_idx_;
// Items that have been decoded but not yet yielded
// to the user. The next one to be yielded is at the
// *end* of the vector!
std::vector<uint32_t> pending_;
// Min Length of a header. (prefix + 4 tags)
static const size_t kMinHeaderSize = 5;
};
} // namespace cfile
} // namespace kudu
#endif