blob: 76c05a07aa8e163f397106aad31db417108909a9 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef KUDU_CFILE_BLOCK_COMPRESSION_H
#define KUDU_CFILE_BLOCK_COMPRESSION_H
#include <cstddef>
#include <cstdint>
#include <ostream>
#include <vector>
#include <glog/logging.h>
#include "kudu/gutil/macros.h"
#include "kudu/util/faststring.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"
namespace kudu {
class CompressionCodec;
namespace cfile {
// A compressed block has the following format:
//
// CFile version 1
// -----------------
// 4-byte little-endian: compressed_size
// the size of the compressed data, not including the 8-byte header.
// 4-byte little-endian: uncompressed_size
// the expected size of the data after decompression
// <compressed data>
//
// CFile version 2
// -----------------
// 4-byte little-endian: uncompressed_size
// The size of the data after decompression.
//
// NOTE: if uncompressed_size is equal to the remaining size of the
// block (i.e. the uncompressed and compressed sizes are equal)
// then the block is assumed to be uncompressed, and the codec should
// not be executed.
// <compressed data>
// Builder for writing compressed blocks.
// Always writes v2 format.
class CompressedBlockBuilder {
public:
// 'codec' is expected to remain alive for the lifetime of this object.
explicit CompressedBlockBuilder(const CompressionCodec* codec);
// Sets "*data_slices" to the compressed version of the given input data.
// The input data is formed by concatenating the elements of 'data_slices'.
//
// The slices inside the result may either refer to data owned by this instance,
// or to slices of the input data. In the former case, the slices remain valid
// until the class is destructed or until Compress() is called again. In the latter
// case, it's up to the user to ensure that the original input data is not
// modified while the elements of 'result' are still being used.
//
// If an error was encountered, returns a non-OK status.
Status Compress(const std::vector<Slice>& data_slices,
std::vector<Slice>* result);
// See format information above.
static const size_t kHeaderLength = 4;
private:
DISALLOW_COPY_AND_ASSIGN(CompressedBlockBuilder);
const CompressionCodec* codec_;
faststring buffer_;
};
// Builder for reading compressed blocks.
// Can read v1 or v2 format based on 'cfile_version' constructor parameter.
class CompressedBlockDecoder {
public:
// 'codec' is expected to remain alive for the lifetime of this object.
CompressedBlockDecoder(const CompressionCodec* codec,
int cfile_version,
const Slice& block_data);
// Parses and validates the header in the data block.
// After calling this, the accessors below as well as UncompressIntoBuffer()
// may be safely called.
//
// Returns Corruption if the data block header indicates a compressed size
// that is different than the amount of remaining data in the block, or if the
// uncompressed size is greater than the configured maximum uncompressed block size.
Status Init();
int uncompressed_size() const {
DCHECK_GE(uncompressed_size_, 0) << "must Init()";
return uncompressed_size_;
}
// Uncompress into the provided 'dst' buffer, which must be at least as
// large as the 'uncompressed_size()'.
//
// REQUIRES: Init() has been called and returned successfully.
// REQUIRES: !block_skipped()
Status UncompressIntoBuffer(uint8_t* dst);
private:
DISALLOW_COPY_AND_ASSIGN(CompressedBlockDecoder);
static const size_t kHeaderLengthV1 = 8;
static const size_t kHeaderLengthV2 = 4;
size_t header_length() const {
return cfile_version_ == 1 ? kHeaderLengthV1 : kHeaderLengthV2;
}
const CompressionCodec* const codec_;
const int cfile_version_;
const Slice data_;
int uncompressed_size_ = -1;
};
} // namespace cfile
} // namespace kudu
#endif