// Source blob: 43c74eb2c967ef151dace472f000d022389c0aa4
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory> // unique_ptr
#include <string>
#include <vector>
#include "common/status.h" // Status
#include "gen_cpp/segment_v2.pb.h"
#include "gutil/macros.h"
#include "olap/tablet_schema.h"
#include "util/faststring.h"
#include "vec/core/block.h"
#include "vec/olap/olap_data_convertor.h"
namespace doris {
// Product of OLAP_MAX_COLUMN_SEGMENT_FILE_SIZE and OLAP_COLUMN_FILE_SEGMENT_SIZE_SCALE (both
// declared outside this header), converted to uint32_t.
// NOTE(review): judging by the name this is the size cap applied to a segment file -- confirm
// the unit against the definitions of the two constants.
// TODO(lingbin): Should be a conf that can be dynamically adjusted, or a member in the context
const uint32_t MAX_SEGMENT_SIZE = static_cast<uint32_t>(OLAP_MAX_COLUMN_SEGMENT_FILE_SIZE *
OLAP_COLUMN_FILE_SEGMENT_SIZE_SCALE);
// Forward declarations of types in namespace doris.
// NOTE(review): RowBlock and RowCursor are not referenced by any other declaration visible in
// this header -- confirm whether they are still needed.
class DataDir;
class MemTracker;
class RowBlock;
class RowCursor;
class TabletSchema;
class TabletColumn;
class ShortKeyIndexBuilder;
class PrimaryKeyIndexBuilder;
class KeyCoder;
namespace io {
// Referenced below only through a pointer; SegmentWriter does not own the FileWriter.
class FileWriter;
} // namespace io
namespace segment_v2 {
// Forward declaration; SegmentWriter keeps its ColumnWriter instances behind std::unique_ptr.
class ColumnWriter;
// Declared here and defined elsewhere (not in this header).
// NOTE(review): presumably the magic byte sequence that marks a segment file, and its length
// in bytes -- confirm against the definition.
extern const char* k_segment_magic;
extern const uint32_t k_segment_magic_length;
// Options handed to SegmentWriter's constructor (copied into SegmentWriter::_opts).
struct SegmentWriterOptions {
    // Defaults to 1024.
    // NOTE(review): presumably the number of rows grouped into one block -- confirm.
    uint32_t num_rows_per_block {1024};

    // Off by default; switches on the "unique-key with merge on write" handling.
    bool enable_unique_key_merge_on_write {false};
};
// Writes a single segment through an io::FileWriter that the caller keeps ownership of.
//
// Only declarations live in this header; the definitions are elsewhere. The comments below
// state what the signatures and member comments establish, and mark everything else as an
// assumption to confirm against the implementation.
//
// The class is non-copyable (DISALLOW_COPY_AND_ASSIGN).
class SegmentWriter {
public:
    // `file_writer` is not owned by this object (see _file_writer).
    // NOTE(review): the remaining arguments presumably seed the same-named members -- confirm.
    explicit SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id,
                           TabletSchemaSPtr tablet_schema, DataDir* data_dir,
                           uint32_t max_row_per_segment, const SegmentWriterOptions& opts);
    ~SegmentWriter();

    Status init();
    // for vertical compaction
    // NOTE(review): `col_ids` / `has_key` presumably describe the column group being written
    // (see _column_ids / _has_key) -- confirm.
    Status init(const std::vector<uint32_t>& col_ids, bool has_key);

    // Row-at-a-time append; the template definition is not part of this header.
    template <typename RowType>
    Status append_row(const RowType& row);

    // NOTE(review): presumably appends `num_rows` rows of `block` starting at `row_pos` -- confirm.
    Status append_block(const vectorized::Block* block, size_t row_pos, size_t num_rows);

    int64_t max_row_to_add(size_t row_avg_size_in_bytes);

    uint64_t estimate_segment_size();

    // Row count already written in the current column group.
    uint32_t num_rows_written() const { return _num_rows_written; }

    // Total row count of this segment.
    uint32_t row_count() const { return _row_count; }

    // Both sizes are reported through the out-pointers.
    // NOTE(review): presumably equivalent to finalize_columns() followed by finalize_footer()
    // -- confirm.
    Status finalize(uint64_t* segment_file_size, uint64_t* index_size);

    // Read-only accessor, hence const-qualified.
    uint32_t get_segment_id() const { return _segment_id; }

    Status finalize_columns(uint64_t* index_size);
    Status finalize_footer(uint64_t* segment_file_size);

    // Static helper that fills `meta` for the column `column` with id `column_id`.
    static void init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column,
                                 TabletSchemaSPtr tablet_schema);

    // NOTE(review): presumably views over _min_key / _max_key, in which case the returned
    // Slice is only valid while this writer is alive and the key is unchanged -- confirm.
    Slice min_encoded_key();
    Slice max_encoded_key();

    // Returns the DataDir pointer passed at construction; no ownership is transferred.
    DataDir* get_data_dir() const { return _data_dir; }

    // True when the tablet schema's key type is UNIQUE_KEYS.
    bool is_unique_key() const { return _tablet_schema->keys_type() == UNIQUE_KEYS; }

private:
    DISALLOW_COPY_AND_ASSIGN(SegmentWriter);

    Status _write_data();
    Status _write_ordinal_index();
    Status _write_zone_map();
    Status _write_bitmap_index();
    Status _write_bloom_filter_index();
    Status _write_short_key_index();
    Status _write_primary_key_index();
    Status _write_footer();
    Status _write_raw_data(const std::vector<Slice>& slices);

    std::string _encode_keys(const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns,
                             size_t pos, bool null_first = true);
    // used for unique-key with merge on write and segment min_max key
    std::string _full_encode_keys(
            const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns, size_t pos,
            bool null_first = true);
    // used for unique-key with merge on write
    // `encoded_keys` is spelled std::string explicitly so this header does not depend on a
    // `using std::string` leaking in from another include.
    void _encode_seq_column(const vectorized::IOlapColumnDataAccessor* seq_column, size_t pos,
                            std::string* encoded_keys);

    void set_min_max_key(const Slice& key);
    void set_min_key(const Slice& key);
    void set_max_key(const Slice& key);

    void _reset_column_writers();

    // ---- data members (declaration order is kept: it is also the initialization order) ----
    uint32_t _segment_id;
    TabletSchemaSPtr _tablet_schema;
    DataDir* _data_dir;
    uint32_t _max_row_per_segment;
    SegmentWriterOptions _opts;
    // Not owned. owned by RowsetWriter
    io::FileWriter* _file_writer;
    SegmentFooterPB _footer;
    // Neither count is a constructor argument, so both get a defined default and can never
    // be read as indeterminate values.
    size_t _num_key_columns = 0;
    size_t _num_short_key_columns = 0;
    std::unique_ptr<ShortKeyIndexBuilder> _short_key_index_builder;
    std::unique_ptr<PrimaryKeyIndexBuilder> _primary_key_index_builder;
    std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
    std::unique_ptr<MemTracker> _mem_tracker;
    std::unique_ptr<vectorized::OlapBlockDataConvertor> _olap_data_convertor;
    // used for building short key index or primary key index during vectorized write.
    std::vector<const KeyCoder*> _key_coders;
    const KeyCoder* _seq_coder = nullptr;
    std::vector<uint16_t> _key_index_size;
    size_t _short_key_row_pos = 0;
    std::vector<uint32_t> _column_ids;
    bool _has_key = true;
    // _num_rows_written means row count already written in this current column group
    uint32_t _num_rows_written = 0;
    // _row_count means total row count of this segment
    // In vertical compaction row count is recorded when key columns group finish
    // and _num_rows_written will be updated in value column group
    uint32_t _row_count = 0;
    bool _is_first_row = true;
    faststring _min_key;
    faststring _max_key;
};
} // namespace segment_v2
} // namespace doris