blob: 0693e3e4d7f3f76362f5d23ed1d2e73c0c9e88e6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Defines the on-disk metadata structures of a segment file, such as page
// footers, column metadata, index metadata and the segment footer.
syntax="proto2";
package doris.segment_v2;
// Position of a page: where it starts in the segment file and how long it is.
message PagePointerPB {
  // Offset of the page within the segment file.
  required uint64 offset = 1;
  // Size of the page, in bytes.
  required uint32 size = 2;
}
// One key/value metadata entry (used by SegmentFooterPB.file_meta_datas).
message MetadataPairPB {
  // Entry key.
  optional string key = 1;
  // Entry value, stored as raw bytes.
  optional bytes value = 2;
}
// Encoding identifiers referenced by column metadata, indexed-column
// metadata and dictionary page footers.
// The numeric values are part of the persisted format; do not renumber.
enum EncodingTypePB {
  UNKNOWN_ENCODING = 0;
  DEFAULT_ENCODING = 1;
  PLAIN_ENCODING = 2;
  PREFIX_ENCODING = 3;
  RLE = 4;
  DICT_ENCODING = 5;
  BIT_SHUFFLE = 6;
  // Frame-Of-Reference.
  FOR_ENCODING = 7;
}
// Compression identifiers referenced by column metadata, indexed-column
// metadata and the segment footer.
// The numeric values are part of the persisted format; do not renumber.
enum CompressionTypePB {
  UNKNOWN_COMPRESSION = 0;
  DEFAULT_COMPRESSION = 1;
  NO_COMPRESSION = 2;
  SNAPPY = 3;
  LZ4 = 4;
  LZ4F = 5;
  ZLIB = 6;
  ZSTD = 7;
  LZ4HC = 8;
}
// Kind of page; PageFooterPB.type uses it to indicate which *_footer field
// is populated.
enum PageTypePB {
  UNKNOWN_PAGE_TYPE = 0;
  DATA_PAGE = 1;
  INDEX_PAGE = 2;
  DICTIONARY_PAGE = 3;
  SHORT_KEY_PAGE = 4;
  // NOTE(review): PageFooterPB declares no dedicated footer field for this
  // page type -- confirm how such pages are described.
  PRIMARY_KEY_INDEX_PAGE = 5;
}
// Footer of a data page.
// Fields commented as "required" are logically mandatory even though they
// are declared `optional` in the schema.
message DataPageFooterPB {
  // Required: ordinal of the first value in the page.
  optional uint64 first_ordinal = 1;

  // Required: number of values in the page, NULLs included.
  optional uint64 num_values = 2;

  // Required: size of the null map; 0 when the page contains no NULL.
  optional uint32 nullmap_size = 3;

  // Array columns only: saves the offset of the next page.
  optional uint64 next_array_item_ordinal = 4;
}
// Footer of an index page.
// Fields commented as "required" are logically mandatory even though they
// are declared `optional` in the schema.
message IndexPageFooterPB {
  // Kind of index page.
  enum Type {
    UNKNOWN_INDEX_PAGE_TYPE = 0;
    LEAF = 1;
    INTERNAL = 2;
  }

  // Required: number of index entries stored in this page.
  optional uint32 num_entries = 1;

  // Required: type of this index page.
  optional Type type = 2;
}
// Footer of a dictionary page.
message DictPageFooterPB {
  // Required (logically; declared optional): encoding used for the
  // dictionary.
  optional EncodingTypePB encoding = 1;
}
// Footer of a short key index page.
message ShortKeyFooterPB {
  // Number of index items in this index.
  optional uint32 num_items = 1;

  // Total number of bytes occupied by the index keys.
  optional uint32 key_bytes = 2;

  // Total number of bytes occupied by the key offsets.
  optional uint32 offset_bytes = 3;

  // Id of the segment this index belongs to.
  optional uint32 segment_id = 4;

  // Number of rows in each block.
  optional uint32 num_rows_per_block = 5;

  // Number of rows in this segment.
  optional uint32 num_segment_rows = 6;
}
// Footer attached to a page. `type` selects which of the type-specific
// *_footer fields below is populated.
message PageFooterPB {
  // Required (logically; declared optional): indicates which of the
  // *_footer fields is set.
  optional PageTypePB type = 1;

  // Required (logically; declared optional): size of the page body before
  // compression, excluding footer and crc. The page body is uncompressed
  // when this value equals the page body size.
  optional uint32 uncompressed_size = 2;

  // Present only when type == DATA_PAGE.
  optional DataPageFooterPB data_page_footer = 7;

  // Present only when type == INDEX_PAGE.
  optional IndexPageFooterPB index_page_footer = 8;

  // Present only when type == DICTIONARY_PAGE.
  optional DictPageFooterPB dict_page_footer = 9;

  // Present only when type == SHORT_KEY_PAGE.
  optional ShortKeyFooterPB short_key_page_footer = 10;
}
// Zone map: min/max and null information for a range of values.
message ZoneMapPB {
  // Minimum not-null value. Invalid when every value is null
  // (has_not_null == false).
  optional bytes min = 1;

  // Maximum not-null value. Invalid when every value is null
  // (has_not_null == false).
  optional bytes max = 2;

  // Whether the zone contains a null value.
  optional bool has_null = 3;

  // Whether the zone contains a not-null value.
  optional bool has_not_null = 4;

  // Whether this zone includes all values.
  optional bool pass_all = 5 [default = false];
}
// One part of a column path (see ColumnPathInfo.path_part_infos).
message ColumnPathPartInfo {
  // The key of this part within the full list of path parts.
  optional string key = 1;

  // True when this part is a nested array.
  optional bool is_nested = 2;

  // Nesting level of the array.
  optional uint32 anonymous_array_level = 3;
}
// Persisted form of PathInData, which represents a path inside a document
// (e.g. JSON). Every sub-column of a variant must persist its path info to
// storage, because such a column is identified in storage by its path info
// rather than by a column unique id.
message ColumnPathInfo {
  // String representation of the full path.
  optional string path = 1;

  // Parts of the path; each part is a sub-path of the whole path.
  repeated ColumnPathPartInfo path_part_infos = 2;

  // NOTE(review): undocumented in the original; presumably true when any
  // part of the path is nested -- confirm.
  optional bool has_nested = 3;

  // Unique id of the original parent variant, used to tell different
  // variants apart. The field name is misspelled ("parrent"); it is kept
  // as-is because renaming would change the generated accessors.
  optional uint32 parrent_column_unique_id = 4;

  // Flags whether the sub-column has been assigned a specific type.
  optional bool is_typed = 5;
}
// Statistics kept for a variant column.
message VariantStatisticsPB {
  // Original comment: "in the order of subcolumns in variant".
  // NOTE(review): protobuf map fields have no defined ordering, so readers
  // must not rely on iteration order -- confirm the intent of that remark.
  // NOTE(review): field number 1 is unused here; confirm whether it was
  // used by an earlier version before assigning it to a new field.
  map<string, uint32> sparse_column_non_null_size = 2;
}
// Metadata of one column stored in a segment. The message is recursive:
// child columns are described by nested ColumnMetaPB entries.
message ColumnMetaPB {
  // Column id in the table schema.
  optional uint32 column_id = 1;

  // Unique column id.
  optional uint32 unique_id = 2;

  // Holds a FieldType value.
  optional int32 type = 3;

  // Variable length, for string types.
  optional int32 length = 4;

  // Encoding of the column.
  optional EncodingTypePB encoding = 5;

  // Compression type of the column.
  optional CompressionTypePB compression = 6;

  // Whether this column can be nullable.
  optional bool is_nullable = 7;

  // Metadata for all indexes of this column.
  repeated ColumnIndexMetaPB indexes = 8;

  // Pointer to the dictionary page; used with DICT_ENCODING.
  optional PagePointerPB dict_page = 9;

  // Metadata of the child columns.
  repeated ColumnMetaPB children_columns = 10;

  // Required by the array/struct/map reader to create the child reader.
  optional uint64 num_rows = 11;

  // Names of the child columns.
  repeated string children_column_names = 12;

  // Persisted PathInData, which represents a path inside a document
  // (e.g. JSON).
  optional ColumnPathInfo column_path_info = 13;

  // Extra type info kept for compatibility with the tablet schema.
  // NOTE(review): the original comment marks this mapping with a question
  // mark ("ColumnMessage.default_value ?") -- confirm.
  optional bytes default_value = 14;
  // ColumnMessage.precision
  optional int32 precision = 15;
  // Presumably ColumnMessage.frac; the original comment was truncated
  // ("ColumnMessag") -- confirm.
  optional int32 frac = 16;

  // Deprecated.
  repeated ColumnMetaPB sparse_columns = 17;

  // Used for the agg_state type.
  optional bool result_is_nullable = 18;
  // Used for the agg_state type.
  optional string function_name = 19;
  // Used for the agg_state type.
  optional int32 be_exec_version = 20;

  // Only used for the variant type.
  optional VariantStatisticsPB variant_statistics = 21;

  // NOTE(review): undocumented in the original; the name suggests an upper
  // bound on the number of variant sub-columns -- confirm.
  optional int32 variant_max_subcolumns_count = 22 [default = 0];

  // NOTE(review): undocumented in the original; the name suggests a count
  // of non-null values ("none_null" looks like "non_null") -- confirm.
  optional uint64 none_null_size = 23;
}
// Metadata of the primary key index (see
// SegmentFooterPB.primary_key_index_meta).
message PrimaryKeyIndexMetaPB {
  // The primary key index itself.
  optional IndexedColumnMetaPB primary_key_index = 1;

  // The bloom filter index.
  optional ColumnIndexMetaPB bloom_filter_index = 2;

  // NOTE(review): undocumented in the original; presumably the smallest
  // key covered by the index -- confirm.
  optional bytes min_key = 3;

  // NOTE(review): undocumented in the original; presumably the largest
  // key covered by the index -- confirm.
  optional bytes max_key = 4;
}
// Footer of a segment file.
message SegmentFooterPB {
  // File version.
  optional uint32 version = 1 [default = 1];

  // Tablet schema: metadata of the columns.
  repeated ColumnMetaPB columns = 2;

  // Number of values.
  optional uint32 num_rows = 3;

  // Total index footprint of all columns.
  optional uint64 index_footprint = 4;

  // Total data footprint of all columns.
  optional uint64 data_footprint = 5;

  // Raw data footprint.
  optional uint64 raw_data_footprint = 6;

  // Default compression type for the columns of this file.
  optional CompressionTypePB compress_type = 7 [default = LZ4F];

  // Metadata of the file.
  repeated MetadataPairPB file_meta_datas = 8;

  // Page holding the short key index.
  optional PagePointerPB short_key_index_page = 9;

  // Metadata of the primary key index.
  optional PrimaryKeyIndexMetaPB primary_key_index_meta = 10;
}
// Entry point of a B-tree style index.
// Both fields are logically required although declared `optional`.
message BTreeMetaPB {
  // Required: pointer to either the root index page or the sole data page,
  // depending on is_root_data_page.
  optional PagePointerPB root_page = 1;

  // Required: true when there is only one data page, in which case
  // root_page points directly at that page.
  optional bool is_root_data_page = 2;
}
// Metadata of an indexed column.
// Fields commented as "required" are logically mandatory even though they
// are declared `optional` in the schema.
message IndexedColumnMetaPB {
  // Required: a FieldType value.
  optional int32 data_type = 1;

  // Required: encoding of this column.
  optional EncodingTypePB encoding = 2;

  // Required: total number of values in this column.
  optional int64 num_values = 3;

  // Present iff this column has an ordinal index.
  optional BTreeMetaPB ordinal_index_meta = 4;

  // Present iff this column contains sorted values and has a value index.
  optional BTreeMetaPB value_index_meta = 5;

  // Compression type for data and index pages.
  optional CompressionTypePB compression = 6 [default = NO_COMPRESSION];

  // Index size.
  optional uint64 size = 7;
}
// -------------------------------------------------------------
// Column Index Metadata
// -------------------------------------------------------------
// Kind of column index; used as ColumnIndexMetaPB.type.
enum ColumnIndexTypePB {
  UNKNOWN_INDEX_TYPE = 0;
  ORDINAL_INDEX = 1;
  ZONE_MAP_INDEX = 2;
  BITMAP_INDEX = 3;
  BLOOM_FILTER_INDEX = 4;
}
// Metadata of one column index. Carries a type tag plus one optional
// sub-message per index kind.
// NOTE(review): presumably only the sub-message matching `type` is set;
// the schema itself does not enforce that (no oneof) -- confirm with the
// writers.
message ColumnIndexMetaPB {
  // Kind of this index.
  optional ColumnIndexTypePB type = 1;

  // Ordinal index metadata.
  optional OrdinalIndexPB ordinal_index = 7;

  // Zone map index metadata.
  optional ZoneMapIndexPB zone_map_index = 8;

  // Bitmap index metadata.
  optional BitmapIndexPB bitmap_index = 9;

  // Bloom filter index metadata.
  optional BloomFilterIndexPB bloom_filter_index = 10;
}
// Metadata of an ordinal index.
message OrdinalIndexPB {
  // Required (logically; declared optional): the root page. It can be the
  // data page itself when there is only one data page, or the only index
  // page when there is more than one data page.
  optional BTreeMetaPB root_page = 1;
}
// Metadata of a zone map index.
// Both fields are logically required although declared `optional`.
message ZoneMapIndexPB {
  // Required: the segment-level zone map.
  optional ZoneMapPB segment_zone_map = 1;

  // Required: the zone map of each data page, stored in an IndexedColumn
  // with an ordinal index.
  optional IndexedColumnMetaPB page_zone_maps = 2;
}
// Metadata of a bitmap index: an ordered dictionary part plus a bitmaps
// part.
message BitmapIndexPB {
  // Bitmap implementation used by the index.
  enum BitmapType {
    UNKNOWN_BITMAP_TYPE = 0;
    ROARING_BITMAP = 1;
  }

  // Bitmap type; ROARING_BITMAP when unset.
  optional BitmapType bitmap_type = 1 [default = ROARING_BITMAP];

  // Required (logically; declared optional): whether the index contains a
  // null key. When true, the last bitmap in bitmap_column
  // (ordinal: dict_column.num_values) is the bitmap for the null key; the
  // null key itself is not stored in dict_column.
  optional bool has_null = 2;

  // Required (logically; declared optional): metadata of the ordered
  // dictionary part.
  optional IndexedColumnMetaPB dict_column = 3;

  // Required (logically; declared optional): metadata of the bitmaps part.
  optional IndexedColumnMetaPB bitmap_column = 4;
}
// Hash strategy identifier; used as BloomFilterIndexPB.hash_strategy.
// The zero value is a real strategy, so an unset field reads as
// HASH_MURMUR3_X64_64.
enum HashStrategyPB {
  HASH_MURMUR3_X64_64 = 0;
  CITY_HASH_64 = 1;
}
// Bloom filter algorithm identifier; used as BloomFilterIndexPB.algorithm.
// The zero value is a real algorithm, so an unset field reads as
// BLOCK_BLOOM_FILTER.
enum BloomFilterAlgorithmPB {
  BLOCK_BLOOM_FILTER = 0;
  CLASSIC_BLOOM_FILTER = 1;
  NGRAM_BLOOM_FILTER = 2;
}
// Metadata of a bloom filter index.
message BloomFilterIndexPB {
  // Required (logically; declared optional): hash strategy.
  optional HashStrategyPB hash_strategy = 1;

  // Bloom filter algorithm.
  optional BloomFilterAlgorithmPB algorithm = 2;

  // Required (logically; declared optional): metadata of the bloom
  // filters.
  optional IndexedColumnMetaPB bloom_filter = 3;
}