// blob: 584d409717a15466028b90d61e3cbb43e5b802e5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Defines the file format structures, such as the data header and index header.
// proto2 syntax: this file declares `required` fields and explicit field defaults.
syntax = "proto2";

package doris.segment_v2;
// Position of a page: where it starts and how large it is.
message PagePointerPB {
  // Offset in the segment file.
  required uint64 offset = 1;
  // Size of the page, in bytes.
  required uint32 size = 2;
}
// A single key/value metadata entry.
message MetadataPairPB {
  // Entry key.
  optional string key = 1;
  // Entry value, stored as raw bytes.
  optional bytes value = 2;
}
// Encoding identifiers referenced by the `encoding` fields of the messages
// in this file.
enum EncodingTypePB {
  UNKNOWN_ENCODING = 0;
  DEFAULT_ENCODING = 1;
  PLAIN_ENCODING = 2;
  PREFIX_ENCODING = 3;
  RLE = 4;
  DICT_ENCODING = 5;
  BIT_SHUFFLE = 6;
  // Frame-Of-Reference
  FOR_ENCODING = 7;
}
// Compression identifiers referenced by the `compression` / `compress_type`
// fields of the messages in this file.
enum CompressionTypePB {
  UNKNOWN_COMPRESSION = 0;
  DEFAULT_COMPRESSION = 1;
  NO_COMPRESSION = 2;
  SNAPPY = 3;
  LZ4 = 4;
  LZ4F = 5;
  ZLIB = 6;
  ZSTD = 7;
}
// Kinds of page. PageFooterPB.type carries one of these values to indicate
// which of its *_footer fields is set.
enum PageTypePB {
  UNKNOWN_PAGE_TYPE = 0;
  DATA_PAGE = 1;
  INDEX_PAGE = 2;
  DICTIONARY_PAGE = 3;
  SHORT_KEY_PAGE = 4;
}
// Footer describing a data page.
message DataPageFooterPB {
  // Required: ordinal of the first value in the page.
  optional uint64 first_ordinal = 1;
  // Required: number of values in the page, NULLs included.
  optional uint64 num_values = 2;
  // Required: size of the nullmap; 0 if the page doesn't contain NULL.
  optional uint32 nullmap_size = 3;
  // Array columns only: largest array item ordinal + 1. Used to calculate
  // the length of the last array in this page.
  optional uint64 next_array_item_ordinal = 4;
}
// Footer describing an index page.
message IndexPageFooterPB {
  // Kind of index page.
  enum Type {
    UNKNOWN_INDEX_PAGE_TYPE = 0;
    LEAF = 1;
    INTERNAL = 2;
  }

  // Required: number of index entries in this page.
  optional uint32 num_entries = 1;
  // Required: type of the index page.
  optional Type type = 2;
}
// Footer describing a dictionary page.
message DictPageFooterPB {
  // Required: encoding used for the dictionary.
  optional EncodingTypePB encoding = 1;
}
// Footer describing a short key index page.
message ShortKeyFooterPB {
  // Number of index items in this index.
  optional uint32 num_items = 1;
  // Total bytes occupied by the index key.
  optional uint32 key_bytes = 2;
  // Total bytes occupied by the key offsets.
  optional uint32 offset_bytes = 3;
  // Id of the segment this index belongs to.
  optional uint32 segment_id = 4;
  // Number of rows in each block.
  optional uint32 num_rows_per_block = 5;
  // Number of rows in this segment.
  optional uint32 num_segment_rows = 6;
}
// Generic page footer: common fields plus one type-specific footer.
message PageFooterPB {
  // Required: indicates which of the *_footer fields is set.
  optional PageTypePB type = 1;
  // Required: size of the page body before compression (footer and crc
  // excluded). When this equals the page body size, the body is
  // uncompressed.
  optional uint32 uncompressed_size = 2;
  // Present only when type == DATA_PAGE.
  optional DataPageFooterPB data_page_footer = 7;
  // Present only when type == INDEX_PAGE.
  optional IndexPageFooterPB index_page_footer = 8;
  // Present only when type == DICTIONARY_PAGE.
  optional DictPageFooterPB dict_page_footer = 9;
  // Present only when type == SHORT_KEY_PAGE.
  optional ShortKeyFooterPB short_key_page_footer = 10;
}
// Min/max and null-presence summary for a zone of values.
message ZoneMapPB {
  // Minimum not-null value. Invalid when all values are null
  // (has_not_null == false).
  optional bytes min = 1;
  // Maximum not-null value. Invalid when all values are null
  // (has_not_null == false).
  optional bytes max = 2;
  // Whether the zone has a null value.
  optional bool has_null = 3;
  // Whether the zone has a not-null value.
  optional bool has_not_null = 4;
}
// Metadata for one column.
message ColumnMetaPB {
  // Column id in the table schema.
  optional uint32 column_id = 1;
  // Unique column id.
  optional uint32 unique_id = 2;
  // Holds a FieldType value.
  optional int32 type = 3;
  // Variable length, for string type.
  optional int32 length = 4;
  // Encoding of the column.
  optional EncodingTypePB encoding = 5;
  // Compression type of the column.
  optional CompressionTypePB compression = 6;
  // Whether this column can be nullable.
  optional bool is_nullable = 7;
  // Metadata about all of the column's indexes.
  repeated ColumnIndexMetaPB indexes = 8;
  // Pointer to the dictionary page when DICT_ENCODING is used.
  optional PagePointerPB dict_page = 9;
}
// Footer of a segment file.
message SegmentFooterPB {
  // File version.
  optional uint32 version = 1 [default = 1];
  // Tablet schema.
  repeated ColumnMetaPB columns = 2;
  // Number of values.
  optional uint32 num_rows = 3;
  // Total index footprint of all columns.
  optional uint64 index_footprint = 4;
  // Total data footprint of all columns.
  optional uint64 data_footprint = 5;
  // Raw data footprint.
  optional uint64 raw_data_footprint = 6;
  // Default compression type for file columns.
  optional CompressionTypePB compress_type = 7 [default = LZ4F];
  // Metadata of the file.
  repeated MetadataPairPB file_meta_datas = 8;
  // Page holding the short key index.
  optional PagePointerPB short_key_index_page = 9;
}
// Entry point of a B-tree: its root page and what kind of page that is.
message BTreeMetaPB {
  // Required: pointer to either the root index page or the sole data page,
  // depending on is_root_data_page.
  optional PagePointerPB root_page = 1;
  // Required: true if there is only one data page, in which case root_page
  // points to that page directly.
  optional bool is_root_data_page = 2;
}
// Metadata for an indexed column.
message IndexedColumnMetaPB {
  // Required: FieldType value.
  optional int32 data_type = 1;
  // Required: encoding for this column.
  optional EncodingTypePB encoding = 2;
  // Required: total number of values in this column.
  optional int64 num_values = 3;
  // Present iff this column has an ordinal index.
  optional BTreeMetaPB ordinal_index_meta = 4;
  // Present iff this column contains sorted values and has a value index.
  optional BTreeMetaPB value_index_meta = 5;
  // Compression type for data and index pages.
  optional CompressionTypePB compression = 6 [default = NO_COMPRESSION];
  // Index size.
  optional uint64 size = 7;
}
// -------------------------------------------------------------
// Column Index Metadata
// -------------------------------------------------------------
// Kinds of column index; carried in ColumnIndexMetaPB.type.
enum ColumnIndexTypePB {
  UNKNOWN_INDEX_TYPE = 0;
  ORDINAL_INDEX = 1;
  ZONE_MAP_INDEX = 2;
  BITMAP_INDEX = 3;
  BLOOM_FILTER_INDEX = 4;
}
// Metadata for one column index: a type tag plus per-kind sub-messages.
// NOTE(review): presumably only the sub-message matching `type` is
// populated -- confirm against the writer.
message ColumnIndexMetaPB {
  // Kind of index described by this message.
  optional ColumnIndexTypePB type = 1;
  // Ordinal index metadata.
  optional OrdinalIndexPB ordinal_index = 7;
  // Zone map index metadata.
  optional ZoneMapIndexPB zone_map_index = 8;
  // Bitmap index metadata.
  optional BitmapIndexPB bitmap_index = 9;
  // Bloom filter index metadata.
  optional BloomFilterIndexPB bloom_filter_index = 10;
}
// Metadata for an ordinal index.
message OrdinalIndexPB {
  // Required: the root page. It can be a data page if there is only one
  // data page, or the only index page if there is more than one data page.
  optional BTreeMetaPB root_page = 1;
}
// Metadata for a zone map index.
message ZoneMapIndexPB {
  // Required: segment-level zone map.
  optional ZoneMapPB segment_zone_map = 1;
  // Required: the zone map of each data page, stored in an IndexedColumn
  // with ordinal index.
  optional IndexedColumnMetaPB page_zone_maps = 2;
}
// Metadata for a bitmap index.
message BitmapIndexPB {
  // Kind of bitmap used by the index.
  enum BitmapType {
    UNKNOWN_BITMAP_TYPE = 0;
    ROARING_BITMAP = 1;
  }

  // Bitmap kind; defaults to ROARING_BITMAP.
  optional BitmapType bitmap_type = 1 [default = ROARING_BITMAP];
  // Required: whether the index contains a null key. If true, the last
  // bitmap (ordinal: dict_column.num_values) in bitmap_column is the bitmap
  // for the null key. The null key is not stored in dict_column.
  optional bool has_null = 2;
  // Required: meta for the ordered dictionary part.
  optional IndexedColumnMetaPB dict_column = 3;
  // Required: meta for the bitmaps part.
  optional IndexedColumnMetaPB bitmap_column = 4;
}
// Hash strategy identifiers; carried in BloomFilterIndexPB.hash_strategy.
enum HashStrategyPB {
  HASH_MURMUR3_X64_64 = 0;
}
// Bloom filter algorithm identifiers; carried in BloomFilterIndexPB.algorithm.
enum BloomFilterAlgorithmPB {
  BLOCK_BLOOM_FILTER = 0;
  CLASSIC_BLOOM_FILTER = 1;
}
// Metadata for a bloom filter index.
message BloomFilterIndexPB {
  // Required: hash strategy.
  optional HashStrategyPB hash_strategy = 1;
  // Bloom filter algorithm.
  optional BloomFilterAlgorithmPB algorithm = 2;
  // Required: meta for the bloom filters.
  optional IndexedColumnMetaPB bloom_filter = 3;
}