| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // Defines the structures of the file format, such as the data header and index header. |
| |
| syntax="proto2"; |
| |
| package doris.segment_v2; |
| |
| // Position info of a page: where it starts and how large it is. |
| // NOTE: both fields are proto2 `required`, so every serialized PagePointerPB |
| // must carry them. |
| message PagePointerPB { |
| // Offset of the page in the segment file. |
| required uint64 offset = 1; |
| // Size of the page, in bytes. |
| required uint32 size = 2; |
| } |
| |
| // One key/value metadata entry (used by SegmentFooterPB.file_meta_datas). |
| message MetadataPairPB { |
| // Name of the entry. |
| optional string key = 1; |
| // Payload of the entry, kept as raw bytes. |
| optional bytes value = 2; |
| } |
| |
| // Encoding identifiers referenced by the `encoding` fields in this file. |
| // UNKNOWN_ENCODING is the first declared value, so in proto2 it is what an |
| // unset field of this type reads as unless the field declares a default. |
| enum EncodingTypePB { |
| UNKNOWN_ENCODING = 0; |
| DEFAULT_ENCODING = 1; |
| PLAIN_ENCODING = 2; |
| PREFIX_ENCODING = 3; |
| RLE = 4; |
| DICT_ENCODING = 5; |
| BIT_SHUFFLE = 6; |
| // Frame-Of-Reference. |
| FOR_ENCODING = 7; |
| } |
| |
| // Compression identifiers referenced by the `compression` / `compress_type` |
| // fields in this file. |
| // UNKNOWN_COMPRESSION is the first declared value, so in proto2 it is what an |
| // unset field of this type reads as unless the field declares a default. |
| enum CompressionTypePB { |
| UNKNOWN_COMPRESSION = 0; |
| DEFAULT_COMPRESSION = 1; |
| NO_COMPRESSION = 2; |
| SNAPPY = 3; |
| LZ4 = 4; |
| LZ4F = 5; |
| ZLIB = 6; |
| ZSTD = 7; |
| } |
| |
| // Kinds of page. PageFooterPB.type holds one of these values to indicate |
| // which of its *_footer fields is set. |
| enum PageTypePB { |
| UNKNOWN_PAGE_TYPE = 0; |
| DATA_PAGE = 1; |
| INDEX_PAGE = 2; |
| DICTIONARY_PAGE = 3; |
| SHORT_KEY_PAGE = 4; |
| } |
| |
| // Footer of a data page (carried in PageFooterPB.data_page_footer). |
| // Fields tagged "required:" are declared `optional` in the schema; the tag |
| // presumably means writers are expected to always set them. |
| message DataPageFooterPB { |
| // required: ordinal of the page's first value. |
| optional uint64 first_ordinal = 1; |
| // required: count of values in the page, NULLs included. |
| optional uint64 num_values = 2; |
| // required: size of the nullmap; 0 when the page contains no NULL. |
| optional uint32 nullmap_size = 3; |
| // Array columns only: largest array item ordinal + 1, from which the |
| // length of the last array in this page is calculated. |
| optional uint64 next_array_item_ordinal = 4; |
| } |
| |
| // Footer of an index page (carried in PageFooterPB.index_page_footer). |
| // Fields tagged "required:" are declared `optional` in the schema; the tag |
| // presumably means writers are expected to always set them. |
| message IndexPageFooterPB { |
| // required: number of index entries in this page. |
| optional uint32 num_entries = 1; |
| |
| // Kind of index page. UNKNOWN_INDEX_PAGE_TYPE is the first declared value, |
| // so it is what an unset `type` field reads as. |
| enum Type { |
| UNKNOWN_INDEX_PAGE_TYPE = 0; |
| LEAF = 1; |
| INTERNAL = 2; |
| } |
| // required: type of the index page. |
| optional Type type = 2; |
| } |
| |
| // Footer of a dictionary page (carried in PageFooterPB.dict_page_footer). |
| message DictPageFooterPB { |
| // required: encoding used for the dictionary. |
| optional EncodingTypePB encoding = 1; |
| } |
| |
| // Footer of a short key page (carried in PageFooterPB.short_key_page_footer). |
| message ShortKeyFooterPB { |
| // Number of index items in this index. |
| optional uint32 num_items = 1; |
| // Total bytes occupied by the index keys. |
| optional uint32 key_bytes = 2; |
| // Total bytes occupied by the key offsets. |
| optional uint32 offset_bytes = 3; |
| // Id of the segment this index belongs to. |
| optional uint32 segment_id = 4; |
| // Number of rows in each block. |
| optional uint32 num_rows_per_block = 5; |
| // Number of rows in this segment. |
| optional uint32 num_segment_rows = 6; |
| } |
| |
| // Footer shared by all page types: the page type, the uncompressed body |
| // size, and the type-specific footer matching `type`. |
| // Field numbers 3-6 are not used by this message. |
| message PageFooterPB { |
| // required: tells which of the *_footer fields below is set. |
| optional PageTypePB type = 1; |
| // required: size of the page body before compression (footer and crc |
| // excluded). The page body is uncompressed when this value equals the |
| // page body size. |
| optional uint32 uncompressed_size = 2; |
| // Present only when type == DATA_PAGE. |
| optional DataPageFooterPB data_page_footer = 7; |
| // Present only when type == INDEX_PAGE. |
| optional IndexPageFooterPB index_page_footer = 8; |
| // Present only when type == DICTIONARY_PAGE. |
| optional DictPageFooterPB dict_page_footer = 9; |
| // Present only when type == SHORT_KEY_PAGE. |
| optional ShortKeyFooterPB short_key_page_footer = 10; |
| } |
| |
| // Zone map: min/max of the not-null values in a zone plus flags telling |
| // whether the zone holds null and/or not-null values. |
| message ZoneMapPB { |
| // Minimum not-null value; invalid when all values are null |
| // (has_not_null == false). |
| optional bytes min = 1; |
| // Maximum not-null value; invalid when all values are null |
| // (has_not_null == false). |
| optional bytes max = 2; |
| // True when the zone has a null value. |
| optional bool has_null = 3; |
| // True when the zone has a not-null value. |
| optional bool has_not_null = 4; |
| } |
| |
| // Metadata of one column: identity, type, encoding/compression, nullability, |
| // its indexes and, when applicable, its dictionary page. |
| message ColumnMetaPB { |
| // Id of the column in the table schema. |
| optional uint32 column_id = 1; |
| // Unique id of the column. |
| optional uint32 unique_id = 2; |
| // Holds a FieldType value (FieldType is not defined in this file). |
| optional int32 type = 3; |
| // Variable length, for string type. |
| optional int32 length = 4; |
| // Encoding of the column. |
| optional EncodingTypePB encoding = 5; |
| // Compression type of the column. |
| optional CompressionTypePB compression = 6; |
| // Whether the column can be nullable. |
| optional bool is_nullable = 7; |
| // Metadata of all indexes of this column. |
| repeated ColumnIndexMetaPB indexes = 8; |
| // Pointer to the dictionary page when DICT_ENCODING is used. |
| optional PagePointerPB dict_page = 9; |
| } |
| |
| // Segment-level footer: file version, column metadata, row count, |
| // footprints, default compression, file metadata and the short key index page. |
| message SegmentFooterPB { |
| // File version; reads as 1 when unset. |
| optional uint32 version = 1 [default = 1]; |
| // Tablet schema. |
| repeated ColumnMetaPB columns = 2; |
| // Number of values. |
| optional uint32 num_rows = 3; |
| // Total index footprint of all columns. |
| optional uint64 index_footprint = 4; |
| // Total data footprint of all columns. |
| optional uint64 data_footprint = 5; |
| // Raw data footprint. |
| optional uint64 raw_data_footprint = 6; |
| |
| // Default compression type for file columns; reads as LZ4F when unset. |
| optional CompressionTypePB compress_type = 7 [default = LZ4F]; |
| // Metadata of the file. |
| repeated MetadataPairPB file_meta_datas = 8; |
| |
| // Page of the short key index. |
| optional PagePointerPB short_key_index_page = 9; |
| } |
| |
| // Entry point of a BTree: a pointer to its root plus a flag saying whether |
| // that root is an index page or the sole data page. |
| message BTreeMetaPB { |
| // required: points to the root index page, or to the sole data page, |
| // depending on is_root_data_page. |
| optional PagePointerPB root_page = 1; |
| // required: true when there is only one data page, in which case |
| // root_page points to that page directly. |
| optional bool is_root_data_page = 2; |
| } |
| |
| // Metadata of an indexed column: value type, encoding, value count, its |
| // optional ordinal/value indexes, compression and size. |
| message IndexedColumnMetaPB { |
| // required: holds a FieldType value (FieldType is not defined in this file). |
| optional int32 data_type = 1; |
| // required: encoding of this column. |
| optional EncodingTypePB encoding = 2; |
| // required: total number of values in this column. |
| optional int64 num_values = 3; |
| // Present iff this column has an ordinal index. |
| optional BTreeMetaPB ordinal_index_meta = 4; |
| // Present iff this column contains sorted values and has a value index. |
| optional BTreeMetaPB value_index_meta = 5; |
| // Compression type for data and index pages; reads as NO_COMPRESSION |
| // when unset. |
| optional CompressionTypePB compression = 6 [default = NO_COMPRESSION]; |
| // Index size. |
| optional uint64 size = 7; |
| } |
| |
| // ------------------------------------------------------------- |
| // Column Index Metadata |
| // ------------------------------------------------------------- |
| |
| // Kinds of column index; each non-unknown value has a same-named field in |
| // ColumnIndexMetaPB. |
| enum ColumnIndexTypePB { |
| UNKNOWN_INDEX_TYPE = 0; |
| ORDINAL_INDEX = 1; |
| ZONE_MAP_INDEX = 2; |
| BITMAP_INDEX = 3; |
| BLOOM_FILTER_INDEX = 4; |
| } |
| |
| // Metadata of one column index: its type plus one field per index kind. |
| // NOTE(review): presumably only the field matching `type` is set - confirm |
| // against the writer. Field numbers 2-6 are not used by this message. |
| message ColumnIndexMetaPB { |
| // Kind of index described by this message. |
| optional ColumnIndexTypePB type = 1; |
| // Ordinal index metadata. |
| optional OrdinalIndexPB ordinal_index = 7; |
| // Zone map index metadata. |
| optional ZoneMapIndexPB zone_map_index = 8; |
| // Bitmap index metadata. |
| optional BitmapIndexPB bitmap_index = 9; |
| // Bloom filter index metadata. |
| optional BloomFilterIndexPB bloom_filter_index = 10; |
| } |
| |
| // Metadata of an ordinal index. |
| message OrdinalIndexPB { |
| // required: the root page is the data page itself when there is only one |
| // data page, or the only index page when there is more than one data page. |
| optional BTreeMetaPB root_page = 1; |
| } |
| |
| // Metadata of a zone map index: one segment-level zone map plus the |
| // per-page zone maps. |
| message ZoneMapIndexPB { |
| // required: zone map covering the whole segment. |
| optional ZoneMapPB segment_zone_map = 1; |
| // required: the zone map of each data page, stored in an IndexedColumn |
| // with ordinal index. |
| optional IndexedColumnMetaPB page_zone_maps = 2; |
| } |
| |
| // Metadata of a bitmap index: bitmap kind, null-key flag, and the two |
| // indexed columns holding the ordered dictionary and the bitmaps. |
| message BitmapIndexPB { |
| // Kind of bitmap used by the index. |
| enum BitmapType { |
| UNKNOWN_BITMAP_TYPE = 0; |
| ROARING_BITMAP = 1; |
| } |
| // Bitmap kind; reads as ROARING_BITMAP when unset. |
| optional BitmapType bitmap_type = 1 [default = ROARING_BITMAP]; |
| // required: whether the index contains a null key. |
| // When true, the last bitmap (ordinal: dict_column.num_values) in |
| // bitmap_column is the bitmap for the null key; the null key itself is not |
| // stored in dict_column. |
| optional bool has_null = 2; |
| // required: meta for the ordered dictionary part. |
| optional IndexedColumnMetaPB dict_column = 3; |
| // required: meta for the bitmaps part. |
| optional IndexedColumnMetaPB bitmap_column = 4; |
| } |
| |
| // Hash strategy identifiers (used by BloomFilterIndexPB.hash_strategy). |
| // Only one value is defined; being the first declared value, it is also |
| // what an unset field of this type reads as. |
| enum HashStrategyPB { |
| HASH_MURMUR3_X64_64 = 0; |
| } |
| |
| // Bloom filter algorithm identifiers (used by BloomFilterIndexPB.algorithm). |
| // BLOCK_BLOOM_FILTER is the first declared value, so it is what an unset |
| // field of this type reads as. |
| enum BloomFilterAlgorithmPB { |
| BLOCK_BLOOM_FILTER = 0; |
| CLASSIC_BLOOM_FILTER = 1; |
| } |
| |
| // Metadata of a bloom filter index: hash strategy, algorithm, and the |
| // indexed column holding the bloom filters. |
| message BloomFilterIndexPB { |
| // required: hash strategy of the bloom filters. |
| optional HashStrategyPB hash_strategy = 1; |
| // Bloom filter algorithm. |
| optional BloomFilterAlgorithmPB algorithm = 2; |
| // required: meta for the bloom filters. |
| optional IndexedColumnMetaPB bloom_filter = 3; |
| } |