blob: 8919505b6410890ab40dc910fad0a8566ca80b09 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
syntax = "proto2";
package quickstep;
// Options for TupleStorageSubBlocks.
message TupleStorageSubBlockDescription {
  // The available TupleStorageSubBlock implementations. The numeric values
  // are written to the wire, so they must not be changed or reused.
  enum TupleStorageSubBlockType {
    BASIC_COLUMN_STORE = 0;
    COMPRESSED_PACKED_ROW_STORE = 1;
    COMPRESSED_COLUMN_STORE = 2;
    SPLIT_ROW_STORE = 3;
  }

  // Selects which TupleStorageSubBlock implementation is described.
  required TupleStorageSubBlockType sub_block_type = 1;

  // The convention for extension numbering is that extensions for a particular
  // TupleStorageSubBlock type should begin from (sub_block_type + 2) * 32.
  // This matches the extensions already assigned in this file:
  //   BASIC_COLUMN_STORE (0)          -> 64
  //   COMPRESSED_PACKED_ROW_STORE (1) -> 96
  //   COMPRESSED_COLUMN_STORE (2)     -> 128
  // Do NOT use (sub_block_type + 1) * 32: for SPLIT_ROW_STORE (3) that would
  // yield 128, which collides with the extensions of COMPRESSED_COLUMN_STORE.
  // NOTE(review): the offset of 2 presumably stems from an enum value that
  // was removed at some point -- confirm against the file history.
  extensions 32 to max;
}
// Scope for the extensions of TupleStorageSubBlockDescription that belong to
// the basic column store. The message declares no fields of its own.
message BasicColumnStoreTupleStorageSubBlockDescription {
  extend TupleStorageSubBlockDescription {
    // ID of the attribute that the column store is sorted by. Leaving this
    // unset means the column store is unsorted.
    optional int32 sort_attribute_id = 64;
  }
}
// Scope for the extensions of TupleStorageSubBlockDescription that belong to
// the compressed packed row store. The message declares no fields of its own.
message CompressedPackedRowStoreTupleStorageSubBlockDescription {
  extend TupleStorageSubBlockDescription {
    // NOTE(review): presumably the IDs of the attributes to compress, one
    // entry per attribute -- confirm against the code that reads this field.
    repeated int32 compressed_attribute_id = 96;
  }
}
// Scope for the extensions of TupleStorageSubBlockDescription that belong to
// the compressed column store. The message declares no fields of its own.
message CompressedColumnStoreTupleStorageSubBlockDescription {
  extend TupleStorageSubBlockDescription {
    // Must be set whenever sub_block_type == COMPRESSED_COLUMN_STORE. An
    // extension cannot be declared "required", so the schema itself does not
    // enforce this.
    optional int32 sort_attribute_id = 128;

    // NOTE(review): presumably the IDs of the attributes to compress, one
    // entry per attribute -- confirm against the code that reads this field.
    repeated int32 compressed_attribute_id = 129;
  }
}
// Options for IndexSubBlocks.
message IndexSubBlockDescription {
  // The available IndexSubBlock implementations. The numeric values are
  // written to the wire, so they must not be changed or reused.
  enum IndexSubBlockType {
    CSB_TREE = 0;
    SMA = 1;
    BLOOM_FILTER = 2;
    BITWEAVING_H = 3;
    BITWEAVING_V = 4;
  }

  // Selects which IndexSubBlock implementation is described.
  required IndexSubBlockType sub_block_type = 1;

  // IDs of the indexed attributes.
  repeated int32 indexed_attribute_ids = 2;

  // The convention for extension numbering is that extensions for a particular
  // IndexSubBlock type should begin from (sub_block_type + 1) * 32. For
  // example, the extensions for BLOOM_FILTER (2) begin at 96.
  extensions 32 to max;
}
// Scope for the extensions of IndexSubBlockDescription that carry the bloom
// filter parameters. The message declares no fields of its own.
message BloomFilterIndexSubBlockDescription {
  extend IndexSubBlockDescription {
    // The defaults below were chosen for a 2 MB block size and were determined
    // from empirical experiments. Together these values control the amount of
    // false positivity that is expected from the bloom filter.

    // Bloom filter size per attribute. Default: 10 KB.
    optional fixed64 bloom_filter_size = 96 [default = 10000];

    // Number of hash functions used in the bloom filter. Default: 20.
    optional int32 number_of_hashes = 97 [default = 20];

    // Number of elements expected to be stored. Default: 1 million.
    optional fixed64 projected_element_count = 98 [default = 1000000];
  }
}
// The full logical description of a block layout: a slot count, exactly one
// tuple store description, and any number of index descriptions.
message StorageBlockLayoutDescription {
  // Number of slots.
  // NOTE(review): what a "slot" is (and its size) is not defined in this
  // file -- confirm against the storage code.
  required uint64 num_slots = 1;

  // Description of the TupleStorageSubBlock.
  required TupleStorageSubBlockDescription tuple_store_description = 2;

  // Descriptions of the IndexSubBlocks; may be empty.
  repeated IndexSubBlockDescription index_description = 3;
}
// A binary-format header for an individual StorageBlock. A StorageBlock is
// laid out in memory as follows:
//   1. 4 bytes: an int giving the length of the StorageBlockHeader that
//      follows (obtained from StorageBlockHeader::ByteSize()).
//   2. Variable length (the preceding int): a binary-serialized
//      StorageBlockHeader.
//   3. Variable length (header.tuple_store_size): a TupleStorageSubBlock.
//   4. A series of 0 or more IndexSubBlocks, whose lengths come from
//      header.index_size.
message StorageBlockHeader {
  // Logical layout of the block.
  required StorageBlockLayoutDescription layout = 1;

  // Sub-block sizes in bytes.
  //
  // NOTE(chasseur): uint64 would almost always give a smaller, more efficient
  // encoding, but fixed64 is used so that the exact size of a header can be
  // known before the sub-block sizes are computed.

  // Size of the TupleStorageSubBlock.
  required fixed64 tuple_store_size = 2;

  // Sizes of the IndexSubBlocks.
  repeated fixed64 index_size = 3 [packed = true];

  // For each IndexSubBlock, whether it is in a consistent state, i.e. whether
  // it completely and accurately reflects the contents of the
  // TupleStorageSubBlock.
  repeated bool index_consistent = 4 [packed = true];
}
// A binary-format header holding the information that is common to
// compressed blocks.
message CompressedBlockInfo {
  // Compressed byte-length of each attribute. An attribute is compressed if
  // this differs from the byte-length of the attribute's type in the catalog.
  repeated fixed64 attribute_size = 1 [packed = true];

  // Dictionary sizes, one entry per attribute. A positive value is the size
  // of that attribute's compression dictionary. Zero means the attribute is
  // either uncompressed, or is an integer-like attribute that is compressed
  // by truncating values to a shorter byte length.
  repeated fixed64 dictionary_size = 2 [packed = true];

  // Size, in bits, of the null bitmap(s) for uncompressed attributes that
  // have null values. May be zero when no uncompressed attribute has any
  // null values.
  required fixed64 null_bitmap_bits = 3;

  // For each attribute, whether it is uncompressed and has null values, i.e.
  // whether it has a columnar null bitmap in a
  // CompressedColumnStoreTupleStorageSubBlock, or bits in the row-major null
  // bitmap of a CompressedPackedRowStoreTupleStorageSubBlock.
  repeated bool uncompressed_attribute_has_nulls = 4 [packed = true];
}