blob: 0c067738a35a623a93222935fd6ea6006803992c [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace org.apache.impala.fb;
// File formats that an Iceberg file entry can be tagged with. The value is
// stored as a single byte. Numbers are spelled out explicitly; they match the
// implicit declaration-order numbering (starting at 0), so appending new
// values at the end keeps existing serialized data readable.
enum FbIcebergDataFileFormat : byte {
  PARQUET = 0,
  ORC = 1,
  AVRO = 2,
  PUFFIN = 3
}
// Kinds of Iceberg partition transform. The value is stored as a single byte.
// Numbers are spelled out explicitly; they match the implicit
// declaration-order numbering (starting at 0), so existing serialized data
// keeps its meaning.
enum FbIcebergTransformType : byte {
  IDENTITY = 0,
  HOUR = 1,
  DAY = 2,
  MONTH = 3,
  YEAR = 4,
  BUCKET = 5,
  TRUNCATE = 6,
  VOID = 7
}
// A single partition transform together with a value associated with it.
// Field order determines the FlatBuffers field ids; append new fields at the
// end only.
table FbIcebergPartitionTransformValue {
  // Which transform this entry describes. Reads back as the enum's zero value
  // when not set.
  transform_type: FbIcebergTransformType;

  // Integer argument of the transform.
  // NOTE(review): presumably the bucket count / truncate width for
  // parameterized transforms -- confirm against the writer.
  transform_param: int;

  // Value as raw bytes; the byte encoding is not defined by this schema.
  transform_value: [ubyte];

  // NOTE(review): presumably the Iceberg field id of the source column --
  // confirm against the writer.
  source_id: int;
}
// Iceberg-specific metadata attached to a file. Field order determines the
// FlatBuffers field ids; append new fields at the end only.
table FbIcebergMetadata {
  // Format of the file. Reads back as the enum's zero value when not set.
  file_format: FbIcebergDataFileFormat;

  // Number of records, as reported for this file.
  record_count: long;

  // NOTE(review): presumably the Iceberg data sequence number of the file --
  // confirm against the writer.
  data_sequence_number: long;

  // Id of the partition spec this file belongs to (unsigned 16-bit).
  spec_id: ushort;

  // One entry per partition transform value of this file.
  partition_keys: [FbIcebergPartitionTransformValue];

  // NOTE(review): presumably the field ids used by equality delete files --
  // confirm against the writer.
  equality_field_ids: [int];

  // Defaults to -1 when not set.
  // NOTE(review): -1 presumably means "no partition id assigned" -- confirm.
  part_id: int = -1;

  // NOTE(review): presumably the first row id assigned to this file's rows
  // (row lineage) -- confirm against the writer.
  first_row_id: long;
}
// Statistics for one column, identified by field id. Field order determines
// the FlatBuffers field ids; append new fields at the end only.
table FbIcebergColumnStats {
  // Id of the field these statistics refer to.
  field_id: int;

  // Total compressed size of the column data, in bytes.
  total_compressed_byte_size: long;

  // Number of values in the column.
  // NOTE(review): whether nulls are included is not visible here -- confirm.
  value_count: long;

  // Number of null values in the column.
  null_count: long;

  // Lower and upper bound as raw bytes; the byte encoding is not defined by
  // this schema.
  lower_bound: [ubyte];
  upper_bound: [ubyte];
}
// Wrapper around one partition field value. A table is used so that a vector
// of these can carry variable-length byte strings.
table FbIcebergPartitionField {
  // The value as raw bytes; the byte encoding is not defined by this schema.
  field_value: [ubyte];
}
// Description of one Iceberg data file, including its per-column statistics
// and any deletion-vector changes that go with it. Field order determines the
// FlatBuffers field ids; append new fields at the end only.
table FbIcebergDataFile {
  // Path of the data file.
  path: string;

  // File format; PARQUET when not set.
  format: FbIcebergDataFileFormat = PARQUET;

  // Number of records in the file; 0 when not set.
  record_count: long = 0;

  // Size of the file in bytes; 0 when not set.
  file_size_in_bytes: long = 0;

  // Id of the partition spec this file belongs to (unsigned 16-bit).
  spec_id: ushort;

  // Partition of the file expressed as a path string.
  // NOTE(review): exact format (e.g. "col=value/...") is not visible here --
  // confirm against the writer.
  partition_path: string;

  // Partition field values as raw bytes, one entry per field.
  raw_partition_fields: [FbIcebergPartitionField];

  // Column statistics, one entry per column.
  per_column_stats: [FbIcebergColumnStats];

  // Deletion vector (DV) to remove from metadata.
  old_deletion_vector: FbIcebergDeletionVector;

  // Deletion vector (DV) to add to metadata.
  new_deletion_vector: FbIcebergDeletionVector;
}
// Description of one deletion vector stored inside a Puffin file. Field order
// determines the FlatBuffers field ids; append new fields at the end only.
table FbIcebergDeletionVector {
  // Path of the file that holds the deletion vector.
  path: string;

  // Offset and size (in bytes) of the deletion vector's content.
  // NOTE(review): the offset is presumably relative to the start of the file
  // at `path` -- confirm against the writer.
  content_offset: long;
  content_size_in_bytes: long;

  // Two 64-bit halves identifying the referenced data file.
  // NOTE(review): presumably the high/low words of a 128-bit hash of the data
  // file path -- confirm the hash function and input against the writer.
  referenced_data_file_hash_high: long;
  referenced_data_file_hash_low: long;

  // Record count associated with this deletion vector.
  // NOTE(review): presumably the number of deleted rows -- confirm.
  record_count: long;

  // Total size of the Puffin file containing this deletion vector, in bytes.
  file_size_in_bytes: long;

  // Id of the partition spec (unsigned 16-bit).
  spec_id: ushort;

  // Partition field values as raw bytes, one entry per field.
  raw_partition_fields: [FbIcebergPartitionField];
}