blob: 893b7b2b6d4574019209bad9ab098a3f42b45ce6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package internal
import ""
const (
ManifestListV1Key = "manifest-list-v1"
ManifestListV2Key = "manifest-list-v2"
ManifestEntryV1Key = "manifest-entry-v1"
ManifestEntryV2Key = "manifest-entry-v2"
var (
AvroSchemaCache avro.SchemaCache
func init() {
AvroSchemaCache.Add(ManifestListV1Key, avro.MustParse(`{
"type": "record",
"name": "manifest_file",
"fields": [
{"name": "manifest_path", "type": "string", "doc": "Location URI with FS scheme", "field-id": 500},
{"name": "manifest_length", "type": "long", "doc": "Total file size in bytes", "field-id": 501},
{"name": "partition_spec_id", "type": "int", "doc": "Spec ID used to write", "field-id": 502},
"name": "added_snapshot_id",
"type": "long",
"doc": "Snapshot ID that added the manifest",
"field-id": 503
"name": "added_data_files_count",
"type": ["null", "int"],
"doc": "Added entry count",
"field-id": 504
"name": "existing_data_files_count",
"type": ["null", "int"],
"doc": "Existing entry count",
"field-id": 505
"name": "deleted_data_files_count",
"type": ["null", "int"],
"doc": "Deleted entry count",
"field-id": 506
"name": "partitions",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "r508",
"fields": [
"name": "contains_null",
"type": "boolean",
"doc": "True if any file has a null partition value",
"field-id": 509
"name": "contains_nan",
"type": ["null", "boolean"],
"doc": "True if any file has a nan partition value",
"field-id": 518
"name": "lower_bound",
"type": ["null", "bytes"],
"doc": "Partition lower bound for all files",
"field-id": 510
"name": "upper_bound",
"type": ["null", "bytes"],
"doc": "Partition upper bound for all files",
"field-id": 511
"element-id": 508
"doc": "Summary for each partition",
"field-id": 507
{"name": "added_rows_count", "type": ["null", "long"], "doc": "Added rows count", "field-id": 512},
"name": "existing_rows_count",
"type": ["null", "long"],
"doc": "Existing rows count",
"field-id": 513
"name": "deleted_rows_count",
"type": ["null", "long"],
"doc": "Deleted rows count",
"field-id": 514
AvroSchemaCache.Add(ManifestListV2Key, avro.MustParse(`{
"type": "record",
"name": "manifest_file",
"fields": [
{"name": "manifest_path", "type": "string", "doc": "Location URI with FS scheme", "field-id": 500},
{"name": "manifest_length", "type": "long", "doc": "Total file size in bytes", "field-id": 501},
{"name": "partition_spec_id", "type": "int", "doc": "Spec ID used to write", "field-id": 502},
{"name": "content", "type": "int", "doc": "Contents of the manifest: 0=data, 1=deletes", "field-id": 517},
"name": "sequence_number",
"type": "long",
"doc": "Sequence number when the manifest was added",
"field-id": 515
"name": "min_sequence_number",
"type": "long",
"doc": "Lowest sequence number in the manifest",
"field-id": 516
{"name": "added_snapshot_id", "type": "long", "doc": "Snapshot ID that added the manifest", "field-id": 503},
{"name": "added_files_count", "type": "int", "doc": "Added entry count", "field-id": 504},
{"name": "existing_files_count", "type": "int", "doc": "Existing entry count", "field-id": 505},
{"name": "deleted_files_count", "type": "int", "doc": "Deleted entry count", "field-id": 506},
{"name": "added_rows_count", "type": "long", "doc": "Added rows count", "field-id": 512},
{"name": "existing_rows_count", "type": "long", "doc": "Existing rows count", "field-id": 513},
{"name": "deleted_rows_count", "type": "long", "doc": "Deleted rows count", "field-id": 514},
"name": "partitions",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "r508",
"fields": [
"name": "contains_null",
"type": "boolean",
"doc": "True if any file has a null partition value",
"field-id": 509
"name": "contains_nan",
"type": ["null", "boolean"],
"doc": "True if any file has a nan partition value",
"field-id": 518
"name": "lower_bound",
"type": ["null", "bytes"],
"doc": "Partition lower bound for all files",
"field-id": 510
"name": "upper_bound",
"type": ["null", "bytes"],
"doc": "Partition upper bound for all files",
"field-id": 511
"element-id": 508
"doc": "Summary for each partition",
"field-id": 507
AvroSchemaCache.Add(ManifestEntryV1Key, avro.MustParse(`{
"type": "record",
"name": "manifest_entry",
"fields": [
{"name": "status", "type": "int", "field-id": 0},
{"name": "snapshot_id", "type": "long", "field-id": 1},
"name": "data_file",
"type": {
"type": "record",
"name": "r2",
"fields": [
{"name": "file_path", "type": "string", "doc": "Location URI with FS scheme", "field-id": 100},
"name": "file_format",
"type": "string",
"doc": "File format name: avro, orc, or parquet",
"field-id": 101
"name": "partition",
"type": {
"type": "record",
"name": "r102",
"fields": [
{"field-id": 1000, "name": "VendorID", "type": ["null", "int"]},
"field-id": 1001,
"name": "tpep_pickup_datetime",
"type": ["null", {"type": "int", "logicalType": "date"}]
"field-id": 102
{"name": "record_count", "type": "long", "doc": "Number of records in the file", "field-id": 103},
{"name": "file_size_in_bytes", "type": "long", "doc": "Total file size in bytes", "field-id": 104},
{"name": "block_size_in_bytes", "type": "long", "field-id": 105},
"name": "column_sizes",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k117_v118",
"fields": [
{"name": "key", "type": "int", "field-id": 117},
{"name": "value", "type": "long", "field-id": 118}
"logicalType": "map"
"doc": "Map of column id to total size on disk",
"field-id": 108
"name": "value_counts",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k119_v120",
"fields": [
{"name": "key", "type": "int", "field-id": 119},
{"name": "value", "type": "long", "field-id": 120}
"logicalType": "map"
"doc": "Map of column id to total count, including null and NaN",
"field-id": 109
"name": "null_value_counts",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k121_v122",
"fields": [
{"name": "key", "type": "int", "field-id": 121},
{"name": "value", "type": "long", "field-id": 122}
"logicalType": "map"
"doc": "Map of column id to null value count",
"field-id": 110
"name": "nan_value_counts",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k138_v139",
"fields": [
{"name": "key", "type": "int", "field-id": 138},
{"name": "value", "type": "long", "field-id": 139}
"logicalType": "map"
"doc": "Map of column id to number of NaN values in the column",
"field-id": 137
"name": "lower_bounds",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k126_v127",
"fields": [
{"name": "key", "type": "int", "field-id": 126},
{"name": "value", "type": "bytes", "field-id": 127}
"logicalType": "map"
"doc": "Map of column id to lower bound",
"field-id": 125
"name": "upper_bounds",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k129_v130",
"fields": [
{"name": "key", "type": "int", "field-id": 129},
{"name": "value", "type": "bytes", "field-id": 130}
"logicalType": "map"
"doc": "Map of column id to upper bound",
"field-id": 128
"name": "key_metadata",
"type": ["null", "bytes"],
"doc": "Encryption key metadata blob",
"field-id": 131
"name": "split_offsets",
"type": ["null", {"type": "array", "items": "long", "element-id": 133}],
"doc": "Splittable offsets",
"field-id": 132
"name": "sort_order_id",
"type": ["null", "int"],
"doc": "Sort order ID",
"field-id": 140
"field-id": 2
AvroSchemaCache.Add(ManifestEntryV2Key, avro.MustParse(`{
"type": "record",
"name": "manifest_entry",
"fields": [
{"name": "status", "type": "int", "field-id": 0},
{"name": "snapshot_id", "type": ["null", "long"], "field-id": 1},
{"name": "sequence_number", "type": ["null", "long"], "field-id": 3},
{"name": "file_sequence_number", "type": ["null", "long"], "field-id": 4},
"name": "data_file",
"type": {
"type": "record",
"name": "r2",
"fields": [
{"name": "content", "type": "int", "doc": "Type of content stored by the data file", "field-id": 134},
{"name": "file_path", "type": "string", "doc": "Location URI with FS scheme", "field-id": 100},
"name": "file_format",
"type": "string",
"doc": "File format name: avro, orc, or parquet",
"field-id": 101
"name": "partition",
"type": {
"type": "record",
"name": "r102",
"fields": [
{"field-id": 1000, "name": "VendorID", "type": ["null", "int"]},
"field-id": 1001,
"name": "tpep_pickup_datetime",
"type": ["null", {"type": "int", "logicalType": "date"}]
"field-id": 102
{"name": "record_count", "type": "long", "doc": "Number of records in the file", "field-id": 103},
{"name": "file_size_in_bytes", "type": "long", "doc": "Total file size in bytes", "field-id": 104},
"name": "column_sizes",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k117_v118",
"fields": [
{"name": "key", "type": "int", "field-id": 117},
{"name": "value", "type": "long", "field-id": 118}
"logicalType": "map"
"doc": "Map of column id to total size on disk",
"field-id": 108
"name": "value_counts",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k119_v120",
"fields": [
{"name": "key", "type": "int", "field-id": 119},
{"name": "value", "type": "long", "field-id": 120}
"logicalType": "map"
"doc": "Map of column id to total count, including null and NaN",
"field-id": 109
"name": "null_value_counts",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k121_v122",
"fields": [
{"name": "key", "type": "int", "field-id": 121},
{"name": "value", "type": "long", "field-id": 122}
"logicalType": "map"
"doc": "Map of column id to null value count",
"field-id": 110
"name": "nan_value_counts",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k138_v139",
"fields": [
{"name": "key", "type": "int", "field-id": 138},
{"name": "value", "type": "long", "field-id": 139}
"logicalType": "map"
"doc": "Map of column id to number of NaN values in the column",
"field-id": 137
"name": "lower_bounds",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k126_v127",
"fields": [
{"name": "key", "type": "int", "field-id": 126},
{"name": "value", "type": "bytes", "field-id": 127}
"logicalType": "map"
"doc": "Map of column id to lower bound",
"field-id": 125
"name": "upper_bounds",
"type": [
"type": "array",
"items": {
"type": "record",
"name": "k129_v130",
"fields": [
{"name": "key", "type": "int", "field-id": 129},
{"name": "value", "type": "bytes", "field-id": 130}
"logicalType": "map"
"doc": "Map of column id to upper bound",
"field-id": 128
"name": "key_metadata",
"type": ["null", "bytes"],
"doc": "Encryption key metadata blob",
"field-id": 131
"name": "split_offsets",
"type": ["null", {"type": "array", "items": "long", "element-id": 133}],
"doc": "Splittable offsets",
"field-id": 132
"name": "equality_ids",
"type": ["null", {"type": "array", "items": "int", "element-id": 136}],
"doc": "Field ids used to determine row equality for delete files",
"field-id": 135
"name": "sort_order_id",
"type": ["null", "int"],
"doc": "Sort order ID",
"field-id": 140
"field-id": 2