blob: 4eefc3c37c815667660bc94c5e09db80d5474e9f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"namespace": "org.apache.hudi.avro.model",
"type": "record",
"name": "HoodieMetadataRecord",
"doc": "A record saved within the Metadata Table",
"fields": [
{
  // Key of the metadata record: a required (non-nullable) string with no default.
  "name": "key",
  "type": "string"
},
{
  // Required int discriminator; the valid codes are not enumerated in this schema.
  "name": "type",
  "type": "int",
  "doc": "Type of the metadata record"
},
{
  "name": "filesystemMetadata",
  "doc": "Contains information about partitions and files within the dataset",
  // Nullable payload: either null or a map whose values are HoodieMetadataFileInfo records.
  "type": [
    "null",
    {
      "type": "map",
      "values": {
        "name": "HoodieMetadataFileInfo",
        "type": "record",
        "fields": [
          {
            "name": "size",
            "type": "long",
            "doc": "Size of the file"
          },
          {
            "name": "isDeleted",
            "type": "boolean",
            "doc": "True if this file has been deleted"
          }
        ]
      }
    }
  ],
  // Explicit default so this optional field behaves like the other nullable index
  // payloads of this record: a reader can resolve data that lacks the field.
  // "null" is the first union branch, so null is a valid default.
  "default": null
},
{
  "name": "BloomFilterMetadata",
  "doc": "Metadata Index of bloom filters for all data files in the user table",
  // Nullable payload: either null or a HoodieMetadataBloomFilter record.
  "type": [
    "null",
    {
      "name": "HoodieMetadataBloomFilter",
      "type": "record",
      "doc": "Data file bloom filter details",
      // All four fields are required (no nulls, no defaults).
      "fields": [
        {
          "name": "type",
          "type": "string",
          "doc": "Bloom filter type code"
        },
        {
          "name": "timestamp",
          "type": "string",
          "doc": "Instant timestamp when this metadata was created/updated"
        },
        {
          "name": "bloomFilter",
          "type": "bytes",
          "doc": "Bloom filter binary byte array"
        },
        {
          "name": "isDeleted",
          "type": "boolean",
          "doc": "Bloom filter entry valid/deleted flag"
        }
      ]
    }
  ],
  "default": null
},
{
"doc": "Metadata Index of column statistics for all data files in the user table",
"name": "ColumnStatsMetadata",
"type": [
"null",
{
"doc": "Data file column statistics",
"name": "HoodieMetadataColumnStats",
"type": "record",
"fields": [
{
  // Optional string; null when absent.
  "name": "fileName",
  "type": ["null", "string"],
  "default": null,
  "doc": "File name for which this column statistics applies"
},
{
  // Optional string; null when absent.
  "name": "columnName",
  "type": ["null", "string"],
  "default": null,
  "doc": "Column name for which this column statistics applies"
},
{
"doc": "Minimum value in the range. Based on user data table schema, we can convert this to appropriate type",
"name": "minValue",
"type": [
// These types should be aligned with Parquet's `Statistics` implementation,
// to make sure the semantics stay consistent across file formats
//
// NOTE: Other logical types (decimal, date, timestamp, etc) will be converted
// into one of the following types, making sure that their corresponding
// ordering is preserved
"null",
{
  // Single-field record: the wrapped boolean is stored in "value".
  "name": "BooleanWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the boolean type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": "boolean"
    }
  ]
},
{
  // Single-field record: the wrapped int is stored in "value".
  "name": "IntWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the int type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": "int"
    }
  ]
},
{
  // Single-field record: the wrapped long is stored in "value".
  "name": "LongWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the long type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": "long"
    }
  ]
},
{
  // Single-field record: the wrapped float is stored in "value".
  "name": "FloatWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the float type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": "float"
    }
  ]
},
{
  // Single-field record: the wrapped double is stored in "value".
  "name": "DoubleWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the double type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": "double"
    }
  ]
},
{
  // Single-field record: the wrapped bytes value is stored in "value".
  "name": "BytesWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the bytes type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": "bytes"
    }
  ]
},
{
  // Single-field record: the wrapped string is stored in "value".
  "name": "StringWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the string type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": "string"
    }
  ]
},
{
  // Single-field record: the date is stored in "value" as a plain int
  // (the logical type annotation is deliberately left commented out, see below).
  "name": "DateWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the Date logical type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": {
        "type": "int"
        // NOTE: Due to breaking changes in code-gen between Avro 1.8.2 and 1.10, we can't
        //       rely on logical types to do proper encoding of the native Java types,
        //       and therefore have to encode the statistic manually
        //"logicalType": "date"
      }
    }
  ]
},
{
  // Single-field record: the decimal is stored in "value" as bytes annotated
  // with the decimal logical type, using a fixed precision of 30 and scale of 15.
  "name": "DecimalWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the Decimal logical type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": {
        "type": "bytes",
        "logicalType": "decimal",
        // NOTE: This is equivalent to Spark's [[DoubleDecimal]] and should
        //       be enough for almost any possible use-case
        "precision": 30,
        "scale": 15
      }
    }
  ]
},
{
  // Single-field record: the time is stored in "value" as a long annotated
  // with the time-micros logical type.
  "name": "TimeMicrosWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the Time-micros logical type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": {
        "type": "long",
        "logicalType": "time-micros"
      }
    }
  ]
},
{
  // Single-field record: the timestamp is stored in "value" as a plain long
  // (the logical type annotation is deliberately left commented out, see below).
  "name": "TimestampMicrosWrapper",
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "doc": "A record wrapping the Timestamp-micros logical type so that it can be used within an Avro union",
  "fields": [
    {
      "name": "value",
      "type": {
        "type": "long"
        // NOTE: Due to breaking changes in code-gen between Avro 1.8.2 and 1.10, we can't
        //       rely on logical types to do proper encoding of the native Java types,
        //       and therefore have to encode the statistic manually
        //"logicalType": "timestamp-micros"
      }
    }
  ]
}
],
"default": null
},
{
  "name": "maxValue",
  "doc": "Maximum value in the range. Based on user data table schema, we can convert it to appropriate type",
  // The union branches are referenced by their fully-qualified names; each name must
  // already be defined by the time this field is parsed.
  //
  // These types should be aligned with Parquet's `Statistics` implementation,
  // to make sure the semantics stay consistent across file formats.
  //
  // NOTE: Other logical types (decimal, date, timestamp, etc) will be converted
  //       into one of the following types, making sure that their corresponding
  //       ordering is preserved
  "type": [
    "null",
    "org.apache.hudi.avro.model.BooleanWrapper",
    "org.apache.hudi.avro.model.IntWrapper",
    "org.apache.hudi.avro.model.LongWrapper",
    "org.apache.hudi.avro.model.FloatWrapper",
    "org.apache.hudi.avro.model.DoubleWrapper",
    "org.apache.hudi.avro.model.BytesWrapper",
    "org.apache.hudi.avro.model.StringWrapper",
    "org.apache.hudi.avro.model.DateWrapper",
    "org.apache.hudi.avro.model.DecimalWrapper",
    "org.apache.hudi.avro.model.TimeMicrosWrapper",
    "org.apache.hudi.avro.model.TimestampMicrosWrapper"
  ],
  "default": null
},
{
  // Optional long; null when absent.
  "name": "valueCount",
  "type": ["null", "long"],
  "default": null,
  "doc": "Total count of values"
},
{
  // Optional long; null when absent.
  "name": "nullCount",
  "type": ["null", "long"],
  "default": null,
  "doc": "Total count of null values"
},
{
  // Optional long; null when absent.
  "name": "totalSize",
  "type": ["null", "long"],
  "default": null,
  "doc": "Total storage size on disk"
},
{
  // Optional long; null when absent.
  "name": "totalUncompressedSize",
  "type": ["null", "long"],
  "default": null,
  "doc": "Total uncompressed storage size on disk"
},
{
  // Required boolean (not nullable, no default).
  "name": "isDeleted",
  "type": "boolean",
  "doc": "Column range entry valid/deleted flag"
}
]
}
],
"default" : null
},
{
  "name": "recordIndexMetadata",
  "doc": "Metadata Index that contains information about record keys and their location in the dataset",
  // Nullable payload: either null or a HoodieRecordIndexInfo record.
  "type": [
    "null",
    {
      "name": "HoodieRecordIndexInfo",
      "type": "record",
      // Every field except fileIdEncoding is an optional union defaulting to null;
      // fileIdEncoding is a plain int defaulting to 0.
      "fields": [
        {
          "name": "partitionName",
          "type": ["null", "string"],
          "default": null,
          "doc": "Refers to the partition name the record belongs to"
        },
        {
          "name": "fileIdHighBits",
          "type": ["null", "long"],
          "default": null,
          "doc": "Refers to high 64 bits if the fileId is based on UUID format. \nA UUID based fileId is stored as 3 pieces in RLI (fileIdHighBits, fileIdLowBits and fileIndex). \nFileID format is {UUID}-{fileIndex}."
        },
        {
          "name": "fileIdLowBits",
          "type": ["null", "long"],
          "default": null,
          "doc": "Refers to low 64 bits if the fileId is based on UUID format. \nA UUID based fileId is stored as 3 pieces in RLI (fileIdHighBits, fileIdLowBits and fileIndex). \nFileID format is {UUID}-{fileIndex}."
        },
        {
          "name": "fileIndex",
          "type": ["null", "int"],
          "default": null,
          "doc": "Index representing file index which is used to re-construct UUID based fileID. Applicable when the fileId is based on UUID format. \nA UUID based fileId is stored as 3 pieces in RLI (fileIdHighBits, fileIdLowBits and fileIndex). \nFileID format is {UUID}-{fileIndex}."
        },
        {
          "name": "fileId",
          "type": ["null", "string"],
          "default": null,
          "doc": "Represents fileId of the location where record belongs to. When the encoding is 1, fileID is stored in raw string format."
        },
        {
          "name": "instantTime",
          "type": ["null", "long"],
          "default": null,
          "doc": "Epoch time in milliseconds representing the commit time at which record was added"
        },
        {
          "name": "fileIdEncoding",
          "type": "int",
          "default": 0,
          "doc": "Represents fileId encoding. Possible values are 0 and 1. 0 represents UUID based fileID, and 1 represents raw string format of the fileId. \nWhen the encoding is 0, reader can deduce fileID from fileIdLowBits, fileIdHighBits and fileIndex."
        }
      ]
    }
  ],
  "default": null
},
{
  "name": "SecondaryIndexMetadata",
  "doc": "Metadata Index that contains information about secondary keys and the corresponding record keys in the dataset",
  // Nullable payload: either null or a HoodieSecondaryIndexInfo record.
  "type": [
    "null",
    {
      "name": "HoodieSecondaryIndexInfo",
      "type": "record",
      "fields": [
        {
          // Optional string; null when absent.
          "name": "recordKey",
          "type": ["null", "string"],
          "default": null,
          "doc": "Refers to the record key that this secondary key maps to"
        },
        {
          // Required boolean (not nullable, no default).
          "name": "isDeleted",
          "type": "boolean",
          "doc": "True if this entry has been deleted"
        }
      ]
    }
  ],
  "default": null
}
]
}