| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "HoodieMetadataRecord", |
| "doc": "A record saved within the Metadata Table", |
| "fields": [ |
| { |
| "name": "key", |
| "type": "string" |
| }, |
| { |
| "name": "type", |
| "doc": "Type of the metadata record", |
| "type": "int" |
| }, |
| { |
| "doc": "Contains information about partitions and files within the dataset", |
| "name": "filesystemMetadata", |
| "type": [ |
| "null", |
| { |
| "type": "map", |
| "values": { |
| "type": "record", |
| "name": "HoodieMetadataFileInfo", |
| "fields": [ |
| { |
| "name": "size", |
| "type": "long", |
| "doc": "Size of the file" |
| }, |
| { |
| "name": "isDeleted", |
| "type": "boolean", |
| "doc": "True if this file has been deleted" |
| } |
| ] |
| } |
| } |
| ] |
| }, |
| { |
| "doc": "Metadata Index of bloom filters for all data files in the user table", |
| "name": "BloomFilterMetadata", |
| "type": [ |
| "null", |
| { |
| "doc": "Data file bloom filter details", |
| "name": "HoodieMetadataBloomFilter", |
| "type": "record", |
| "fields": [ |
| { |
| "doc": "Bloom filter type code", |
| "name": "type", |
| "type": "string" |
| }, |
| { |
| "doc": "Instant timestamp when this metadata was created/updated", |
| "name": "timestamp", |
| "type": "string" |
| }, |
| { |
| "doc": "Bloom filter binary byte array", |
| "name": "bloomFilter", |
| "type": "bytes" |
| }, |
| { |
| "doc": "Bloom filter entry valid/deleted flag", |
| "name": "isDeleted", |
| "type": "boolean" |
| } |
| ] |
| } |
| ], |
| "default" : null |
| }, |
| { |
| "doc": "Metadata Index of column statistics for all data files in the user table", |
| "name": "ColumnStatsMetadata", |
| "type": [ |
| "null", |
| { |
| "doc": "Data file column statistics", |
| "name": "HoodieMetadataColumnStats", |
| "type": "record", |
| "fields": [ |
| { |
| "doc": "File name for which this column statistics applies", |
| "name": "fileName", |
| "type": [ |
| "null", |
| "string" |
| ], |
| "default" : null |
| }, |
| { |
| "doc": "Column name for which this column statistics applies", |
| "name": "columnName", |
| "type": [ |
| "null", |
| "string" |
| ], |
| "default" : null |
| }, |
| { |
| "doc": "Minimum value in the range. Based on the user data table schema, we can convert this to the appropriate type", |
| "name": "minValue", |
| "type": [ |
| // These types should be aligned with Parquet's `Statistics` implementation, |
| // making sure that we implement consistent semantics across file formats |
| // |
| // NOTE: Other logical types (decimal, date, timestamp, etc) will be converted |
| // into one of the following types, making sure that their corresponding |
| // ordering is preserved |
| "null", |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "BooleanWrapper", |
| "doc": "A record wrapping boolean type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": "boolean", |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "IntWrapper", |
| "doc": "A record wrapping int type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": "int", |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "LongWrapper", |
| "doc": "A record wrapping long type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": "long", |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "FloatWrapper", |
| "doc": "A record wrapping float type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": "float", |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "DoubleWrapper", |
| "doc": "A record wrapping double type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": "double", |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "BytesWrapper", |
| "doc": "A record wrapping bytes type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": "bytes", |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "StringWrapper", |
| "doc": "A record wrapping string type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": "string", |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "DateWrapper", |
| "doc": "A record wrapping Date logical type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": { |
| "type": "int" |
| // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't |
| // rely on logical types to do proper encoding of the native Java types, |
| // and therefore have to encode the statistic manually |
| //"logicalType": "date" |
| }, |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "DecimalWrapper", |
| "doc": "A record wrapping Decimal logical type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": { |
| "type": "bytes", |
| "logicalType": "decimal", |
| // NOTE: This is equivalent to Spark's [[DoubleDecimal]] and should |
| // be enough for almost any possible use-cases |
| "precision": 30, |
| "scale": 15 |
| }, |
| "name": "value" |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "TimeMicrosWrapper", |
| "doc": "A record wrapping Time-micros logical type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": { |
| "type": "long", |
| "logicalType": "time-micros" |
| }, |
| "name": "value" |
| |
| } |
| ] |
| }, |
| { |
| "namespace": "org.apache.hudi.avro.model", |
| "type": "record", |
| "name": "TimestampMicrosWrapper", |
| "doc": "A record wrapping Timestamp-micros logical type so that it can be used within Avro's Union", |
| "fields": [ |
| { |
| "type": { |
| "type": "long" |
| // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't |
| // rely on logical types to do proper encoding of the native Java types, |
| // and therefore have to encode the statistic manually |
| //"logicalType": "timestamp-micros" |
| }, |
| "name": "value" |
| } |
| ] |
| } |
| ], |
| "default": null |
| }, |
| { |
| "doc": "Maximum value in the range. Based on the user data table schema, we can convert it to the appropriate type", |
| "name": "maxValue", |
| "type": [ |
| // These types should be aligned with Parquet's `Statistics` implementation, |
| // making sure that we implement consistent semantics across file formats |
| // |
| // NOTE: Other logical types (decimal, date, timestamp, etc) will be converted |
| // into one of the following types, making sure that their corresponding |
| // ordering is preserved |
| "null", |
| "org.apache.hudi.avro.model.BooleanWrapper", |
| "org.apache.hudi.avro.model.IntWrapper", |
| "org.apache.hudi.avro.model.LongWrapper", |
| "org.apache.hudi.avro.model.FloatWrapper", |
| "org.apache.hudi.avro.model.DoubleWrapper", |
| "org.apache.hudi.avro.model.BytesWrapper", |
| "org.apache.hudi.avro.model.StringWrapper", |
| "org.apache.hudi.avro.model.DateWrapper", |
| "org.apache.hudi.avro.model.DecimalWrapper", |
| "org.apache.hudi.avro.model.TimeMicrosWrapper", |
| "org.apache.hudi.avro.model.TimestampMicrosWrapper" |
| ], |
| "default": null |
| }, |
| { |
| "doc": "Total count of values", |
| "name": "valueCount", |
| "type": [ |
| "null", |
| "long" |
| ], |
| "default": null |
| }, |
| { |
| "doc": "Total count of null values", |
| "name": "nullCount", |
| "type": [ |
| "null", |
| "long" |
| ], |
| "default": null |
| }, |
| { |
| "doc": "Total storage size on disk", |
| "name": "totalSize", |
| "type": [ |
| "null", |
| "long" |
| ], |
| "default": null |
| }, |
| { |
| "doc": "Total uncompressed storage size on disk", |
| "name": "totalUncompressedSize", |
| "type": [ |
| "null", |
| "long" |
| ], |
| "default": null |
| }, |
| { |
| "doc": "Column range entry valid/deleted flag", |
| "name": "isDeleted", |
| "type": "boolean" |
| } |
| ] |
| } |
| ], |
| "default" : null |
| }, |
| { |
| "name": "recordIndexMetadata", |
| "doc": "Metadata Index that contains information about record keys and their location in the dataset", |
| "type": [ |
| "null", |
| { |
| "type": "record", |
| "name": "HoodieRecordIndexInfo", |
| "fields": [ |
| { |
| "name": "partitionName", |
| "type": [ |
| "null", |
| "string" |
| ], |
| "default": null, |
| "doc": "Refers to the partition name the record belongs to" |
| }, |
| { |
| "name": "fileIdHighBits", |
| "type": [ |
| "null", |
| "long" |
| ], |
| "default": null, |
| "doc": "Refers to high 64 bits if the fileId is based on UUID format. \nA UUID based fileId is stored as 3 pieces in RLI (fileIdHighBits, fileIdLowBits and fileIndex). \nFileID format is {UUID}-{fileIndex}." |
| }, |
| { |
| "name": "fileIdLowBits", |
| "type": [ |
| "null", |
| "long" |
| ], |
| "default": null, |
| "doc": "Refers to low 64 bits if the fileId is based on UUID format. \nA UUID based fileId is stored as 3 pieces in RLI (fileIdHighBits, fileIdLowBits and fileIndex). \nFileID format is {UUID}-{fileIndex}." |
| }, |
| { |
| "name": "fileIndex", |
| "type": [ |
| "null", |
| "int" |
| ], |
| "default": null, |
| "doc": "Index representing file index which is used to re-construct UUID based fileID. Applicable when the fileId is based on UUID format. \nA UUID based fileId is stored as 3 pieces in RLI (fileIdHighBits, fileIdLowBits and fileIndex). \nFileID format is {UUID}-{fileIndex}." |
| }, |
| { |
| "name": "fileId", |
| "type": [ |
| "null", |
| "string" |
| ], |
| "default" : null, |
| "doc": "Represents the fileId of the location the record belongs to. When the encoding is 1, fileID is stored in raw string format." |
| }, |
| { |
| "name": "instantTime", |
| "type": [ |
| "null", |
| "long" |
| ], |
| "default": null, |
| "doc": "Epoch time in milliseconds representing the commit time at which the record was added" |
| }, |
| { |
| "name": "fileIdEncoding", |
| "type": "int", |
| "default": 0, |
| "doc": "Represents fileId encoding. Possible values are 0 and 1. 0 represents UUID based fileID, and 1 represents raw string format of the fileId. \nWhen the encoding is 0, reader can deduce fileID from fileIdLowBits, fileIdHighBits and fileIndex." |
| } |
| ] |
| } |
| ], |
| "default" : null |
| }, |
| { |
| "name": "SecondaryIndexMetadata", |
| "doc": "Metadata Index that contains information about secondary keys and the corresponding record keys in the dataset", |
| "type": [ |
| "null", |
| { |
| "type": "record", |
| "name": "HoodieSecondaryIndexInfo", |
| "fields": [ |
| { |
| "name": "recordKey", |
| "type": [ |
| "null", |
| "string" |
| ], |
| "default": null, |
| "doc": "Refers to the record key that this secondary key maps to" |
| }, |
| { |
| "name": "isDeleted", |
| "type": "boolean", |
| "doc": "True if this entry has been deleted" |
| } |
| ] |
| } |
| ], |
| "default" : null |
| } |
| ] |
| } |