format/Schema.fbs - arrow-rs - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 /// Logical types, vector layouts, and schemas

 /// Format Version History.
 /// Version 1.0 - Forward and backwards compatibility guaranteed.
 /// Version 1.1 - Add Decimal256 (No format release).
 /// Version 1.2 (Pending)- Add Interval MONTH_DAY_NANO

 namespace org.apache.arrow.flatbuf;

 enum MetadataVersion:short {
   /// 0.1.0 (October 2016).
   V1,

   /// 0.2.0 (February 2017). Non-backwards compatible with V1.
   V2,

   /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
   V3,

   /// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
   V4,

   /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
   /// metadata and IPC messages). Implementations are recommended to provide a
   /// V4 compatibility mode with V5 format changes disabled.
   ///
   /// Incompatible changes between V4 and V5:
   /// - Union buffer layout has changed. In V5, Unions don't have a validity
   ///   bitmap buffer.
   V5,
 }

 /// Represents Arrow Features that might not have full support
 /// within implementations. This is intended to be used in
 /// two scenarios:
 ///  1.  A mechanism for readers of Arrow Streams
 ///      and files to understand that the stream or file makes
 ///      use of a feature that isn't supported or unknown to
 ///      the implementation (and therefore can meet the Arrow
 ///      forward compatibility guarantees).
 ///  2.  A means of negotiating between a client and server
 ///      what features a stream is allowed to use. The enums
 ///      values here are intented to represent higher level
 ///      features, additional details maybe negotiated
 ///      with key-value pairs specific to the protocol.
 ///
 /// Enums added to this list should be assigned power-of-two values
 /// to facilitate exchanging and comparing bitmaps for supported
 /// features.
 enum Feature : long {
   /// Needed to make flatbuffers happy.
   UNUSED = 0,
   /// The stream makes use of multiple full dictionaries with the
   /// same ID and assumes clients implement dictionary replacement
   /// correctly.
   DICTIONARY_REPLACEMENT = 1,
   /// The stream makes use of compressed bodies as described
   /// in Message.fbs.
   COMPRESSED_BODY = 2
 }

 /// These are stored in the flatbuffer in the Type union below

 table Null {
 }

 /// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
 /// (according to the physical memory layout). We used Struct_ here as
 /// Struct is a reserved word in Flatbuffers
 table Struct_ {
 }

 table List {
 }

 /// Same as List, but with 64-bit offsets, allowing to represent
 /// extremely large data values.
 table LargeList {
 }

 table FixedSizeList {
   /// Number of list items per value
   listSize: int;
 }

 /// A Map is a logical nested type that is represented as
 ///
 /// List<entries: Struct<key: K, value: V>>
 ///
 /// In this layout, the keys and values are each respectively contiguous. We do
 /// not constrain the key and value types, so the application is responsible
 /// for ensuring that the keys are hashable and unique. Whether the keys are sorted
 /// may be set in the metadata for this field.
 ///
 /// In a field with Map type, the field has a child Struct field, which then
 /// has two children: key type and the second the value type. The names of the
 /// child fields may be respectively "entries", "key", and "value", but this is
 /// not enforced.
 ///
 /// Map
 /// ```text
 ///   - child[0] entries: Struct
 ///     - child[0] key: K
 ///     - child[1] value: V
 /// ```
 /// Neither the "entries" field nor the "key" field may be nullable.
 ///
 /// The metadata is structured so that Arrow systems without special handling
 /// for Map can make Map an alias for List. The "layout" attribute for the Map
 /// field must have the same contents as a List.
 table Map {
   /// Set to true if the keys within each value are sorted
   keysSorted: bool;
 }

 enum UnionMode:short { Sparse, Dense }

 /// A union is a complex type with children in Field
 /// By default ids in the type vector refer to the offsets in the children
 /// optionally typeIds provides an indirection between the child offset and the type id
 /// for each child `typeIds[offset]` is the id used in the type vector
 table Union {
   mode: UnionMode;
   typeIds: [ int ]; // optional, describes typeid of each child.
 }

 table Int {
   bitWidth: int; // restricted to 8, 16, 32, and 64 in v1
   is_signed: bool;
 }

 enum Precision:short {HALF, SINGLE, DOUBLE}

 table FloatingPoint {
   precision: Precision;
 }

 /// Unicode with UTF-8 encoding
 table Utf8 {
 }

 /// Opaque binary data
 table Binary {
 }

 /// Same as Utf8, but with 64-bit offsets, allowing to represent
 /// extremely large data values.
 table LargeUtf8 {
 }

 /// Same as Binary, but with 64-bit offsets, allowing to represent
 /// extremely large data values.
 table LargeBinary {
 }

 table FixedSizeBinary {
   /// Number of bytes per value
   byteWidth: int;
 }

 table Bool {
 }

 /// Exact decimal value represented as an integer value in two's
 /// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
 /// are used. The representation uses the endianness indicated
 /// in the Schema.
 table Decimal {
   /// Total number of decimal digits
   precision: int;

   /// Number of digits after the decimal point "."
   scale: int;

   /// Number of bits per value. The only accepted widths are 128 and 256.
   /// We use bitWidth for consistency with Int::bitWidth.
   bitWidth: int = 128;
 }

 enum DateUnit: short {
   DAY,
   MILLISECOND
 }

 /// Date is either a 32-bit or 64-bit signed integer type representing an
 /// elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
 ///
 /// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
 ///   leap seconds), where the values are evenly divisible by 86400000
 /// * Days (32 bits) since the UNIX epoch
 table Date {
   unit: DateUnit = MILLISECOND;
 }

 enum TimeUnit: short { SECOND, MILLISECOND, MICROSECOND, NANOSECOND }

 /// Time is either a 32-bit or 64-bit signed integer type representing an
 /// elapsed time since midnight, stored in either of four units: seconds,
 /// milliseconds, microseconds or nanoseconds.
 ///
 /// The integer `bitWidth` depends on the `unit` and must be one of the following:
 /// * SECOND and MILLISECOND: 32 bits
 /// * MICROSECOND and NANOSECOND: 64 bits
 ///
 /// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
 /// (exclusive), adjusted for the time unit (for example, up to 86400000
 /// exclusive for the MILLISECOND unit).
 /// This definition doesn't allow for leap seconds. Time values from
 /// measurements with leap seconds will need to be corrected when ingesting
 /// into Arrow (for example by replacing the value 86400 with 86399).
 table Time {
   unit: TimeUnit = MILLISECOND;
   bitWidth: int = 32;
 }

 /// Timestamp is a 64-bit signed integer representing an elapsed time since a
 /// fixed epoch, stored in either of four units: seconds, milliseconds,
 /// microseconds or nanoseconds, and is optionally annotated with a timezone.
 ///
 /// Timestamp values do not include any leap seconds (in other words, all
 /// days are considered 86400 seconds long).
 ///
 /// Timestamps with a non-empty timezone
 /// ------------------------------------
 ///
 /// If a Timestamp column has a non-empty timezone value, its epoch is
 /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
 /// (the Unix epoch), regardless of the Timestamp's own timezone.
 ///
 /// Therefore, timestamp values with a non-empty timezone correspond to
 /// physical points in time together with some additional information about
 /// how the data was obtained and/or how to display it (the timezone).
 ///
 ///   For example, the timestamp value 0 with the timezone string "Europe/Paris"
 ///   corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
 ///   application may prefer to display it as "January 1st 1970, 01h00" in
 ///   the Europe/Paris timezone (which is the same physical point in time).
 ///
 /// One consequence is that timestamp values with a non-empty timezone
 /// can be compared and ordered directly, since they all share the same
 /// well-known point of reference (the Unix epoch).
 ///
 /// Timestamps with an unset / empty timezone
 /// -----------------------------------------
 ///
 /// If a Timestamp column has no timezone value, its epoch is
 /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
 ///
 /// Therefore, timestamp values without a timezone cannot be meaningfully
 /// interpreted as physical points in time, but only as calendar / clock
 /// indications ("wall clock time") in an unspecified timezone.
 ///
 ///   For example, the timestamp value 0 with an empty timezone string
 ///   corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
 ///   is not enough information to interpret it as a well-defined physical
 ///   point in time.
 ///
 /// One consequence is that timestamp values without a timezone cannot
 /// be reliably compared or ordered, since they may have different points of
 /// reference.  In particular, it is *not* possible to interpret an unset
 /// or empty timezone as the same as "UTC".
 ///
 /// Conversion between timezones
 /// ----------------------------
 ///
 /// If a Timestamp column has a non-empty timezone, changing the timezone
 /// to a different non-empty value is a metadata-only operation:
 /// the timestamp values need not change as their point of reference remains
 /// the same (the Unix epoch).
 ///
 /// However, if a Timestamp column has no timezone value, changing it to a
 /// non-empty value requires to think about the desired semantics.
 /// One possibility is to assume that the original timestamp values are
 /// relative to the epoch of the timezone being set; timestamp values should
 /// then adjusted to the Unix epoch (for example, changing the timezone from
 /// empty to "Europe/Paris" would require converting the timestamp values
 /// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
 /// nevertheless correct).
 ///
 /// Guidelines for encoding data from external libraries
 /// ----------------------------------------------------
 ///
 /// Date & time libraries often have multiple different data types for temporal
 /// data. In order to ease interoperability between different implementations the
 /// Arrow project has some recommendations for encoding these types into a Timestamp
 /// column.
 ///
 /// An "instant" represents a physical point in time that has no relevant timezone
 /// (for example, astronomical data). To encode an instant, use a Timestamp with
 /// the timezone string set to "UTC", and make sure the Timestamp values
 /// are relative to the UTC epoch (January 1st 1970, midnight).
 ///
 /// A "zoned date-time" represents a physical point in time annotated with an
 /// informative timezone (for example, the timezone in which the data was
 /// recorded).  To encode a zoned date-time, use a Timestamp with the timezone
 /// string set to the name of the timezone, and make sure the Timestamp values
 /// are relative to the UTC epoch (January 1st 1970, midnight).
 ///
 ///  (There is some ambiguity between an instant and a zoned date-time with the
 ///   UTC timezone.  Both of these are stored the same in Arrow.  Typically,
 ///   this distinction does not matter.  If it does, then an application should
 ///   use custom metadata or an extension type to distinguish between the two cases.)
 ///
 /// An "offset date-time" represents a physical point in time combined with an
 /// explicit offset from UTC.  To encode an offset date-time, use a Timestamp
 /// with the timezone string set to the numeric timezone offset string
 /// (e.g. "+03:00"), and make sure the Timestamp values are relative to
 /// the UTC epoch (January 1st 1970, midnight).
 ///
 /// A "naive date-time" (also called "local date-time" in some libraries)
 /// represents a wall clock time combined with a calendar date, but with
 /// no indication of how to map this information to a physical point in time.
 /// Naive date-times must be handled with care because of this missing
 /// information, and also because daylight saving time (DST) may make
 /// some values ambiguous or non-existent. A naive date-time may be
 /// stored as a struct with Date and Time fields. However, it may also be
 /// encoded into a Timestamp column with an empty timezone. The timestamp
 /// values should be computed "as if" the timezone of the date-time values
 /// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
 /// be encoded as timestamp value 0.
 table Timestamp {
   unit: TimeUnit;

   /// The timezone is an optional string indicating the name of a timezone,
   /// one of:
   ///
   /// * As used in the Olson timezone database (the "tz database" or
   ///   "tzdata"), such as "America/New_York".
   /// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
   ///   such as "+07:30".
   ///
   /// Whether a timezone string is present indicates different semantics about
   /// the data (see above).
   timezone: string;
 }

 enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO}
 // A "calendar" interval which models types that don't necessarily
 // have a precise duration without the context of a base timestamp (e.g.
 // days can differ in length during day light savings time transitions).
 // All integers in the types below are stored in the endianness indicated
 // by the schema.
 //
 // YEAR_MONTH - Indicates the number of elapsed whole months, stored as
 //   4-byte signed integers.
 // DAY_TIME - Indicates the number of elapsed days and milliseconds (no leap seconds),
 //   stored as 2 contiguous 32-bit signed integers (8-bytes in total). Support
 //   of this IntervalUnit is not required for full arrow compatibility.
 // MONTH_DAY_NANO - A triple of the number of elapsed months, days, and nanoseconds.
 //  The values are stored contiguously in 16-byte blocks. Months and days are
 //  encoded as 32-bit signed integers and nanoseconds is encoded as a 64-bit
 //  signed integer. Nanoseconds does not allow for leap seconds. Each field is
 //  independent (e.g. there is no constraint that nanoseconds have the same
 //  sign as days or that the quantity of nanoseconds represents less than a
 //  day's worth of time).
 table Interval {
   unit: IntervalUnit;
 }

 // An absolute length of time unrelated to any calendar artifacts.
 //
 // For the purposes of Arrow Implementations, adding this value to a Timestamp
 // ("t1") naively (i.e. simply summing the two number) is acceptable even
 // though in some cases the resulting Timestamp (t2) would not account for
 // leap-seconds during the elapsed time between "t1" and "t2".  Similarly,
 // representing the difference between two Unix timestamp is acceptable, but
 // would yield a value that is possibly a few seconds off from the true elapsed
 // time.
 //
 //  The resolution defaults to millisecond, but can be any of the other
 //  supported TimeUnit values as with Timestamp and Time types.  This type is
 //  always represented as an 8-byte integer.
 table Duration {
   unit: TimeUnit = MILLISECOND;
 }

 /// ----------------------------------------------------------------------
 /// Top-level Type value, enabling extensible type-specific metadata. We can
 /// add new logical types to Type without breaking backwards compatibility

 union Type {
   Null,
   Int,
   FloatingPoint,
   Binary,
   Utf8,
   Bool,
   Decimal,
   Date,
   Time,
   Timestamp,
   Interval,
   List,
   Struct_,
   Union,
   FixedSizeBinary,
   FixedSizeList,
   Map,
   Duration,
   LargeBinary,
   LargeUtf8,
   LargeList,
 }

 /// ----------------------------------------------------------------------
 /// user defined key value pairs to add custom metadata to arrow
 /// key namespacing is the responsibility of the user

 table KeyValue {
   key: string;
   value: string;
 }

 /// ----------------------------------------------------------------------
 /// Dictionary encoding metadata
 /// Maintained for forwards compatibility, in the future
 /// Dictionaries might be explicit maps between integers and values
 /// allowing for non-contiguous index values
 enum DictionaryKind : short { DenseArray }
 table DictionaryEncoding {
   /// The known dictionary id in the application where this data is used. In
   /// the file or streaming formats, the dictionary ids are found in the
   /// DictionaryBatch messages
   id: long;

   /// The dictionary indices are constrained to be non-negative integers. If
   /// this field is null, the indices must be signed int32. To maximize
   /// cross-language compatibility and performance, implementations are
   /// recommended to prefer signed integer types over unsigned integer types
   /// and to avoid uint64 indices unless they are required by an application.
   indexType: Int;

   /// By default, dictionaries are not ordered, or the order does not have
   /// semantic meaning. In some statistical, applications, dictionary-encoding
   /// is used to represent ordered categorical data, and we provide a way to
   /// preserve that metadata here
   isOrdered: bool;

   dictionaryKind: DictionaryKind;
 }

 /// ----------------------------------------------------------------------
 /// A field represents a named column in a record / row batch or child of a
 /// nested type.

 table Field {
   /// Name is not required, in i.e. a List
   name: string;

   /// Whether or not this field can contain nulls. Should be true in general.
   nullable: bool;

   /// This is the type of the decoded value if the field is dictionary encoded.
   type: Type;

   /// Present only if the field is dictionary encoded.
   dictionary: DictionaryEncoding;

   /// children apply only to nested data types like Struct, List and Union. For
   /// primitive types children will have length 0.
   children: [ Field ];

   /// User-defined metadata
   custom_metadata: [ KeyValue ];
 }

 /// ----------------------------------------------------------------------
 /// Endianness of the platform producing the data

 enum Endianness:short { Little, Big }

 /// ----------------------------------------------------------------------
 /// A Buffer represents a single contiguous memory segment
 struct Buffer {
   /// The relative offset into the shared memory page where the bytes for this
   /// buffer starts
   offset: long;

   /// The absolute length (in bytes) of the memory buffer. The memory is found
   /// from offset (inclusive) to offset + length (non-inclusive). When building
   /// messages using the encapsulated IPC message, padding bytes may be written
   /// after a buffer, but such padding bytes do not need to be accounted for in
   /// the size here.
   length: long;
 }

 /// ----------------------------------------------------------------------
 /// A Schema describes the columns in a row batch

 table Schema {

   /// endianness of the buffer
   /// it is Little Endian by default
   /// if endianness doesn't match the underlying system then the vectors need to be converted
   endianness: Endianness=Little;

   fields: [Field];
   // User-defined metadata
   custom_metadata: [ KeyValue ];

   /// Features used in the stream/file.
   features : [ Feature ];
 }

 root_type Schema;
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	/// Logical types, vector layouts, and schemas

	/// Format Version History.
	/// Version 1.0 - Forward and backwards compatibility guaranteed.
	/// Version 1.1 - Add Decimal256 (No format release).
	/// Version 1.2 (Pending)- Add Interval MONTH_DAY_NANO

	namespace org.apache.arrow.flatbuf;

	enum MetadataVersion:short {
	/// 0.1.0 (October 2016).
	V1,

	/// 0.2.0 (February 2017). Non-backwards compatible with V1.
	V2,

	/// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
	V3,

	/// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
	V4,

	/// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
	/// metadata and IPC messages). Implementations are recommended to provide a
	/// V4 compatibility mode with V5 format changes disabled.
	///
	/// Incompatible changes between V4 and V5:
	/// - Union buffer layout has changed. In V5, Unions don't have a validity
	/// bitmap buffer.
	V5,
	}

	/// Represents Arrow Features that might not have full support
	/// within implementations. This is intended to be used in
	/// two scenarios:
	/// 1. A mechanism for readers of Arrow Streams
	/// and files to understand that the stream or file makes
	/// use of a feature that isn't supported or unknown to
	/// the implementation (and therefore can meet the Arrow
	/// forward compatibility guarantees).
	/// 2. A means of negotiating between a client and server
	/// what features a stream is allowed to use. The enums
	/// values here are intented to represent higher level
	/// features, additional details maybe negotiated
	/// with key-value pairs specific to the protocol.
	///
	/// Enums added to this list should be assigned power-of-two values
	/// to facilitate exchanging and comparing bitmaps for supported
	/// features.
	enum Feature : long {
	/// Needed to make flatbuffers happy.
	UNUSED = 0,
	/// The stream makes use of multiple full dictionaries with the
	/// same ID and assumes clients implement dictionary replacement
	/// correctly.
	DICTIONARY_REPLACEMENT = 1,
	/// The stream makes use of compressed bodies as described
	/// in Message.fbs.
	COMPRESSED_BODY = 2
	}

	/// These are stored in the flatbuffer in the Type union below

	table Null {
	}

	/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
	/// (according to the physical memory layout). We used Struct_ here as
	/// Struct is a reserved word in Flatbuffers
	table Struct_ {
	}

	table List {
	}

	/// Same as List, but with 64-bit offsets, allowing to represent
	/// extremely large data values.
	table LargeList {
	}

	table FixedSizeList {
	/// Number of list items per value
	listSize: int;
	}

	/// A Map is a logical nested type that is represented as
	///
	/// List<entries: Struct<key: K, value: V>>
	///
	/// In this layout, the keys and values are each respectively contiguous. We do
	/// not constrain the key and value types, so the application is responsible
	/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
	/// may be set in the metadata for this field.
	///
	/// In a field with Map type, the field has a child Struct field, which then
	/// has two children: key type and the second the value type. The names of the
	/// child fields may be respectively "entries", "key", and "value", but this is
	/// not enforced.
	///
	/// Map
	/// ```text
	/// - child[0] entries: Struct
	/// - child[0] key: K
	/// - child[1] value: V
	/// ```
	/// Neither the "entries" field nor the "key" field may be nullable.
	///
	/// The metadata is structured so that Arrow systems without special handling
	/// for Map can make Map an alias for List. The "layout" attribute for the Map
	/// field must have the same contents as a List.
	table Map {
	/// Set to true if the keys within each value are sorted
	keysSorted: bool;
	}

	enum UnionMode:short { Sparse, Dense }

	/// A union is a complex type with children in Field
	/// By default ids in the type vector refer to the offsets in the children
	/// optionally typeIds provides an indirection between the child offset and the type id
	/// for each child `typeIds[offset]` is the id used in the type vector
	table Union {
	mode: UnionMode;
	typeIds: [ int ]; // optional, describes typeid of each child.
	}

	table Int {
	bitWidth: int; // restricted to 8, 16, 32, and 64 in v1
	is_signed: bool;
	}

	enum Precision:short {HALF, SINGLE, DOUBLE}

	table FloatingPoint {
	precision: Precision;
	}

	/// Unicode with UTF-8 encoding
	table Utf8 {
	}

	/// Opaque binary data
	table Binary {
	}

	/// Same as Utf8, but with 64-bit offsets, allowing to represent
	/// extremely large data values.
	table LargeUtf8 {
	}

	/// Same as Binary, but with 64-bit offsets, allowing to represent
	/// extremely large data values.
	table LargeBinary {
	}

	table FixedSizeBinary {
	/// Number of bytes per value
	byteWidth: int;
	}

	table Bool {
	}

	/// Exact decimal value represented as an integer value in two's
	/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
	/// are used. The representation uses the endianness indicated
	/// in the Schema.
	table Decimal {
	/// Total number of decimal digits
	precision: int;

	/// Number of digits after the decimal point "."
	scale: int;

	/// Number of bits per value. The only accepted widths are 128 and 256.
	/// We use bitWidth for consistency with Int::bitWidth.
	bitWidth: int = 128;
	}

	enum DateUnit: short {
	DAY,
	MILLISECOND
	}

	/// Date is either a 32-bit or 64-bit signed integer type representing an
	/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
	///
	/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
	/// leap seconds), where the values are evenly divisible by 86400000
	/// * Days (32 bits) since the UNIX epoch
	table Date {
	unit: DateUnit = MILLISECOND;
	}

	enum TimeUnit: short { SECOND, MILLISECOND, MICROSECOND, NANOSECOND }

	/// Time is either a 32-bit or 64-bit signed integer type representing an
	/// elapsed time since midnight, stored in either of four units: seconds,
	/// milliseconds, microseconds or nanoseconds.
	///
	/// The integer `bitWidth` depends on the `unit` and must be one of the following:
	/// * SECOND and MILLISECOND: 32 bits
	/// * MICROSECOND and NANOSECOND: 64 bits
	///
	/// The allowed values are between 0 (inclusive) and 86400 (=246060) seconds
	/// (exclusive), adjusted for the time unit (for example, up to 86400000
	/// exclusive for the MILLISECOND unit).
	/// This definition doesn't allow for leap seconds. Time values from
	/// measurements with leap seconds will need to be corrected when ingesting
	/// into Arrow (for example by replacing the value 86400 with 86399).
	table Time {
	unit: TimeUnit = MILLISECOND;
	bitWidth: int = 32;
	}

	/// Timestamp is a 64-bit signed integer representing an elapsed time since a
	/// fixed epoch, stored in either of four units: seconds, milliseconds,
	/// microseconds or nanoseconds, and is optionally annotated with a timezone.
	///
	/// Timestamp values do not include any leap seconds (in other words, all
	/// days are considered 86400 seconds long).
	///
	/// Timestamps with a non-empty timezone
	/// ------------------------------------
	///
	/// If a Timestamp column has a non-empty timezone value, its epoch is
	/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the UTC timezone
	/// (the Unix epoch), regardless of the Timestamp's own timezone.
	///
	/// Therefore, timestamp values with a non-empty timezone correspond to
	/// physical points in time together with some additional information about
	/// how the data was obtained and/or how to display it (the timezone).
	///
	/// For example, the timestamp value 0 with the timezone string "Europe/Paris"
	/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
	/// application may prefer to display it as "January 1st 1970, 01h00" in
	/// the Europe/Paris timezone (which is the same physical point in time).
	///
	/// One consequence is that timestamp values with a non-empty timezone
	/// can be compared and ordered directly, since they all share the same
	/// well-known point of reference (the Unix epoch).
	///
	/// Timestamps with an unset / empty timezone
	/// -----------------------------------------
	///
	/// If a Timestamp column has no timezone value, its epoch is
	/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an unknown timezone.
	///
	/// Therefore, timestamp values without a timezone cannot be meaningfully
	/// interpreted as physical points in time, but only as calendar / clock
	/// indications ("wall clock time") in an unspecified timezone.
	///
	/// For example, the timestamp value 0 with an empty timezone string
	/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
	/// is not enough information to interpret it as a well-defined physical
	/// point in time.
	///
	/// One consequence is that timestamp values without a timezone cannot
	/// be reliably compared or ordered, since they may have different points of
	/// reference. In particular, it is not possible to interpret an unset
	/// or empty timezone as the same as "UTC".
	///
	/// Conversion between timezones
	/// ----------------------------
	///
	/// If a Timestamp column has a non-empty timezone, changing the timezone
	/// to a different non-empty value is a metadata-only operation:
	/// the timestamp values need not change as their point of reference remains
	/// the same (the Unix epoch).
	///
	/// However, if a Timestamp column has no timezone value, changing it to a
	/// non-empty value requires to think about the desired semantics.
	/// One possibility is to assume that the original timestamp values are
	/// relative to the epoch of the timezone being set; timestamp values should
	/// then adjusted to the Unix epoch (for example, changing the timezone from
	/// empty to "Europe/Paris" would require converting the timestamp values
	/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
	/// nevertheless correct).
	///
	/// Guidelines for encoding data from external libraries
	/// ----------------------------------------------------
	///
	/// Date & time libraries often have multiple different data types for temporal
	/// data. In order to ease interoperability between different implementations the
	/// Arrow project has some recommendations for encoding these types into a Timestamp
	/// column.
	///
	/// An "instant" represents a physical point in time that has no relevant timezone
	/// (for example, astronomical data). To encode an instant, use a Timestamp with
	/// the timezone string set to "UTC", and make sure the Timestamp values
	/// are relative to the UTC epoch (January 1st 1970, midnight).
	///
	/// A "zoned date-time" represents a physical point in time annotated with an
	/// informative timezone (for example, the timezone in which the data was
	/// recorded). To encode a zoned date-time, use a Timestamp with the timezone
	/// string set to the name of the timezone, and make sure the Timestamp values
	/// are relative to the UTC epoch (January 1st 1970, midnight).
	///
	/// (There is some ambiguity between an instant and a zoned date-time with the
	/// UTC timezone. Both of these are stored the same in Arrow. Typically,
	/// this distinction does not matter. If it does, then an application should
	/// use custom metadata or an extension type to distinguish between the two cases.)
	///
	/// An "offset date-time" represents a physical point in time combined with an
	/// explicit offset from UTC. To encode an offset date-time, use a Timestamp
	/// with the timezone string set to the numeric timezone offset string
	/// (e.g. "+03:00"), and make sure the Timestamp values are relative to
	/// the UTC epoch (January 1st 1970, midnight).
	///
	/// A "naive date-time" (also called "local date-time" in some libraries)
	/// represents a wall clock time combined with a calendar date, but with
	/// no indication of how to map this information to a physical point in time.
	/// Naive date-times must be handled with care because of this missing
	/// information, and also because daylight saving time (DST) may make
	/// some values ambiguous or non-existent. A naive date-time may be
	/// stored as a struct with Date and Time fields. However, it may also be
	/// encoded into a Timestamp column with an empty timezone. The timestamp
	/// values should be computed "as if" the timezone of the date-time values
	/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
	/// be encoded as timestamp value 0.
	table Timestamp {
	unit: TimeUnit;

	/// The timezone is an optional string indicating the name of a timezone,
	/// one of:
	///
	/// * As used in the Olson timezone database (the "tz database" or
	/// "tzdata"), such as "America/New_York".
	/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
	/// such as "+07:30".
	///
	/// Whether a timezone string is present indicates different semantics about
	/// the data (see above).
	timezone: string;
	}

	enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO}
	// A "calendar" interval which models types that don't necessarily
	// have a precise duration without the context of a base timestamp (e.g.
	// days can differ in length during day light savings time transitions).
	// All integers in the types below are stored in the endianness indicated
	// by the schema.
	//
	// YEAR_MONTH - Indicates the number of elapsed whole months, stored as
	// 4-byte signed integers.
	// DAY_TIME - Indicates the number of elapsed days and milliseconds (no leap seconds),
	// stored as 2 contiguous 32-bit signed integers (8-bytes in total). Support
	// of this IntervalUnit is not required for full arrow compatibility.
	// MONTH_DAY_NANO - A triple of the number of elapsed months, days, and nanoseconds.
	// The values are stored contiguously in 16-byte blocks. Months and days are
	// encoded as 32-bit signed integers and nanoseconds is encoded as a 64-bit
	// signed integer. Nanoseconds does not allow for leap seconds. Each field is
	// independent (e.g. there is no constraint that nanoseconds have the same
	// sign as days or that the quantity of nanoseconds represents less than a
	// day's worth of time).
	table Interval {
	unit: IntervalUnit;
	}

	// An absolute length of time unrelated to any calendar artifacts.
	//
	// For the purposes of Arrow Implementations, adding this value to a Timestamp
	// ("t1") naively (i.e. simply summing the two number) is acceptable even
	// though in some cases the resulting Timestamp (t2) would not account for
	// leap-seconds during the elapsed time between "t1" and "t2". Similarly,
	// representing the difference between two Unix timestamp is acceptable, but
	// would yield a value that is possibly a few seconds off from the true elapsed
	// time.
	//
	// The resolution defaults to millisecond, but can be any of the other
	// supported TimeUnit values as with Timestamp and Time types. This type is
	// always represented as an 8-byte integer.
	table Duration {
	unit: TimeUnit = MILLISECOND;
	}

	/// ----------------------------------------------------------------------
	/// Top-level Type value, enabling extensible type-specific metadata. We can
	/// add new logical types to Type without breaking backwards compatibility

	union Type {
	Null,
	Int,
	FloatingPoint,
	Binary,
	Utf8,
	Bool,
	Decimal,
	Date,
	Time,
	Timestamp,
	Interval,
	List,
	Struct_,
	Union,
	FixedSizeBinary,
	FixedSizeList,
	Map,
	Duration,
	LargeBinary,
	LargeUtf8,
	LargeList,
	}

	/// ----------------------------------------------------------------------
	/// user defined key value pairs to add custom metadata to arrow
	/// key namespacing is the responsibility of the user

	table KeyValue {
	key: string;
	value: string;
	}

	/// ----------------------------------------------------------------------
	/// Dictionary encoding metadata
	/// Maintained for forwards compatibility, in the future
	/// Dictionaries might be explicit maps between integers and values
	/// allowing for non-contiguous index values
	enum DictionaryKind : short { DenseArray }
	table DictionaryEncoding {
	/// The known dictionary id in the application where this data is used. In
	/// the file or streaming formats, the dictionary ids are found in the
	/// DictionaryBatch messages
	id: long;

	/// The dictionary indices are constrained to be non-negative integers. If
	/// this field is null, the indices must be signed int32. To maximize
	/// cross-language compatibility and performance, implementations are
	/// recommended to prefer signed integer types over unsigned integer types
	/// and to avoid uint64 indices unless they are required by an application.
	indexType: Int;

	/// By default, dictionaries are not ordered, or the order does not have
	/// semantic meaning. In some statistical, applications, dictionary-encoding
	/// is used to represent ordered categorical data, and we provide a way to
	/// preserve that metadata here
	isOrdered: bool;

	dictionaryKind: DictionaryKind;
	}

	/// ----------------------------------------------------------------------
	/// A field represents a named column in a record / row batch or child of a
	/// nested type.

	table Field {
	/// Name is not required, in i.e. a List
	name: string;

	/// Whether or not this field can contain nulls. Should be true in general.
	nullable: bool;

	/// This is the type of the decoded value if the field is dictionary encoded.
	type: Type;

	/// Present only if the field is dictionary encoded.
	dictionary: DictionaryEncoding;

	/// children apply only to nested data types like Struct, List and Union. For
	/// primitive types children will have length 0.
	children: [ Field ];

	/// User-defined metadata
	custom_metadata: [ KeyValue ];
	}

	/// ----------------------------------------------------------------------
	/// Endianness of the platform producing the data

	enum Endianness:short { Little, Big }

	/// ----------------------------------------------------------------------
	/// A Buffer represents a single contiguous memory segment
	struct Buffer {
	/// The relative offset into the shared memory page where the bytes for this
	/// buffer starts
	offset: long;

	/// The absolute length (in bytes) of the memory buffer. The memory is found
	/// from offset (inclusive) to offset + length (non-inclusive). When building
	/// messages using the encapsulated IPC message, padding bytes may be written
	/// after a buffer, but such padding bytes do not need to be accounted for in
	/// the size here.
	length: long;
	}

	/// ----------------------------------------------------------------------
	/// A Schema describes the columns in a row batch

	table Schema {

	/// endianness of the buffer
	/// it is Little Endian by default
	/// if endianness doesn't match the underlying system then the vectors need to be converted
	endianness: Endianness=Little;

	fields: [Field];
	// User-defined metadata
	custom_metadata: [ KeyValue ];

	/// Features used in the stream/file.
	features : [ Feature ];
	}

	root_type Schema;