// format/Message.fbs - arrow - Git at Google

 namespace apache.arrow.flatbuf;

 /// ----------------------------------------------------------------------
 /// Logical types and their metadata (if any)
 ///
 /// These are stored in the flatbuffer in the Type union below

 /// A Tuple in the flatbuffer metadata is the same as an Arrow Struct
 /// (according to the physical memory layout). We used Tuple here as Struct is
 /// a reserved word in Flatbuffers.
 ///
 /// The table declares no fields: it is only a member of the Type union and
 /// carries no type-specific metadata.
 table Tuple {
 }

 /// Logical list type. Declares no fields; it is selected through the Type
 /// union and carries no metadata of its own.
 table List {
 }

 /// Layout mode recorded for a Union type, stored as an int. No explicit values
 /// are given, so Sparse and Dense are numbered in declaration order.
 enum UnionMode:int { Sparse, Dense }

 /// Logical union type; its only metadata is the layout mode.
 table Union {
   /// Sparse or Dense. No default is declared in this schema.
   mode: UnionMode;
 }

 /// Logical type tag named Bit. Declares no fields; it is only a member of the
 /// Type union.
 table Bit {
 }

 /// Logical integer type, described by its width and signedness.
 table Int {
   /// Width of the integer in bits.
   bitWidth: int; // 1 to 64
   /// Whether the integer is signed.
   is_signed: bool;
 }

 /// Precision of a FloatingPoint type, stored as an int. No explicit values are
 /// given, so SINGLE and DOUBLE are numbered in declaration order.
 enum Precision:int {SINGLE, DOUBLE}

 /// Logical floating-point type; its only metadata is the precision.
 table FloatingPoint {
   /// SINGLE or DOUBLE. No default is declared in this schema.
   precision: Precision;
 }

 /// Logical type tag named Utf8. Declares no fields; it is only a member of the
 /// Type union.
 table Utf8 {
 }

 /// Logical type tag named Binary. Declares no fields; it is only a member of
 /// the Type union.
 table Binary {
 }

 /// Logical type tag named Bool. Declares no fields; it is only a member of the
 /// Type union.
 table Bool {
 }

 /// Logical decimal type, parameterized by two integers.
 // NOTE(review): presumably precision is the total number of digits and scale
 // the number of digits after the decimal point -- confirm against the spec.
 table Decimal {
   precision: int;
   scale: int;
 }

 /// Logical timestamp type; its only metadata is a timezone string.
 // NOTE(review): the expected timezone format and the meaning of an absent
 // value are not specified in this schema -- confirm.
 table Timestamp {
   timezone: string;
 }

 /// Logical type tag named JSONScalar with a single boolean flag.
 // NOTE(review): what "dense" selects is not documented here -- confirm.
 table JSONScalar {
   /// Explicitly defaults to true.
   dense:bool=true;
 }

 /// ----------------------------------------------------------------------
 /// Top-level Type value, enabling extensible type-specific metadata. We can
 /// add new logical types to Type without breaking backwards compatibility

 /// Union over the logical type tables declared above; a Field stores exactly
 /// one of these as its type.
 // The position of each member determines its union tag value, so new types
 // should be appended at the end rather than inserted or reordered.
 union Type {
   Int,
   Bit,
   FloatingPoint,
   Binary,
   Utf8,
   Bool,
   Decimal,
   Timestamp,
   List,
   Tuple,
   Union,
   JSONScalar
 }

 /// ----------------------------------------------------------------------
 /// A field represents a named column in a record / row batch or child of a
 /// nested type.
 ///
 /// - children is only for nested Arrow arrays
 /// - For primitive types, children will have length 0
 /// - nullable should default to true in general

 /// Describes one column or one child of a nested type: an optional name, a
 /// nullability flag, a logical type and a list of child fields.
 table Field {
   // The name is optional, e.g. for the child of a List
   name: string;
   // NOTE(review): no default is declared for nullable, although the section
   // comment says it should default to true in general -- confirm that
   // writers always set this field explicitly.
   nullable: bool;
   /// Logical type of this field, one member of the Type union.
   type: Type;
   /// Child fields; the definition is recursive, so nesting can be arbitrary.
   children: [Field];
 }

 /// ----------------------------------------------------------------------
 /// A Schema describes the columns in a row batch

 /// Holds the list of top-level fields; nested fields hang off each Field's
 /// children.
 table Schema {
   fields: [Field];
 }

 /// ----------------------------------------------------------------------
 /// Data structures for describing a table row batch (a collection of
 /// equal-length Arrow arrays)

 /// A Buffer represents a single contiguous memory segment, located by a page
 /// id, an offset into that page and a length in bytes.
 struct Buffer {
   /// The shared memory page id where this buffer is located. Currently this is
   /// not used
   page: int;

   /// The relative offset into the shared memory page where the bytes for this
   /// buffer start
   offset: long;

   /// The absolute length (in bytes) of the memory buffer. The memory is found
   /// from offset (inclusive) to offset + length (non-inclusive).
   length: long;
 }

 /// Metadata about a field at some level of a nested type tree (but not
 /// its children): the slot count and the null count at that level.
 ///
 /// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
 /// would have {length: 5, null_count: 2} for its List node, and {length: 6,
 /// null_count: 0} for its Int16 node, as separate FieldNode structs
 struct FieldNode {
   /// The number of value slots in the Arrow array at this level of a nested
   /// tree
   length: int;

   /// The number of observed nulls. Fields with null_count == 0 may choose not
   /// to write their physical validity bitmap out as a materialized buffer,
   /// instead setting the length of the bitmap buffer to 0.
   null_count: int;
 }

 /// A data header describing the shared memory layout of a "record" or "row"
 /// batch. Some systems call this a "row batch" internally and others a "record
 /// batch".
 ///
 /// It holds the row count plus two flattened lists: one FieldNode per field
 /// in the schema tree and one Buffer per memory segment.
 table RecordBatch {
   /// number of records / rows. The arrays in the batch should all have this
   /// length
   length: int;

   /// Nodes correspond to the pre-ordered flattened logical schema
   nodes: [FieldNode];

   /// Buffers correspond to the pre-ordered flattened buffer tree
   ///
   /// The number of buffers appended to this list depends on the schema. For
   /// example, most primitive arrays will have 2 buffers, 1 for the validity
   /// bitmap and 1 for the values. For struct arrays, there will only be a
   /// single buffer for the validity (nulls) bitmap
   buffers: [Buffer];
 }

 /// ----------------------------------------------------------------------
 /// For sending dictionary encoding information. Any Field can be
 /// dictionary-encoded, but in this case none of its children may be
 /// dictionary-encoded.
 ///
 /// TODO(wesm): To be documented in more detail

 /// Pairs a dictionary id with a RecordBatch.
 // NOTE(review): presumably id identifies the dictionary and data holds its
 // values; how fields refer to the id is not shown in this schema -- confirm.
 table DictionaryBatch {
   id: long;
   data: RecordBatch;
 }

 /// ----------------------------------------------------------------------
 /// The root Message type

 /// This union enables us to easily send different message types without
 /// redundant storage, and in the future we can easily add new message types.
 ///
 /// A Message carries exactly one of Schema, DictionaryBatch or RecordBatch.
 // The position of each member determines its union tag value, so new message
 // types should be appended at the end rather than inserted or reordered.
 union MessageHeader {
   Schema, DictionaryBatch, RecordBatch
 }

 /// Envelope table and the schema's root type: one header plus a body length.
 table Message {
   /// The payload description: a Schema, DictionaryBatch or RecordBatch.
   header: MessageHeader;
   // NOTE(review): presumably the size in bytes of the data that accompanies
   // this metadata -- the schema does not say; confirm.
   bodyLength: long;
 }

 // Declares Message as the root table of buffers built from this schema.
 root_type Message;
	namespace apache.arrow.flatbuf;

	/// ----------------------------------------------------------------------
	/// Logical types and their metadata (if any)
	///
	/// These are stored in the flatbuffer in the Type union below

	/// A Tuple in the flatbuffer metadata is the same as an Arrow Struct
	/// (according to the physical memory layout). We used Tuple here as Struct is
	/// a reserved word in Flatbuffers.
	///
	/// The table declares no fields: it is only a member of the Type union and
	/// carries no type-specific metadata.
	table Tuple {
	}

	/// Logical list type. Declares no fields; it is selected through the Type
	/// union and carries no metadata of its own.
	table List {
	}

	/// Layout mode recorded for a Union type, stored as an int. No explicit values
	/// are given, so Sparse and Dense are numbered in declaration order.
	enum UnionMode:int { Sparse, Dense }

	/// Logical union type; its only metadata is the layout mode.
	table Union {
	/// Sparse or Dense. No default is declared in this schema.
	mode: UnionMode;
	}

	/// Logical type tag named Bit. Declares no fields; it is only a member of the
	/// Type union.
	table Bit {
	}

	/// Logical integer type, described by its width and signedness.
	table Int {
	/// Width of the integer in bits.
	bitWidth: int; // 1 to 64
	/// Whether the integer is signed.
	is_signed: bool;
	}

	/// Precision of a FloatingPoint type, stored as an int. No explicit values are
	/// given, so SINGLE and DOUBLE are numbered in declaration order.
	enum Precision:int {SINGLE, DOUBLE}

	/// Logical floating-point type; its only metadata is the precision.
	table FloatingPoint {
	/// SINGLE or DOUBLE. No default is declared in this schema.
	precision: Precision;
	}

	/// Logical type tag named Utf8. Declares no fields; it is only a member of the
	/// Type union.
	table Utf8 {
	}

	/// Logical type tag named Binary. Declares no fields; it is only a member of
	/// the Type union.
	table Binary {
	}

	/// Logical type tag named Bool. Declares no fields; it is only a member of the
	/// Type union.
	table Bool {
	}

	/// Logical decimal type, parameterized by two integers.
	// NOTE(review): presumably precision is the total number of digits and scale
	// the number of digits after the decimal point -- confirm against the spec.
	table Decimal {
	precision: int;
	scale: int;
	}

	/// Logical timestamp type; its only metadata is a timezone string.
	// NOTE(review): the expected timezone format and the meaning of an absent
	// value are not specified in this schema -- confirm.
	table Timestamp {
	timezone: string;
	}

	/// Logical type tag named JSONScalar with a single boolean flag.
	// NOTE(review): what "dense" selects is not documented here -- confirm.
	table JSONScalar {
	/// Explicitly defaults to true.
	dense:bool=true;
	}

	/// ----------------------------------------------------------------------
	/// Top-level Type value, enabling extensible type-specific metadata. We can
	/// add new logical types to Type without breaking backwards compatibility

	/// Union over the logical type tables declared above; a Field stores exactly
	/// one of these as its type.
	// The position of each member determines its union tag value, so new types
	// should be appended at the end rather than inserted or reordered.
	union Type {
	Int,
	Bit,
	FloatingPoint,
	Binary,
	Utf8,
	Bool,
	Decimal,
	Timestamp,
	List,
	Tuple,
	Union,
	JSONScalar
	}

	/// ----------------------------------------------------------------------
	/// A field represents a named column in a record / row batch or child of a
	/// nested type.
	///
	/// - children is only for nested Arrow arrays
	/// - For primitive types, children will have length 0
	/// - nullable should default to true in general

	/// Describes one column or one child of a nested type: an optional name, a
	/// nullability flag, a logical type and a list of child fields.
	table Field {
	// The name is optional, e.g. for the child of a List
	name: string;
	// NOTE(review): no default is declared for nullable, although the section
	// comment says it should default to true in general -- confirm that
	// writers always set this field explicitly.
	nullable: bool;
	/// Logical type of this field, one member of the Type union.
	type: Type;
	/// Child fields; the definition is recursive, so nesting can be arbitrary.
	children: [Field];
	}

	/// ----------------------------------------------------------------------
	/// A Schema describes the columns in a row batch

	/// Holds the list of top-level fields; nested fields hang off each Field's
	/// children.
	table Schema {
	fields: [Field];
	}

	/// ----------------------------------------------------------------------
	/// Data structures for describing a table row batch (a collection of
	/// equal-length Arrow arrays)

	/// A Buffer represents a single contiguous memory segment, located by a page
	/// id, an offset into that page and a length in bytes.
	struct Buffer {
	/// The shared memory page id where this buffer is located. Currently this is
	/// not used
	page: int;

	/// The relative offset into the shared memory page where the bytes for this
	/// buffer start
	offset: long;

	/// The absolute length (in bytes) of the memory buffer. The memory is found
	/// from offset (inclusive) to offset + length (non-inclusive).
	length: long;
	}

	/// Metadata about a field at some level of a nested type tree (but not
	/// its children): the slot count and the null count at that level.
	///
	/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
	/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
	/// null_count: 0} for its Int16 node, as separate FieldNode structs
	struct FieldNode {
	/// The number of value slots in the Arrow array at this level of a nested
	/// tree
	length: int;

	/// The number of observed nulls. Fields with null_count == 0 may choose not
	/// to write their physical validity bitmap out as a materialized buffer,
	/// instead setting the length of the bitmap buffer to 0.
	null_count: int;
	}

	/// A data header describing the shared memory layout of a "record" or "row"
	/// batch. Some systems call this a "row batch" internally and others a "record
	/// batch".
	///
	/// It holds the row count plus two flattened lists: one FieldNode per field
	/// in the schema tree and one Buffer per memory segment.
	table RecordBatch {
	/// number of records / rows. The arrays in the batch should all have this
	/// length
	length: int;

	/// Nodes correspond to the pre-ordered flattened logical schema
	nodes: [FieldNode];

	/// Buffers correspond to the pre-ordered flattened buffer tree
	///
	/// The number of buffers appended to this list depends on the schema. For
	/// example, most primitive arrays will have 2 buffers, 1 for the validity
	/// bitmap and 1 for the values. For struct arrays, there will only be a
	/// single buffer for the validity (nulls) bitmap
	buffers: [Buffer];
	}

	/// ----------------------------------------------------------------------
	/// For sending dictionary encoding information. Any Field can be
	/// dictionary-encoded, but in this case none of its children may be
	/// dictionary-encoded.
	///
	/// TODO(wesm): To be documented in more detail

	/// Pairs a dictionary id with a RecordBatch.
	// NOTE(review): presumably id identifies the dictionary and data holds its
	// values; how fields refer to the id is not shown in this schema -- confirm.
	table DictionaryBatch {
	id: long;
	data: RecordBatch;
	}

	/// ----------------------------------------------------------------------
	/// The root Message type

	/// This union enables us to easily send different message types without
	/// redundant storage, and in the future we can easily add new message types.
	///
	/// A Message carries exactly one of Schema, DictionaryBatch or RecordBatch.
	// The position of each member determines its union tag value, so new message
	// types should be appended at the end rather than inserted or reordered.
	union MessageHeader {
	Schema, DictionaryBatch, RecordBatch
	}

	/// Envelope table and the schema's root type: one header plus a body length.
	table Message {
	/// The payload description: a Schema, DictionaryBatch or RecordBatch.
	header: MessageHeader;
	// NOTE(review): presumably the size in bytes of the data that accompanies
	// this metadata -- the schema does not say; confirm.
	bodyLength: long;
	}

	// Declares Message as the root table of buffers built from this schema.
	root_type Message;