// format/Message.fbs - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 include "Schema.fbs";
 include "SparseTensor.fbs";
 include "Tensor.fbs";

 namespace org.apache.arrow.flatbuf;

 /// ----------------------------------------------------------------------
 /// Data structures for describing a table row batch (a collection of
 /// equal-length Arrow arrays)

 /// Metadata about a field at some level of a nested type tree (but not
 /// its children).
 ///
 /// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
 /// would have {length: 5, null_count: 2} for its List node, and {length: 6,
 /// null_count: 0} for its Int16 node, as separate FieldNode structs.
 struct FieldNode {
   // NOTE: this is a FlatBuffers struct, not a table. Structs are stored
   // inline with a fixed layout, so fields cannot be added, removed or
   // reordered here without breaking readers of existing data.

   /// The number of value slots in the Arrow array at this level of a nested
   /// tree.
   length: long;

   /// The number of observed nulls. Fields with null_count == 0 may choose not
   /// to write their physical validity bitmap out as a materialized buffer,
   /// instead setting the length of the bitmap buffer to 0.
   null_count: long;
 }

 /// Compression codecs that can be selected through BodyCompression.codec.
 enum CompressionType:byte {
   // No explicit values are given, so FlatBuffers numbers the members from 0
   // in declaration order. Append new codecs at the end to keep existing
   // values stable.

   /// LZ4 frame format, for portability, as provided by lz4frame.h or wrappers
   /// thereof. Not to be confused with the "raw" (also called "block") format
   /// provided by lz4.h.
   LZ4_FRAME,

   /// Zstandard.
   ZSTD
 }

 /// Provided for forward compatibility in case we need to support different
 /// strategies for compressing the IPC message body (like whole-body
 /// compression rather than buffer-level) in the future.
 enum BodyCompressionMethod:byte {
   // BUFFER is the only member and has the implicit value 0. Any future
   // method must be appended after it so this value does not change.

   /// Each constituent buffer is first compressed with the indicated
   /// compressor, and then written with the uncompressed length in the first 8
   /// bytes as a 64-bit little-endian signed integer followed by the compressed
   /// buffer bytes (and then padding as required by the protocol). The
   /// uncompressed length may be set to -1 to indicate that the data that
   /// follows is not compressed, which can be useful for cases where
   /// compression does not yield appreciable savings.
   BUFFER
 }

 /// Optional compression for the memory buffers constituting IPC message
 /// bodies. Intended for use with RecordBatch but could be used for other
 /// message types.
 table BodyCompression {
   // Both fields declare defaults. A reader sees the declared default when
   // the field is not present in the serialized table.

   /// Compressor library. Defaults to LZ4_FRAME.
   codec: CompressionType = LZ4_FRAME;

   /// Indicates the way the record batch body was compressed. Defaults to
   /// BUFFER (per-buffer compression).
   method: BodyCompressionMethod = BUFFER;
 }

 /// A data header describing the shared memory layout of a "record" or "row"
 /// batch. Some systems call this a "row batch" internally and others a "record
 /// batch".
 table RecordBatch {
   // Fields carry no explicit ids, so their declaration order defines the
   // table layout. Add new fields only at the end.

   /// Number of records / rows. The arrays in the batch should all have this
   /// length.
   length: long;

   /// Nodes correspond to the pre-ordered flattened logical schema. There is
   /// one FieldNode per field at each level of nesting.
   nodes: [FieldNode];

   /// Buffers correspond to the pre-ordered flattened buffer tree.
   ///
   /// The number of buffers appended to this list depends on the schema. For
   /// example, most primitive arrays will have 2 buffers, 1 for the validity
   /// bitmap and 1 for the values. For struct arrays, there will only be a
   /// single buffer for the validity (nulls) bitmap.
   // Buffer is not declared in this file; presumably it comes from the
   // included Schema.fbs.
   buffers: [Buffer];

   /// Optional compression of the message body. See BodyCompression for the
   /// codec and method that were applied.
   compression: BodyCompression;
 }

 /// For sending dictionary encoding information. Any Field can be
 /// dictionary-encoded, but in this case none of its children may be
 /// dictionary-encoded.
 /// There is one vector / column per dictionary, but that vector / column
 /// may be spread across multiple dictionary batches by using the isDelta
 /// flag.

 table DictionaryBatch {
   /// Identifier of the dictionary that this batch defines, replaces or
   /// extends (see isDelta).
   id: long;

   /// The dictionary values, carried as a RecordBatch.
   // NOTE(review): presumably this batch holds a single column, matching the
   // "one vector / column per dictionary" statement above -- confirm.
   data: RecordBatch;

   /// If isDelta is true the values in the dictionary are to be appended to a
   /// dictionary with the indicated id. If isDelta is false this dictionary
   /// should replace the existing dictionary. Defaults to false.
   isDelta: bool = false;
 }

 /// ----------------------------------------------------------------------
 /// The root Message type

 /// This union enables us to easily send different message types without
 /// redundant storage, and in the future we can easily add new message types.
 ///
 /// Arrow implementations do not need to implement all of the message types,
 /// which may include experimental metadata types. For maximum compatibility,
 /// it is best to send data using RecordBatch.
 union MessageHeader {
   // The position of each member determines its union type code, so new
   // message types must be appended at the end of this list. Schema, Tensor
   // and SparseTensor are not declared in this file; presumably they come
   // from the included .fbs files.
   Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor
 }

 /// The top-level metadata table. It is declared as the root type of this
 /// schema and wraps one of the MessageHeader union members.
 table Message {
   /// Metadata version of this message, as a MetadataVersion value.
   // MetadataVersion is not declared in this file; presumably it comes from
   // the included Schema.fbs.
   version: org.apache.arrow.flatbuf.MetadataVersion;

   /// The type-specific header: exactly one of the MessageHeader members.
   header: MessageHeader;

   // NOTE(review): presumably the length in bytes of the message body that
   // accompanies this metadata -- confirm against the IPC format description.
   bodyLength: long;

   /// Optional list of custom KeyValue metadata entries attached to this
   /// message.
   // KeyValue is not declared in this file; presumably it comes from the
   // included Schema.fbs.
   custom_metadata: [ KeyValue ];
 }

 // Buffers serialized with this schema have Message as their root table.
 root_type Message;
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	include "Schema.fbs";
	include "SparseTensor.fbs";
	include "Tensor.fbs";

	namespace org.apache.arrow.flatbuf;

	/// ----------------------------------------------------------------------
	/// Data structures for describing a table row batch (a collection of
	/// equal-length Arrow arrays)

	/// Metadata about a field at some level of a nested type tree (but not
	/// its children).
	///
	/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
	/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
	/// null_count: 0} for its Int16 node, as separate FieldNode structs.
	struct FieldNode {
	// NOTE: this is a FlatBuffers struct, not a table. Structs are stored
	// inline with a fixed layout, so fields cannot be added, removed or
	// reordered here without breaking readers of existing data.

	/// The number of value slots in the Arrow array at this level of a nested
	/// tree.
	length: long;

	/// The number of observed nulls. Fields with null_count == 0 may choose not
	/// to write their physical validity bitmap out as a materialized buffer,
	/// instead setting the length of the bitmap buffer to 0.
	null_count: long;
	}

	/// Compression codecs that can be selected through BodyCompression.codec.
	enum CompressionType:byte {
	// No explicit values are given, so FlatBuffers numbers the members from 0
	// in declaration order. Append new codecs at the end to keep existing
	// values stable.

	/// LZ4 frame format, for portability, as provided by lz4frame.h or wrappers
	/// thereof. Not to be confused with the "raw" (also called "block") format
	/// provided by lz4.h.
	LZ4_FRAME,

	/// Zstandard.
	ZSTD
	}

	/// Provided for forward compatibility in case we need to support different
	/// strategies for compressing the IPC message body (like whole-body
	/// compression rather than buffer-level) in the future.
	enum BodyCompressionMethod:byte {
	// BUFFER is the only member and has the implicit value 0. Any future
	// method must be appended after it so this value does not change.

	/// Each constituent buffer is first compressed with the indicated
	/// compressor, and then written with the uncompressed length in the first 8
	/// bytes as a 64-bit little-endian signed integer followed by the compressed
	/// buffer bytes (and then padding as required by the protocol). The
	/// uncompressed length may be set to -1 to indicate that the data that
	/// follows is not compressed, which can be useful for cases where
	/// compression does not yield appreciable savings.
	BUFFER
	}

	/// Optional compression for the memory buffers constituting IPC message
	/// bodies. Intended for use with RecordBatch but could be used for other
	/// message types.
	table BodyCompression {
	// Both fields declare defaults. A reader sees the declared default when
	// the field is not present in the serialized table.

	/// Compressor library. Defaults to LZ4_FRAME.
	codec: CompressionType = LZ4_FRAME;

	/// Indicates the way the record batch body was compressed. Defaults to
	/// BUFFER (per-buffer compression).
	method: BodyCompressionMethod = BUFFER;
	}

	/// A data header describing the shared memory layout of a "record" or "row"
	/// batch. Some systems call this a "row batch" internally and others a "record
	/// batch".
	table RecordBatch {
	// Fields carry no explicit ids, so their declaration order defines the
	// table layout. Add new fields only at the end.

	/// Number of records / rows. The arrays in the batch should all have this
	/// length.
	length: long;

	/// Nodes correspond to the pre-ordered flattened logical schema. There is
	/// one FieldNode per field at each level of nesting.
	nodes: [FieldNode];

	/// Buffers correspond to the pre-ordered flattened buffer tree.
	///
	/// The number of buffers appended to this list depends on the schema. For
	/// example, most primitive arrays will have 2 buffers, 1 for the validity
	/// bitmap and 1 for the values. For struct arrays, there will only be a
	/// single buffer for the validity (nulls) bitmap.
	// Buffer is not declared in this file; presumably it comes from the
	// included Schema.fbs.
	buffers: [Buffer];

	/// Optional compression of the message body. See BodyCompression for the
	/// codec and method that were applied.
	compression: BodyCompression;
	}

	/// For sending dictionary encoding information. Any Field can be
	/// dictionary-encoded, but in this case none of its children may be
	/// dictionary-encoded.
	/// There is one vector / column per dictionary, but that vector / column
	/// may be spread across multiple dictionary batches by using the isDelta
	/// flag.

	table DictionaryBatch {
	/// Identifier of the dictionary that this batch defines, replaces or
	/// extends (see isDelta).
	id: long;

	/// The dictionary values, carried as a RecordBatch.
	// NOTE(review): presumably this batch holds a single column, matching the
	// "one vector / column per dictionary" statement above -- confirm.
	data: RecordBatch;

	/// If isDelta is true the values in the dictionary are to be appended to a
	/// dictionary with the indicated id. If isDelta is false this dictionary
	/// should replace the existing dictionary. Defaults to false.
	isDelta: bool = false;
	}

	/// ----------------------------------------------------------------------
	/// The root Message type

	/// This union enables us to easily send different message types without
	/// redundant storage, and in the future we can easily add new message types.
	///
	/// Arrow implementations do not need to implement all of the message types,
	/// which may include experimental metadata types. For maximum compatibility,
	/// it is best to send data using RecordBatch.
	union MessageHeader {
	// The position of each member determines its union type code, so new
	// message types must be appended at the end of this list. Schema, Tensor
	// and SparseTensor are not declared in this file; presumably they come
	// from the included .fbs files.
	Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor
	}

	/// The top-level metadata table. It is declared as the root type of this
	/// schema and wraps one of the MessageHeader union members.
	table Message {
	/// Metadata version of this message, as a MetadataVersion value.
	// MetadataVersion is not declared in this file; presumably it comes from
	// the included Schema.fbs.
	version: org.apache.arrow.flatbuf.MetadataVersion;

	/// The type-specific header: exactly one of the MessageHeader members.
	header: MessageHeader;

	// NOTE(review): presumably the length in bytes of the message body that
	// accompanies this metadata -- confirm against the IPC format description.
	bodyLength: long;

	/// Optional list of custom KeyValue metadata entries attached to this
	/// message.
	// KeyValue is not declared in this file; presumably it comes from the
	// included Schema.fbs.
	custom_metadata: [ KeyValue ];
	}

	// Buffers serialized with this schema have Message as their root table.
	root_type Message;