// src/kudu/common/common.proto - kudu - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // Protobufs which are common throughout Kudu.
 //
 // This file may contain protobufs which are persisted on disk
 // as well as sent on the wire. If a particular protobuf is only
 // used as part of the client-server wire protocol, it should go
 // in common/wire_protocol.proto instead. If it is only used within
 // the server(s), it should go in cfile/cfile.proto, server/metadata.proto,
 // etc, as appropriate.
 // The definitions in this file rely on proto2 features (required/optional
 // labels and explicit field defaults). Declare the syntax explicitly so protoc
 // does not have to fall back to its implicit default and warn about it.
 syntax = "proto2";

 // Protobuf package for every message and enum declared in this file.
 package kudu;

 // Java package used for the generated Java classes.
 option java_package = "org.kududb";

 // The value types a Kudu column can hold; referenced by ColumnSchemaPB.type.
 //
 // If you add a new type keep in mind to add it to the end
 // or update AddMapping() functions like the one in key_encoder.cc
 // that have a vector that maps the protobuf tag with the index.
 enum DataType {
   // Sentinel for an unknown type. Because it is declared first, proto2 uses
   // it as the default for any DataType field that has no explicit default,
   // even though its number (999) is the largest in the enum.
   UNKNOWN_DATA = 999;
   UINT8 = 0;
   INT8 = 1;
   UINT16 = 2;
   INT16 = 3;
   UINT32 = 4;
   INT32 = 5;
   UINT64 = 6;
   INT64 = 7;
   STRING = 8;
   BOOL = 9;
   FLOAT = 10;
   DOUBLE = 11;
   BINARY = 12;
   TIMESTAMP = 13;
 }

 // Encodings selectable for a column's on-disk storage; referenced by
 // ColumnSchemaPB.encoding.
 enum EncodingType {
   // Sentinel for an unknown encoding. Declared first, so it is the proto2
   // default for EncodingType fields that do not set an explicit default.
   UNKNOWN_ENCODING = 999;
   // The explicit default of ColumnSchemaPB.encoding.
   AUTO_ENCODING = 0;
   PLAIN_ENCODING = 1;
   PREFIX_ENCODING = 2;
   GROUP_VARINT = 3;
   RLE = 4;
   DICT_ENCODING = 5;
   BIT_SHUFFLE = 6;
 }

 // Compression codecs selectable for a column's on-disk storage; referenced by
 // ColumnSchemaPB.compression.
 enum CompressionType {
   // Sentinel for an unknown codec. Declared first, so it is the proto2
   // default for CompressionType fields that do not set an explicit default.
   UNKNOWN_COMPRESSION = 999;
   // The explicit default of ColumnSchemaPB.compression.
   DEFAULT_COMPRESSION = 0;
   NO_COMPRESSION = 1;
   SNAPPY = 2;
   LZ4 = 3;
   ZLIB = 4;
 }

 // Describes a single column of a schema (see SchemaPB.columns).
 //
 // TODO: Differentiate between the schema attributes
 // that are only relevant to the server (e.g.,
 // encoding and compression) and those that also
 // matter to the client.
 message ColumnSchemaPB {
   // Numeric column identifier. Optional, so it is not set in every context.
   optional uint32 id = 1;
   // Column name. Required.
   required string name = 2;
   // Value type of the column. Required.
   required DataType type = 3;
   // Whether the column is part of the key. Defaults to false.
   optional bool is_key = 4 [default = false];
   // Whether the column accepts NULL values. Defaults to false.
   optional bool is_nullable = 5 [default = false];
   // Default values for the read path and the write path respectively.
   // NOTE(review): the byte encoding of these values is not specified in this
   // file -- confirm against the code that fills them in.
   optional bytes read_default_value = 6;
   optional bytes write_default_value = 7;

   // The following attributes refer to the on-disk storage of the column.
   // They won't always be set, depending on context.
   optional EncodingType encoding = 8 [default=AUTO_ENCODING];
   optional CompressionType compression = 9 [default=DEFAULT_COMPRESSION];
   // NOTE(review): the unit and the meaning of the default 0 are not stated
   // here -- confirm against the cfile code.
   optional int32 cfile_block_size = 10 [default=0];
 }

 // A schema, expressed as the list of its columns.
 message SchemaPB {
   // The columns of the schema, one ColumnSchemaPB per column.
   repeated ColumnSchemaPB columns = 1;
 }

 // A network endpoint given as a host and a port. Both fields are required.
 message HostPortPB {
   // Host part of the endpoint. The accepted formats (hostname, IP literal)
   // are not constrained by this definition.
   required string host = 1;
   // Port part of the endpoint. Typed as uint32, so range checking is left to
   // the code that consumes it.
   required uint32 port = 2;
 }

 // The external consistency mode for client requests.
 // This defines how transactions and/or sequences of operations that touch
 // several TabletServers, in different machines, can be observed by external
 // clients.
 //
 // Note that ExternalConsistencyMode makes no guarantee on atomicity, i.e.
 // no sequence of operations is made atomic (or transactional) just because
 // an external consistency mode is set.
 // Note also that ExternalConsistencyMode has no implication on the
 // consistency between replicas of the same tablet.
 enum ExternalConsistencyMode {
   // Placeholder for an unknown mode. As the first declared value it is the
   // proto2 default for fields of this type with no explicit default.
   // NOTE(review): the "default mode" mentioned on CLIENT_PROPAGATED is
   // therefore not the protobuf default; presumably it is applied by the
   // client or server code -- confirm.
   UNKNOWN_EXTERNAL_CONSISTENCY_MODE = 0;

   // The response to any write will contain a timestamp.
   // Any further calls from the same client to other servers will update
   // those servers with that timestamp. The user will make sure that the
   // timestamp is propagated through back-channels to other
   // KuduClients.
   //
   // WARNING: Failure to propagate timestamp information through
   // back-channels will negate any external consistency guarantee under this
   // mode.
   //
   // Example:
   // 1 - Client A executes operation X in Tablet A
   // 2 - Afterwards, Client A executes operation Y in Tablet B
   //
   //
   // Client B may observe the following operation sequences:
   // {}, {X}, {X Y}
   //
   // This is the default mode.
   CLIENT_PROPAGATED = 1;

   // The server will guarantee that each transaction is externally
   // consistent by making sure that none of its results are visible
   // until every Kudu server agrees that the transaction is in the past.
   // The client is not obligated to forward timestamp information
   // through back-channels.
   //
   // WARNING: Depending on the clock synchronization state of TabletServers
   // this may imply considerable latency. Moreover, operations with
   // COMMIT_WAIT requested external consistency will outright fail if
   // TabletServer clocks are either unsynchronized or synchronized but
   // with a maximum error which surpasses a pre-configured one.
   //
   // Example:
   // - Client A executes operation X in Tablet A
   // - Afterwards, Client A executes operation Y in Tablet B
   //
   //
   // Client B may observe the following operation sequences:
   // {}, {X}, {X Y}
   COMMIT_WAIT = 2;
 };

 // The possible read modes for clients.
 // Clients set these in Scan requests.
 // The server keeps 2 snapshot boundaries:
 // - The earliest snapshot: this corresponds to the earliest kept undo records
 //   in the tablet, meaning the current state (Base) can be undone up to
 //   this snapshot.
 // - The latest snapshot: this corresponds to the instant beyond which
 //   no transaction will have an earlier timestamp. Usually this corresponds
 //   to whatever clock->Now() returns, but can be higher if the client propagates
 //   a timestamp (see below).
 enum ReadMode {
   // Placeholder for an unknown mode. As the first declared value it is the
   // proto2 default for fields of this type with no explicit default.
   // NOTE(review): the "default mode" mentioned on READ_LATEST is therefore
   // not the protobuf default; presumably it is applied in code -- confirm.
   UNKNOWN_READ_MODE = 0;

   // When READ_LATEST is specified the server will execute the read independently
   // of the clock and will always return all visible writes at the time the request
   // was received. This type of read does not return a snapshot timestamp since
   // it might not be repeatable, i.e. a later read executed at the same snapshot
   // timestamp might yield rows that were committed by in-flight transactions.
   //
   // This is the default mode.
   READ_LATEST = 1;

   // When READ_AT_SNAPSHOT is specified the server will attempt to perform a read
   // at the required snapshot. If no snapshot is defined the server will take the
   // current time as the snapshot timestamp. Snapshot reads are repeatable, i.e.
   // all future reads at the same timestamp will yield the same rows. This is
   // performed at the expense of waiting for in-flight transactions whose timestamp
   // is lower than the snapshot's timestamp to complete.
   //
   // When mixing reads and writes, clients that specify COMMIT_WAIT as their
   // external consistency mode and then use the returned write_timestamp
   // to perform snapshot reads are guaranteed that the snapshot time is
   // considered in the past by all servers and no additional action is
   // necessary. Clients using CLIENT_PROPAGATED however must forcibly propagate
   // the timestamps even at read time, so that the server will not generate
   // any more transactions before the snapshot requested by the client.
   // The latter option is implemented by allowing the client to specify one or
   // two timestamps, the first one obtained from the previous CLIENT_PROPAGATED
   // write, directly or through back-channels, must be signed and will be
   // checked by the server. The second one, if defined, is the actual snapshot
   // read time. When selecting both the latter must be lower than or equal to
   // the former.
   // TODO implement actually signing the propagated timestamp.
   READ_AT_SNAPSHOT = 2;
 }

 // The possible order modes for clients.
 // Clients specify these in new scan requests.
 // Ordered scans are fault-tolerant, and can be retried elsewhere in the case
 // of tablet server failure. However, ordered scans impose additional overhead
 // since the tablet server needs to sort the result rows.
 enum OrderMode {
   // Placeholder for an unknown mode. As the first declared value it is the
   // proto2 default for fields of this type with no explicit default.
   UNKNOWN_ORDER_MODE = 0;
   // This is the default order mode.
   // NOTE(review): "default" here is not the protobuf default (that is
   // UNKNOWN_ORDER_MODE); presumably it is applied in code -- confirm.
   UNORDERED = 1;
   // Rows are returned sorted; see the enum comment for the trade-off.
   ORDERED = 2;
 }

 // The serialized format of a Kudu table partition schema.
 // A partition schema consists of zero or more hash bucket components plus an
 // optional range component.
 message PartitionSchemaPB {

   // A column identifier for partition schemas. In general, the name will be
   // used when a client creates the table since column IDs are assigned by the
   // master. All other uses of partition schemas will use the numeric column ID.
   message ColumnIdentifierPB {
     // Exactly one of the two forms is set at a time.
     oneof identifier {
       // NOTE(review): typed int32 here while ColumnSchemaPB.id is uint32;
       // confirm the mismatch is intentional.
       int32 id = 1;
       string name = 2;
     }
   }

   // The range component of a partition schema.
   message RangeSchemaPB {
     // Column identifiers of columns included in the range. All columns must be
     // a component of the primary key.
     repeated ColumnIdentifierPB columns = 1;
   }

   // One hash bucket component of a partition schema.
   message HashBucketSchemaPB {
     // Column identifiers of columns included in the hash. Every column must be
     // a component of the primary key.
     repeated ColumnIdentifierPB columns = 1;

     // Number of buckets into which columns will be hashed. Must be at least 2.
     required int32 num_buckets = 2;

     // Seed value for hash calculation. Administrators may set a seed value
     // on a per-table basis in order to randomize the mapping of rows to
     // buckets. Setting a seed provides some amount of protection against denial
     // of service attacks when the hash bucket columns contain user provided
     // input.
     optional uint32 seed = 3;

     // Hash functions that can be selected through hash_algorithm.
     enum HashAlgorithm {
       // First declared value, hence the proto2 default of hash_algorithm
       // when the field is left unset.
       UNKNOWN = 0;
       MURMUR_HASH_2 = 1;
     }

     // The hash algorithm to use for calculating the hash bucket.
     optional HashAlgorithm hash_algorithm = 4;
   }

   // The hash bucket components, one entry per component.
   repeated HashBucketSchemaPB hash_bucket_schemas = 1;
   // The range component, if any.
   optional RangeSchemaPB range_schema = 2;
 }

 // The serialized format of a Kudu table partition.
 // The partition covers the half-open key interval
 // [partition_key_start, partition_key_end).
 message PartitionPB {
   // The hash buckets of the partition. The number of hash buckets must match
   // the number of hash bucket components in the partition's schema.
   // Stored with packed encoding.
   repeated int32 hash_buckets = 1 [packed = true];
   // The encoded start partition key (inclusive).
   optional bytes partition_key_start = 2;
   // The encoded end partition key (exclusive).
   optional bytes partition_key_end = 3;
 }

 // A predicate that can be applied on a Kudu column.
 // The predicate kind is chosen through the `predicate` oneof, so exactly one
 // of range, equality or is_not_null applies to the named column.
 message ColumnPredicatePB {
   // The predicate column name.
   optional string column = 1;

   // A range predicate with an inclusive lower and an exclusive upper bound.
   message Range {

     // Bounds should be encoded as follows:
     // - STRING/BINARY values: simply the exact string value for the bound.
     // - other type: the canonical x86 in-memory representation -- eg for
     //   uint32s, a little-endian value.
     //
     // Note that this predicate type should not be used for NULL data --
     // NULL is defined to neither be greater than or less than other values
     // for the comparison operator. Use the IsNotNull predicate to test for
     // non-null values.

     // The inclusive lower bound.
     // NOTE(review): both bounds are optional; presumably an unset bound
     // leaves that side unbounded -- confirm.
     optional bytes lower = 1;

     // The exclusive upper bound.
     optional bytes upper = 2;
   }

   // An equality predicate against a single value.
   message Equality {
     // The value the column is compared against for equality. See comment in
     // Range for notes on the encoding.
     optional bytes value = 1;
   }

   // A predicate on the column not being NULL. It carries no parameters.
   message IsNotNull {}

   // The kind of predicate; at most one member is set.
   oneof predicate {
     Range range = 2;
     Equality equality = 3;
     IsNotNull is_not_null = 4;
   }
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	//
	// Protobufs which are common throughout Kudu.
	//
	// This file may contain protobufs which are persisted on disk
	// as well as sent on the wire. If a particular protobuf is only
	// used as part of the client-server wire protocol, it should go
	// in common/wire_protocol.proto instead. If it is only used within
	// the server(s), it should go in cfile/cfile.proto, server/metadata.proto,
	// etc, as appropriate.
	// NOTE(review): from the license header above this line onward, the file
	// repeats every definition already declared earlier in this file, with tab
	// indentation instead of spaces. protoc accepts only one package statement
	// per file and rejects redefinitions of the same names, so this second copy
	// looks like a paste/extraction artifact -- confirm and drop it.
	package kudu;

	option java_package = "org.kududb";

	// Column value types, as referenced by ColumnSchemaPB.type.
	//
	// New types should be appended at the end. Otherwise the AddMapping()
	// functions (e.g. the one in key_encoder.cc), whose vectors map the protobuf
	// tag to an index, have to be updated as well.
	enum DataType {
	  // Unknown type. Being the first declared value, this is what proto2 uses as
	  // the default for a DataType field that declares no default of its own.
	  UNKNOWN_DATA = 999;

	  UINT8 = 0;
	  INT8 = 1;
	  UINT16 = 2;
	  INT16 = 3;
	  UINT32 = 4;
	  INT32 = 5;
	  UINT64 = 6;
	  INT64 = 7;
	  STRING = 8;
	  BOOL = 9;
	  FLOAT = 10;
	  DOUBLE = 11;
	  BINARY = 12;
	  TIMESTAMP = 13;
	}

	// On-disk column encodings, as referenced by ColumnSchemaPB.encoding.
	enum EncodingType {
	  // Unknown encoding. First declared value, hence the proto2 default for
	  // EncodingType fields that declare no default of their own.
	  UNKNOWN_ENCODING = 999;

	  // Explicit default of ColumnSchemaPB.encoding.
	  AUTO_ENCODING = 0;
	  PLAIN_ENCODING = 1;
	  PREFIX_ENCODING = 2;
	  GROUP_VARINT = 3;
	  RLE = 4;
	  DICT_ENCODING = 5;
	  BIT_SHUFFLE = 6;
	}

	// On-disk column compression codecs, as referenced by
	// ColumnSchemaPB.compression.
	enum CompressionType {
	  // Unknown codec. First declared value, hence the proto2 default for
	  // CompressionType fields that declare no default of their own.
	  UNKNOWN_COMPRESSION = 999;

	  // Explicit default of ColumnSchemaPB.compression.
	  DEFAULT_COMPRESSION = 0;
	  NO_COMPRESSION = 1;
	  SNAPPY = 2;
	  LZ4 = 3;
	  ZLIB = 4;
	}

	// Schema of one column; SchemaPB holds a list of these.
	//
	// TODO: Separate the attributes that only the server cares about (e.g.
	// encoding and compression) from those that also matter to the client.
	message ColumnSchemaPB {
	  // Numeric column identifier; optional, so not present in every context.
	  optional uint32 id = 1;

	  // Name of the column (required).
	  required string name = 2;

	  // Value type of the column (required).
	  required DataType type = 3;

	  // True when the column belongs to the key; false unless set.
	  optional bool is_key = 4 [default = false];

	  // True when the column may contain NULLs; false unless set.
	  optional bool is_nullable = 5 [default = false];

	  // Read-side and write-side default values.
	  // NOTE(review): how these bytes are encoded is not stated in this file --
	  // confirm with the code that produces them.
	  optional bytes read_default_value = 6;
	  optional bytes write_default_value = 7;

	  // On-disk storage attributes of the column. Depending on context they are
	  // not always set.
	  optional EncodingType encoding = 8 [default = AUTO_ENCODING];
	  optional CompressionType compression = 9 [default = DEFAULT_COMPRESSION];
	  // NOTE(review): unit and the meaning of the default 0 are not documented
	  // here -- confirm against the cfile code.
	  optional int32 cfile_block_size = 10 [default = 0];
	}

	// A schema: simply the list of column schemas it is made of.
	message SchemaPB {
	  // One entry per column.
	  repeated ColumnSchemaPB columns = 1;
	}

	// Host/port pair identifying a network endpoint; both parts are required.
	message HostPortPB {
	  // Host component. This definition places no constraint on its format.
	  required string host = 1;

	  // Port component, carried as a uint32.
	  required uint32 port = 2;
	}

	// External consistency mode of client requests: how transactions and/or
	// sequences of operations that touch several TabletServers, on different
	// machines, may be observed by external clients.
	//
	// Setting a mode does not make any sequence of operations atomic (or
	// transactional); ExternalConsistencyMode gives no atomicity guarantee.
	// It also has no implication on the consistency between replicas of the
	// same tablet.
	enum ExternalConsistencyMode {
	  // Unknown mode. First declared value, hence the proto2 default for fields
	  // of this type that declare no default of their own.
	  UNKNOWN_EXTERNAL_CONSISTENCY_MODE = 0;

	  // Every write response carries a timestamp. Subsequent calls from the same
	  // client to other servers update those servers with that timestamp. It is
	  // the user's job to propagate the timestamp through back-channels to other
	  // KuduClients.
	  //
	  // WARNING: if timestamp information is not propagated through
	  // back-channels, any external consistency guarantee under this mode is
	  // void.
	  //
	  // Example:
	  //   1 - Client A executes operation X in Tablet A
	  //   2 - Afterwards, Client A executes operation Y in Tablet B
	  //
	  // Client B may observe the following operation sequences:
	  //   {}, {X}, {X Y}
	  //
	  // This is the default mode.
	  CLIENT_PROPAGATED = 1;

	  // The server guarantees that each transaction is externally consistent by
	  // keeping its results invisible until every Kudu server agrees that the
	  // transaction is in the past. The client is not obligated to forward
	  // timestamp information through back-channels.
	  //
	  // WARNING: depending on the clock synchronization state of TabletServers
	  // this may imply considerable latency. Moreover, operations requesting
	  // COMMIT_WAIT external consistency fail outright if TabletServer clocks are
	  // unsynchronized, or synchronized with a maximum error that surpasses a
	  // pre-configured one.
	  //
	  // Example:
	  //   - Client A executes operation X in Tablet A
	  //   - Afterwards, Client A executes operation Y in Tablet B
	  //
	  // Client B may observe the following operation sequences:
	  //   {}, {X}, {X Y}
	  COMMIT_WAIT = 2;
	};

	// Read modes available to clients, set in Scan requests.
	//
	// The server keeps 2 snapshot boundaries:
	// - The earliest snapshot: corresponds to the earliest kept undo records in
	//   the tablet, meaning the current state (Base) can be undone up to this
	//   snapshot.
	// - The latest snapshot: corresponds to the instant beyond which no
	//   transaction will have an earlier timestamp. Usually this is whatever
	//   clock->Now() returns, but it can be higher if the client propagates a
	//   timestamp (see below).
	enum ReadMode {
	  // Unknown mode. First declared value, hence the proto2 default for fields
	  // of this type that declare no default of their own.
	  UNKNOWN_READ_MODE = 0;

	  // With READ_LATEST the server executes the read independently of the clock
	  // and always returns all writes visible at the time the request was
	  // received. No snapshot timestamp is returned for this type of read, since
	  // it might not be repeatable: a later read executed at the same snapshot
	  // timestamp might yield rows that were committed by in-flight
	  // transactions.
	  //
	  // This is the default mode.
	  READ_LATEST = 1;

	  // With READ_AT_SNAPSHOT the server attempts to perform the read at the
	  // required snapshot. If no snapshot is defined, the server takes the
	  // current time as the snapshot timestamp. Snapshot reads are repeatable,
	  // i.e. all future reads at the same timestamp yield the same rows. The
	  // price is waiting for in-flight transactions whose timestamp is lower
	  // than the snapshot's timestamp to complete.
	  //
	  // When mixing reads and writes, clients that specify COMMIT_WAIT as their
	  // external consistency mode and then use the returned write_timestamp to
	  // perform snapshot reads are guaranteed that the snapshot time is
	  // considered in the past by all servers, and no additional action is
	  // necessary. Clients using CLIENT_PROPAGATED, however, must forcibly
	  // propagate the timestamps even at read time, so that the server will not
	  // generate any more transactions before the snapshot requested by the
	  // client.
	  // The latter option is implemented by allowing the client to specify one
	  // or two timestamps. The first one, obtained from the previous
	  // CLIENT_PROPAGATED write, directly or through back-channels, must be
	  // signed and will be checked by the server. The second one, if defined, is
	  // the actual snapshot read time. When selecting both, the latter must be
	  // lower than or equal to the former.
	  // TODO implement actually signing the propagated timestamp.
	  READ_AT_SNAPSHOT = 2;
	}

	// Order modes available to clients, specified in new scan requests.
	//
	// Ordered scans are fault-tolerant and can be retried elsewhere when a
	// tablet server fails, but they cost extra because the tablet server has to
	// sort the result rows.
	enum OrderMode {
	  // Unknown mode. First declared value, hence the proto2 default for fields
	  // of this type that declare no default of their own.
	  UNKNOWN_ORDER_MODE = 0;

	  // This is the default order mode.
	  UNORDERED = 1;

	  // Sorted results; see the enum comment for the trade-off.
	  ORDERED = 2;
	}

	// Serialized form of a Kudu table partition schema: zero or more hash bucket
	// components and an optional range component.
	message PartitionSchemaPB {
	  // Identifies a column within a partition schema. The name is generally
	  // used when a client creates the table, because column IDs are assigned by
	  // the master; every other use of partition schemas uses the numeric ID.
	  message ColumnIdentifierPB {
	    // Only one of the two forms is set at a time.
	    oneof identifier {
	      int32 id = 1;
	      string name = 2;
	    }
	  }

	  // Range component of the partition schema.
	  message RangeSchemaPB {
	    // Identifiers of the columns included in the range. Each of them must be
	    // a component of the primary key.
	    repeated ColumnIdentifierPB columns = 1;
	  }

	  // A single hash bucket component of the partition schema.
	  message HashBucketSchemaPB {
	    // Identifiers of the columns included in the hash. Each of them must be
	    // a component of the primary key.
	    repeated ColumnIdentifierPB columns = 1;

	    // How many buckets the columns are hashed into. Must be at least 2.
	    required int32 num_buckets = 2;

	    // Seed for the hash calculation. Administrators may set one per table to
	    // randomize the mapping of rows to buckets, which gives some protection
	    // against denial of service attacks when the hash bucket columns contain
	    // user provided input.
	    optional uint32 seed = 3;

	    // Hash functions selectable through hash_algorithm.
	    enum HashAlgorithm {
	      // First declared value, hence the proto2 default when hash_algorithm
	      // is left unset.
	      UNKNOWN = 0;
	      MURMUR_HASH_2 = 1;
	    }

	    // Hash algorithm used to calculate the hash bucket.
	    optional HashAlgorithm hash_algorithm = 4;
	  }

	  // Hash bucket components, one entry each.
	  repeated HashBucketSchemaPB hash_bucket_schemas = 1;

	  // Range component, when present.
	  optional RangeSchemaPB range_schema = 2;
	}

	// Serialized form of a Kudu table partition, covering the half-open key
	// interval [partition_key_start, partition_key_end).
	message PartitionPB {
	  // Hash buckets of the partition. Their count must match the number of hash
	  // bucket components in the partition's schema. Uses packed encoding.
	  repeated int32 hash_buckets = 1 [packed = true];

	  // Encoded start partition key (inclusive).
	  optional bytes partition_key_start = 2;

	  // Encoded end partition key (exclusive).
	  optional bytes partition_key_end = 3;
	}

	// A predicate applicable to a Kudu column. The `predicate` oneof selects the
	// kind of predicate: range, equality or is_not_null.
	message ColumnPredicatePB {
	  // Name of the column the predicate applies to.
	  optional string column = 1;

	  // Range predicate with an inclusive lower and an exclusive upper bound.
	  //
	  // Encoding of the bounds:
	  // - STRING/BINARY values: simply the exact string value for the bound.
	  // - other types: the canonical x86 in-memory representation -- e.g. a
	  //   little-endian value for uint32s.
	  //
	  // This predicate type should not be used for NULL data: NULL is defined to
	  // be neither greater than nor less than other values for the comparison
	  // operator.
	  message Range {
	    // Inclusive lower bound.
	    optional bytes lower = 1;

	    // Exclusive upper bound.
	    optional bytes upper = 2;
	  }

	  // Equality predicate against one value.
	  message Equality {
	    // Value the column is compared against. Encoded as described for the
	    // bounds of Range.
	    optional bytes value = 1;
	  }

	  // Predicate on the column not being NULL; takes no parameters.
	  message IsNotNull {}

	  // The selected predicate kind; at most one member is set.
	  oneof predicate {
	    Range range = 2;
	    Equality equality = 3;
	    IsNotNull is_not_null = 4;
	  }
	}