// src/kudu/common/common.proto - kudu - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // Protobufs which are common throughout Kudu.
 //
 // This file may contain protobufs which are persisted on disk
 // as well as sent on the wire. If a particular protobuf is only
 // used as part of the client-server wire protocol, it should go
 // in common/wire_protocol.proto instead. If it is only used within
 // the server(s), it should go in cfile/cfile.proto, server/metadata.proto,
 // etc, as appropriate.
 syntax = "proto2";
 package kudu;

 option java_package = "org.apache.kudu";

 import "kudu/common/row_operations.proto";
 import "kudu/util/block_bloom_filter.proto";
 import "kudu/util/compression/compression.proto";
 import "kudu/util/hash.proto";
 import "kudu/util/pb_util.proto";

 // Data types that a column can be declared with (see ColumnSchemaPB.type).
 //
 // If you add a new type keep in mind to add it to the end
 // or update AddMapping() functions like the one in key_encoder.cc
 // that have a vector that maps the protobuf tag with the index.
 enum DataType {
   // Declared first, so under proto2 this is what an unset DataType field
   // without an explicit default reads as.
   UNKNOWN_DATA = 999;
   UINT8 = 0;
   INT8 = 1;
   UINT16 = 2;
   INT16 = 3;
   UINT32 = 4;
   INT32 = 5;
   UINT64 = 6;
   INT64 = 7;
   STRING = 8;
   BOOL = 9;
   FLOAT = 10;
   DOUBLE = 11;
   BINARY = 12;
   UNIXTIME_MICROS = 13;
   INT128 = 14;
   // Decimal columns; ColumnTypeAttributesPB carries their precision and scale.
   DECIMAL32 = 15;
   DECIMAL64 = 16;
   DECIMAL128 = 17;
   IS_DELETED = 18; // virtual column; not a real data type
   // Varchar columns; ColumnTypeAttributesPB carries their length.
   VARCHAR = 19;
   DATE = 20;
 }

 // Encodings that can be selected for a column's on-disk storage
 // (see ColumnSchemaPB.encoding).
 enum EncodingType {
   // Declared first, so under proto2 this is what an unset EncodingType field
   // without an explicit default reads as. ColumnSchemaPB.encoding overrides
   // that with an explicit default of AUTO_ENCODING.
   UNKNOWN_ENCODING = 999;
   AUTO_ENCODING = 0;
   PLAIN_ENCODING = 1;
   PREFIX_ENCODING = 2;
   // GROUP_VARINT encoding is deprecated and no longer implemented.
   // The value is kept so that its number is not reused.
   GROUP_VARINT = 3;
   RLE = 4;
   DICT_ENCODING = 5;
   BIT_SHUFFLE = 6;
 }

 // HMS-related configuration choices for a Kudu mini-cluster.
 //
 // NOTE: the numeric values do not follow declaration order. They are part of
 // the serialized form, so do not renumber them to tidy up.
 enum HmsMode {
   // Do not start an HMS at all. Declared first, hence the proto2 default.
   NONE = 0;

   // Start the HMS, but without configuring it to use the Kudu plugin.
   DISABLE_HIVE_METASTORE = 3;

   // Start the HMS with the Kudu plugin configured, while leaving the Kudu
   // mini-cluster unconfigured to synchronize with it.
   ENABLE_HIVE_METASTORE = 1;

   // Start the HMS with the Kudu plugin configured, and configure the Kudu
   // mini-cluster to synchronize with it.
   ENABLE_METASTORE_INTEGRATION = 2;
 }

 // Holds detailed attributes for the column. Only certain fields will be set,
 // depending on the type of the column.
 message ColumnTypeAttributesPB {
   // For decimal columns (the DECIMAL32/DECIMAL64/DECIMAL128 data types).
   optional int32 precision = 1;
   optional int32 scale = 2;
   // For varchar columns (the VARCHAR data type).
   // NOTE(review): the unit of 'length' (bytes vs. characters) is not stated
   // in this file -- confirm before relying on it.
   optional int32 length = 3;
 }

 // Describes a single column of a table schema (see SchemaPB.columns).
 //
 // TODO: Differentiate between the schema attributes
 // that are only relevant to the server (e.g.,
 // encoding and compression) and those that also
 // matter to the client.
 message ColumnSchemaPB {
   // Numeric column ID. Optional: per the comments in PartitionSchemaPB,
   // column IDs are assigned by the master, so a client creating a table
   // may not have one to supply.
   optional uint32 id = 1;
   // Name of the column.
   required string name = 2;
   // Data type of the column.
   required DataType type = 3;
   // NOTE(review): presumably marks the column as a component of the primary
   // key -- confirm against the schema conversion code.
   optional bool is_key = 4 [default = false];
   // Whether the column is nullable. Columns are non-nullable unless this is
   // set.
   optional bool is_nullable = 5 [default = false];

   // Default values.
   // NOTE: as far as clients are concerned, there is only one
   // "default value" of a column. The read/write defaults are used
   // internally and should not be exposed by any public client APIs.
   //
   // When passing schemas to the master for create/alter table,
   // specify the default in 'read_default_value'.
   //
   // Contrary to this, when the client opens a table, it will receive
   // both the read and write defaults, but the *write* default is
   // what should be exposed as the "current" default.
   optional bytes read_default_value = 6;
   optional bytes write_default_value = 7;

   // The following attributes refer to the on-disk storage of the column.
   // They won't always be set, depending on context.
   optional EncodingType encoding = 8 [default=AUTO_ENCODING];
   optional CompressionType compression = 9 [default=DEFAULT_COMPRESSION];
   // NOTE(review): the unit is not stated here (presumably bytes) and the
   // meaning of the default value 0 is not visible in this file -- confirm.
   optional int32 cfile_block_size = 10 [default=0];

   // Type-specific attributes (decimal precision/scale, varchar length).
   optional ColumnTypeAttributesPB type_attributes = 11;

   // The comment for the column.
   optional string comment = 12;

   // NOTE(review): semantics are not documented in this file; presumably the
   // column's value cannot be changed once written -- confirm.
   optional bool immutable = 13 [default = false];
 }

 // Describes a change to the schema of a column. Every field is optional.
 // NOTE(review): presumably only the fields that are set get applied --
 // confirm against the alter-table code.
 message ColumnSchemaDeltaPB {
   // NOTE(review): presumably the current name of the column being changed.
   optional string name = 1;
   // New name for the column.
   optional string new_name = 2;

   // Field number 3 is not used by this message. NOTE(review): if it once
   // belonged to a removed field it should be declared 'reserved' -- confirm
   // from history before assigning it.

   // New default value for the column.
   optional bytes default_value = 4;
   // NOTE(review): presumably requests removal of the existing default.
   optional bool remove_default = 5;

   // New on-disk storage attributes for the column.
   optional EncodingType encoding = 6;
   optional CompressionType compression = 7;
   optional int32 block_size = 8;

   // New comment for the column.
   optional string new_comment = 9;
 }

 // A schema, expressed as a list of column schemas.
 message SchemaPB {
   // The columns of the schema.
   repeated ColumnSchemaPB columns = 1;
 }

 // A host/port pair. Both fields are required.
 message HostPortPB {
   // NOTE(review): whether this is a hostname, an IP address, or either is not
   // stated in this file.
   required string host = 1;
   // Port number. Declared as uint32; the schema itself does not restrict the
   // value to the valid port range.
   required uint32 port = 2;
 }

 // External consistency mode requested by a client.
 //
 // It determines how ops, or sequences of operations, that touch several
 // TabletServers on different machines can be observed by external clients.
 //
 // Two things this mode does NOT provide:
 //   - Atomicity: no sequence of operations becomes atomic (or transactional)
 //     merely because an external consistency mode is set.
 //   - Any implication for the consistency between replicas of the same
 //     tablet.
 enum ExternalConsistencyMode {
   UNKNOWN_EXTERNAL_CONSISTENCY_MODE = 0;

   // This is the default mode.
   //
   // Every write response carries a timestamp, and any further call from the
   // same client to other servers updates those servers with that timestamp.
   // It is up to the user to propagate the timestamp through back-channels to
   // other KuduClients.
   //
   // WARNING: if timestamp information is not propagated through
   // back-channels, any external consistency guarantee under this mode is
   // negated.
   //
   // Example:
   // 1 - Client A executes operation X in Tablet A
   // 2 - Afterwards, Client A executes operation Y in Tablet B
   //
   // Client B may observe the following operation sequences:
   // {}, {X}, {X Y}
   CLIENT_PROPAGATED = 1;

   // The server guarantees external consistency of each op by ensuring none of
   // its results become visible until every Kudu server agrees that the op is
   // in the past. The client is not obligated to forward timestamp
   // information through back-channels.
   //
   // WARNING: depending on the clock synchronization state of TabletServers,
   // this may imply considerable latency. Moreover, operations that request
   // COMMIT_WAIT external consistency will outright fail if TabletServer
   // clocks are either unsynchronized, or synchronized but with a maximum
   // error which surpasses a pre-configured one.
   //
   // Example:
   // - Client A executes operation X in Tablet A
   // - Afterwards, Client A executes operation Y in Tablet B
   //
   // Client B may observe the following operation sequences:
   // {}, {X}, {X Y}
   COMMIT_WAIT = 2;
 }

 // The possible read modes for clients.
 // Clients set these in Scan requests.
 // The server keeps 2 snapshot boundaries:
 // - The earliest snapshot: this corresponds to the earliest kept undo records
 //   in the tablet, meaning the current state (Base) can be undone up to
 //   this snapshot.
 // - The latest snapshot: This corresponds to the instant beyond which no op
 //   will have an earlier timestamp. Usually this corresponds to whatever
 //   clock->Now() returns, but can be higher if the client propagates a
 //   timestamp (see below).
 enum ReadMode {
   // Declared first, so under proto2 this is what an unset ReadMode field
   // without an explicit default reads as.
   UNKNOWN_READ_MODE = 0;

   // When READ_LATEST is specified the server will execute the read independently
   // of the clock and will always return all visible writes at the time the request
   // was received. This type of read does not return a snapshot timestamp since
   // it might not be repeatable, i.e. a later read executed at the same snapshot
   // timestamp might yield rows that were committed by in-flight ops.
   //
   // This is the default mode.
   READ_LATEST = 1;

   // When READ_AT_SNAPSHOT is specified the server will attempt to perform a read
   // at the required snapshot. If no snapshot is defined the server will take the
   // current time as the snapshot timestamp. Snapshot reads are repeatable, i.e.
   // all future reads at the same timestamp will yield the same rows. This is
   // performed at the expense of waiting for in-flight ops whose timestamp
   // is lower than the snapshot's timestamp to complete.
   //
   // When mixing reads and writes, clients that specify COMMIT_WAIT as their
   // external consistency mode and then use the returned write_timestamp
   // to perform snapshot reads are guaranteed that that snapshot time is
   // considered in the past by all servers and no additional action is
   // necessary. Clients using CLIENT_PROPAGATED however must forcibly propagate
   // the timestamps even at read time, so that the server will not generate
   // any more ops before the snapshot requested by the client.
   // The latter option is implemented by allowing the client to specify one or
   // two timestamps, the first one obtained from the previous CLIENT_PROPAGATED
   // write, directly or through back-channels, must be signed and will be
   // checked by the server. The second one, if defined, is the actual snapshot
   // read time. When selecting both the latter must be lower than or equal to
   // the former.
   // TODO implement actually signing the propagated timestamp.
   READ_AT_SNAPSHOT = 2;

   // When READ_YOUR_WRITES is specified, the server will pick a timestamp to use
   // for a server-local snapshot scan subject to the following criteria:
   // (1) It will be higher than the propagated timestamp,
   // (2) It will attempt to minimize latency caused by waiting for outstanding
   //     write ops to complete.
   // More specifically, the server will choose the latest timestamp higher than
   // the provided propagated timestamp bound that allows execution of the
   // reads without being blocked by the in-flight ops (however the
   // read can be blocked if the propagated timestamp is higher than some in-flight
   // ops). If no propagated timestamp is provided the server will choose
   // a timestamp such that all ops before it are committed. The chosen
   // timestamp will be returned back to the client as 'snapshot timestamp'. The Kudu
   // client library will use it as the propagated timestamp for subsequent reads
   // to avoid unnecessarily waiting.
   //
   // Reads in this mode are not repeatable: two READ_YOUR_WRITES reads, even if
   // they provide the same propagated timestamp bound, can execute at different
   // timestamps and thus return different results. However, it allows
   // read-your-writes and read-your-reads for each client, as the chosen
   // timestamp must be higher than the one of the last write or read,
   // known from the propagated timestamp.
   READ_YOUR_WRITES = 3;
 }

 // The possible order modes for clients.
 // Clients specify these in new scan requests.
 // Ordered scans are fault-tolerant, and can be retried elsewhere in the case
 // of tablet server failure. However, ordered scans impose additional overhead
 // since the tablet server needs to sort the result rows.
 enum OrderMode {
   // Declared first, so under proto2 this is what an unset OrderMode field
   // without an explicit default reads as.
   UNKNOWN_ORDER_MODE = 0;
   // This is the default order mode.
   UNORDERED = 1;
   // Rows are returned sorted, at the cost described above.
   ORDERED = 2;
 }

 // Policy with which to choose among multiple replicas.
 enum ReplicaSelection {
   // Declared first, so under proto2 this is what an unset ReplicaSelection
   // field without an explicit default reads as.
   UNKNOWN_REPLICA_SELECTION = 0;
   // Select the LEADER replica.
   LEADER_ONLY = 1;
   // Select the closest replica to the client. Replicas are classified from
   // closest to furthest as follows:
   //   - Local replicas
   //   - Replicas whose tablet server has the same location as the client
   //   - All other replicas
   CLOSEST_REPLICA = 2;
 }

 // The serialized format of a Kudu table partition schema.
 message PartitionSchemaPB {

   // A column identifier for partition schemas. In general, the name will be
   // used when a client creates the table since column IDs are assigned by the
   // master. All other uses of partition schemas will use the numeric column ID.
   message ColumnIdentifierPB {
     // Exactly one of the two forms is carried at a time.
     oneof identifier {
       int32 id = 1;
       string name = 2;
     }
   }

   message RangeSchemaPB {
     // Column identifiers of columns included in the range. All columns must be
     // a component of the primary key.
     repeated ColumnIdentifierPB columns = 1;
   }

   // One hash dimension: a set of columns hashed into a number of buckets.
   message HashBucketSchemaPB {
     // Column identifiers of columns included in the hash. Every column must be
     // a component of the primary key.
     repeated ColumnIdentifierPB columns = 1;

     // Number of buckets into which columns will be hashed. Must be at least 2.
     // The lower bound is not expressed in the schema itself, so it has to be
     // validated by the code consuming this message.
     required int32 num_buckets = 2;

     // Seed value for hash calculation. Administrators may set a seed value
     // on a per-table basis in order to randomize the mapping of rows to
     // buckets. Setting a seed provides some amount of protection against denial
     // of service attacks when the hash bucket columns contain user provided
     // input.
     optional uint32 seed = 3;

     // The hash algorithm to use for calculating the hash bucket.
     // NOTE: this is not used yet -- don't expect setting it to have any effect
     optional HashAlgorithm hash_algorithm = 4;
   }

   // This data structure represents a range partition with a custom hash schema.
   message RangeWithHashSchemaPB {
     // Row operations containing the lower and upper range bound for the range.
     optional RowOperationsPB range_bounds = 1;
     // Hash schema for the range.
     repeated HashBucketSchemaPB hash_schema = 2;
   }

   // Table-wide hash schema. Hash schema for a particular range may be
   // overridden by the corresponding element in 'custom_hash_schema_ranges'.
   repeated HashBucketSchemaPB hash_schema = 1;

   // Range schema to partition the key space into ranges.
   optional RangeSchemaPB range_schema = 2;

   // Two fields were deprecated in favor of using 'custom_hash_schema_ranges'.
   // Their numbers stay reserved so they cannot be reused by new fields.
   reserved 3;
   reserved 4;

   // If the 'custom_hash_schema_ranges' field is empty, the table-wide hash
   // schema specified by the 'hash_schema' field is used for all the ranges
   // of the table. Otherwise, particular ranges have their hash schema
   // as specified by corresponding elements in 'custom_hash_schema_ranges'.
   repeated RangeWithHashSchemaPB custom_hash_schema_ranges = 5;
 }

 // The serialized format of a Kudu table partition.
 message PartitionPB {
   // The hash buckets of the partition. The number of hash buckets must match
   // the number of hash dimensions in the partition's schema.
   // Packed encoding is requested explicitly because proto2 does not pack
   // repeated scalars by default.
   repeated int32 hash_buckets = 1 [packed = true];
   // The encoded start partition key (inclusive).
   optional bytes partition_key_start = 2;
   // The encoded end partition key (exclusive).
   optional bytes partition_key_end = 3;
 }

 // A predicate that can be applied on a Kudu column.
 message ColumnPredicatePB {
   // The predicate column name.
   optional string column = 1;

   // Matches values within a range of bounds.
   message Range {

     // Bounds should be encoded as follows:
     // - STRING/BINARY values: simply the exact string value for the bound.
     // - other type: the canonical x86 in-memory representation -- eg for
     //   uint32s, a little-endian value.
     //
     // Note that this predicate type should not be used for NULL data --
     // NULL is defined to neither be greater than or less than other values
     // for the comparison operator.

     // The inclusive lower bound.
     optional bytes lower = 1 [(kudu.REDACT) = true];

     // The exclusive upper bound.
     optional bytes upper = 2 [(kudu.REDACT) = true];
   }

   // Matches a single value.
   message Equality {
     // The value to compare the column against. See comment in Range for
     // notes on the encoding.
     optional bytes value = 1 [(kudu.REDACT) = true];
   }

   // Matches any value from a list.
   message InList {
     // A list of values for the field. See comment in Range for notes on
     // the encoding.
     repeated bytes values = 1 [(kudu.REDACT) = true];
   }

   // Carries no fields; selecting it in the 'predicate' oneof is the whole
   // predicate.
   message IsNotNull {}

   // Carries no fields; selecting it in the 'predicate' oneof is the whole
   // predicate.
   message IsNull {}

   message InBloomFilter {
     // A list of bloom filters for the field.
     repeated BlockBloomFilterPB bloom_filters = 1;

     // lower and upper are optional for InBloomFilter.
     // When using both InBloomFilter and Range predicate for the same column the
     // merged result can be InBloomFilter within specified range.
     //
     // The inclusive lower bound.
     optional bytes lower = 2 [(kudu.REDACT) = true];

     // The exclusive upper bound.
     optional bytes upper = 3 [(kudu.REDACT) = true];
   }

   // The kind of predicate; at most one member is set at a time. Field number
   // 1 is taken by 'column' above, so the members start at 2.
   oneof predicate {
     Range range = 2;
     Equality equality = 3;
     IsNotNull is_not_null = 4;
     InList in_list = 5;
     IsNull is_null = 6;
     InBloomFilter in_bloom_filter = 7;
   }
 }

 // The primary key range of a Kudu tablet.
 message KeyRangePB {
   // Encoded primary key to begin scanning at (inclusive).
   optional bytes start_primary_key = 1 [(kudu.REDACT) = true];
   // Encoded primary key to stop scanning at (exclusive).
   optional bytes stop_primary_key = 2 [(kudu.REDACT) = true];
   // Number of bytes in chunk. As the field name indicates, this is an
   // estimate rather than an exact size.
   required uint64 size_bytes_estimates = 3;
 }

 // Optional per-table configuration settings. Every field is optional.
 message TableExtraConfigPB {
   // Number of seconds to retain history for tablets in this table,
   // including history required to perform diff scans and incremental
   // backups. Reads initiated at a snapshot that is older than this
   // age will be rejected. Equivalent to --tablet_history_max_age_sec.
   optional int32 history_max_age_sec = 1;

   // Priority level of a table for maintenance. It will be clamped into the
   // range [-FLAGS_max_priority_range, FLAGS_max_priority_range] when
   // calculating the maintenance priority score.
   optional int32 maintenance_priority = 2;

   // If set to true, the table's data on disk is not compacted.
   optional bool disable_compaction = 3;
 }

 // The type of a given table. This is useful in determining whether a
 // table/tablet stores user-specified data, as opposed to being a Kudu-internal
 // system table.
 enum TableTypePB {
   // The table stores user data. Declared first, so under proto2 this is what
   // an unset TableTypePB field without an explicit default reads as.
   DEFAULT_TABLE = 0;

   // The table stores transaction status management metadata.
   TXN_STATUS_TABLE = 1;
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	//
	// Protobufs which are common throughout Kudu.
	//
	// This file may contain protobufs which are persisted on disk
	// as well as sent on the wire. If a particular protobuf is only
	// used as part of the client-server wire protocol, it should go
	// in common/wire_protocol.proto instead. If it is only used within
	// the server(s), it should go in cfile/cfile.proto, server/metadata.proto,
	// etc, as appropriate.
	syntax = "proto2";
	package kudu;

	option java_package = "org.apache.kudu";

	import "kudu/common/row_operations.proto";
	import "kudu/util/block_bloom_filter.proto";
	import "kudu/util/compression/compression.proto";
	import "kudu/util/hash.proto";
	import "kudu/util/pb_util.proto";

	// Data types that a column can be declared with.
	//
	// New types should be appended at the end. Otherwise the AddMapping()
	// functions (such as the one in key_encoder.cc), which keep a vector mapping
	// the protobuf tag to an index, must be updated as well.
	enum DataType {
	  // Declared first, which makes it the proto2 default for this enum.
	  UNKNOWN_DATA = 999;
	  UINT8 = 0;
	  INT8 = 1;
	  UINT16 = 2;
	  INT16 = 3;
	  UINT32 = 4;
	  INT32 = 5;
	  UINT64 = 6;
	  INT64 = 7;
	  STRING = 8;
	  BOOL = 9;
	  FLOAT = 10;
	  DOUBLE = 11;
	  BINARY = 12;
	  UNIXTIME_MICROS = 13;
	  INT128 = 14;
	  DECIMAL32 = 15;
	  DECIMAL64 = 16;
	  DECIMAL128 = 17;
	  // Virtual column; not a real data type.
	  IS_DELETED = 18;
	  VARCHAR = 19;
	  DATE = 20;
	}

	// Encodings that can be selected for a column.
	enum EncodingType {
	  // Declared first, which makes it the proto2 default for this enum.
	  UNKNOWN_ENCODING = 999;
	  AUTO_ENCODING = 0;
	  PLAIN_ENCODING = 1;
	  PREFIX_ENCODING = 2;
	  // Deprecated: GROUP_VARINT encoding is no longer implemented.
	  GROUP_VARINT = 3;
	  RLE = 4;
	  DICT_ENCODING = 5;
	  BIT_SHUFFLE = 6;
	}

	// HMS-related configuration choices for a Kudu mini-cluster.
	//
	// NOTE: the numeric values do not follow declaration order. They are part of
	// the serialized form, so do not renumber them to tidy up.
	enum HmsMode {
	  // Do not start an HMS at all. Declared first, hence the proto2 default.
	  NONE = 0;

	  // Start the HMS, but without configuring it to use the Kudu plugin.
	  DISABLE_HIVE_METASTORE = 3;

	  // Start the HMS with the Kudu plugin configured, while leaving the Kudu
	  // mini-cluster unconfigured to synchronize with it.
	  ENABLE_HIVE_METASTORE = 1;

	  // Start the HMS with the Kudu plugin configured, and configure the Kudu
	  // mini-cluster to synchronize with it.
	  ENABLE_METASTORE_INTEGRATION = 2;
	}

	// Detailed, type-dependent attributes of a column. Which fields are set
	// depends on the column's type.
	message ColumnTypeAttributesPB {
	  // Decimal columns only.
	  optional int32 precision = 1;
	  optional int32 scale = 2;

	  // Varchar columns only.
	  optional int32 length = 3;
	}

	// Describes a single column of a table schema.
	//
	// TODO: Separate the schema attributes that only matter to the server
	// (e.g., encoding and compression) from those that also matter to the
	// client.
	message ColumnSchemaPB {
	  optional uint32 id = 1;
	  required string name = 2;
	  required DataType type = 3;
	  optional bool is_key = 4 [default = false];
	  optional bool is_nullable = 5 [default = false];

	  // Default values.
	  //
	  // NOTE: from a client's point of view a column has exactly one "default
	  // value". The separate read/write defaults are internal and should not be
	  // exposed by any public client API.
	  //
	  //   - Passing a schema to the master for create/alter table: specify the
	  //     default in 'read_default_value'.
	  //   - Opening a table: the client receives both the read and the write
	  //     default, and the write default is the one to expose as the "current"
	  //     default.
	  optional bytes read_default_value = 6;
	  optional bytes write_default_value = 7;

	  // On-disk storage attributes of the column. Depending on context they
	  // won't always be set.
	  optional EncodingType encoding = 8 [default = AUTO_ENCODING];
	  optional CompressionType compression = 9 [default = DEFAULT_COMPRESSION];
	  optional int32 cfile_block_size = 10 [default = 0];

	  optional ColumnTypeAttributesPB type_attributes = 11;

	  // Comment attached to the column.
	  optional string comment = 12;

	  optional bool immutable = 13 [default = false];
	}

	// Describes a change to the schema of a column. Every field is optional.
	// Field number 3 is not used by this message.
	message ColumnSchemaDeltaPB {
	  optional string name = 1;
	  optional string new_name = 2;

	  optional bytes default_value = 4;
	  optional bool remove_default = 5;

	  optional EncodingType encoding = 6;
	  optional CompressionType compression = 7;
	  optional int32 block_size = 8;

	  optional string new_comment = 9;
	}

	// A schema, expressed as a list of column schemas.
	message SchemaPB {
	  repeated ColumnSchemaPB columns = 1;
	}

	// A host/port pair. Both fields are required.
	message HostPortPB {
	  required string host = 1;
	  required uint32 port = 2;
	}

	// External consistency mode requested by a client.
	//
	// It determines how ops, or sequences of operations, that touch several
	// TabletServers on different machines can be observed by external clients.
	//
	// Two things this mode does NOT provide:
	//   - Atomicity: no sequence of operations becomes atomic (or transactional)
	//     merely because an external consistency mode is set.
	//   - Any implication for the consistency between replicas of the same
	//     tablet.
	enum ExternalConsistencyMode {
	  UNKNOWN_EXTERNAL_CONSISTENCY_MODE = 0;

	  // This is the default mode.
	  //
	  // Every write response carries a timestamp, and any further call from the
	  // same client to other servers updates those servers with that timestamp.
	  // It is up to the user to propagate the timestamp through back-channels to
	  // other KuduClients.
	  //
	  // WARNING: if timestamp information is not propagated through
	  // back-channels, any external consistency guarantee under this mode is
	  // negated.
	  //
	  // Example:
	  // 1 - Client A executes operation X in Tablet A
	  // 2 - Afterwards, Client A executes operation Y in Tablet B
	  //
	  // Client B may observe the following operation sequences:
	  // {}, {X}, {X Y}
	  CLIENT_PROPAGATED = 1;

	  // The server guarantees external consistency of each op by ensuring none
	  // of its results become visible until every Kudu server agrees that the op
	  // is in the past. The client is not obligated to forward timestamp
	  // information through back-channels.
	  //
	  // WARNING: depending on the clock synchronization state of TabletServers,
	  // this may imply considerable latency. Moreover, operations that request
	  // COMMIT_WAIT external consistency will outright fail if TabletServer
	  // clocks are either unsynchronized, or synchronized but with a maximum
	  // error which surpasses a pre-configured one.
	  //
	  // Example:
	  // - Client A executes operation X in Tablet A
	  // - Afterwards, Client A executes operation Y in Tablet B
	  //
	  // Client B may observe the following operation sequences:
	  // {}, {X}, {X Y}
	  COMMIT_WAIT = 2;
	}

	// Read modes a client can request; they are set in Scan requests.
	//
	// The server keeps two snapshot boundaries:
	//   - Earliest snapshot: corresponds to the earliest undo records still kept
	//     in the tablet, i.e. how far back the current state (Base) can be
	//     undone.
	//   - Latest snapshot: the instant beyond which no op will have an earlier
	//     timestamp. Usually whatever clock->Now() returns, but it can be higher
	//     if the client propagates a timestamp (see below).
	enum ReadMode {
	  UNKNOWN_READ_MODE = 0;

	  // This is the default mode.
	  //
	  // The server executes the read independently of the clock and always
	  // returns all writes visible at the time the request was received. No
	  // snapshot timestamp is returned, since the read might not be repeatable:
	  // a later read executed at the same snapshot timestamp might yield rows
	  // that were committed by in-flight ops.
	  READ_LATEST = 1;

	  // The server attempts to perform the read at the required snapshot; if no
	  // snapshot is defined it takes the current time as the snapshot timestamp.
	  // Snapshot reads are repeatable: all future reads at the same timestamp
	  // yield the same rows. The price is waiting for in-flight ops whose
	  // timestamp is lower than the snapshot's timestamp to complete.
	  //
	  // Mixing reads and writes:
	  //   - Clients that write with COMMIT_WAIT and then use the returned
	  //     write_timestamp for snapshot reads are guaranteed that the snapshot
	  //     time is considered in the past by all servers; no additional action
	  //     is necessary.
	  //   - Clients using CLIENT_PROPAGATED must forcibly propagate the
	  //     timestamps even at read time, so that the server will not generate
	  //     any more ops before the snapshot requested by the client. This is
	  //     implemented by allowing the client to specify one or two timestamps.
	  //     The first, obtained from the previous CLIENT_PROPAGATED write
	  //     (directly or through back-channels), must be signed and will be
	  //     checked by the server. The second, if defined, is the actual
	  //     snapshot read time. When both are given, the second must be lower
	  //     than or equal to the first.
	  //
	  // TODO implement actually signing the propagated timestamp.
	  READ_AT_SNAPSHOT = 2;

	  // The server picks a timestamp for a server-local snapshot scan such that:
	  //   (1) it is higher than the propagated timestamp, and
	  //   (2) it attempts to minimize latency caused by waiting for outstanding
	  //       write ops to complete.
	  //
	  // More specifically, the server chooses the latest timestamp higher than
	  // the provided propagated timestamp bound that allows the reads to execute
	  // without being blocked by in-flight ops (the read can still be blocked if
	  // the propagated timestamp is higher than some in-flight ops). If no
	  // propagated timestamp is provided, the server chooses a timestamp such
	  // that all ops before it are committed. The chosen timestamp is returned
	  // to the client as 'snapshot timestamp', and the Kudu client library uses
	  // it as the propagated timestamp for subsequent reads to avoid
	  // unnecessarily waiting.
	  //
	  // Reads in this mode are not repeatable: two READ_YOUR_WRITES reads, even
	  // with the same propagated timestamp bound, can execute at different
	  // timestamps and thus return different results. The mode does, however,
	  // allow read-your-writes and read-your-reads for each client, because the
	  // chosen timestamp must be higher than the one of the last write or read,
	  // known from the propagated timestamp.
	  READ_YOUR_WRITES = 3;
	}

	// Order modes a client can request; they are specified in new scan requests.
	//
	// An ordered scan is fault-tolerant and can be retried elsewhere if a tablet
	// server fails, but it costs additional overhead because the tablet server
	// needs to sort the result rows.
	enum OrderMode {
	  UNKNOWN_ORDER_MODE = 0;

	  // The default order mode.
	  UNORDERED = 1;

	  ORDERED = 2;
	}

	// Policy used to pick one replica among several.
	enum ReplicaSelection {
	  UNKNOWN_REPLICA_SELECTION = 0;

	  // Pick the LEADER replica.
	  LEADER_ONLY = 1;

	  // Pick the replica closest to the client. From closest to furthest,
	  // replicas are classified as:
	  //   - Local replicas
	  //   - Replicas whose tablet server has the same location as the client
	  //   - All other replicas
	  CLOSEST_REPLICA = 2;
	}

	// Serialized form of a Kudu table's partition schema.
	message PartitionSchemaPB {
	  // Identifies a column within a partition schema. A client creating a table
	  // generally uses the name, because column IDs are assigned by the master;
	  // every other use of partition schemas uses the numeric column ID.
	  message ColumnIdentifierPB {
	    oneof identifier {
	      int32 id = 1;
	      string name = 2;
	    }
	  }

	  message RangeSchemaPB {
	    // Identifiers of the columns that make up the range. Each of them must
	    // be a component of the primary key.
	    repeated ColumnIdentifierPB columns = 1;
	  }

	  message HashBucketSchemaPB {
	    // Identifiers of the columns that are hashed. Each of them must be a
	    // component of the primary key.
	    repeated ColumnIdentifierPB columns = 1;

	    // How many buckets the columns are hashed into. Must be at least 2.
	    required int32 num_buckets = 2;

	    // Seed for the hash calculation. Administrators may set it per table to
	    // randomize the mapping of rows to buckets, which gives some protection
	    // against denial of service attacks when the hash bucket columns contain
	    // user provided input.
	    optional uint32 seed = 3;

	    // Hash algorithm for calculating the hash bucket.
	    // NOTE: this is not used yet -- don't expect setting it to have any
	    // effect.
	    optional HashAlgorithm hash_algorithm = 4;
	  }

	  // A range partition that has its own, custom hash schema.
	  message RangeWithHashSchemaPB {
	    // Row operations holding the lower and upper range bound of the range.
	    optional RowOperationsPB range_bounds = 1;

	    // Hash schema of the range.
	    repeated HashBucketSchemaPB hash_schema = 2;
	  }

	  // Table-wide hash schema. For a particular range it may be overridden by
	  // the corresponding element of 'custom_hash_schema_ranges'.
	  repeated HashBucketSchemaPB hash_schema = 1;

	  // Range schema that partitions the key space into ranges.
	  optional RangeSchemaPB range_schema = 2;

	  // Two fields were deprecated in favor of 'custom_hash_schema_ranges'.
	  reserved 3, 4;

	  // When empty, the table-wide hash schema from 'hash_schema' applies to
	  // every range of the table. Otherwise, particular ranges have their hash
	  // schema as specified by the corresponding elements of this field.
	  repeated RangeWithHashSchemaPB custom_hash_schema_ranges = 5;
	}

	// Serialized form of a single partition of a Kudu table.
	message PartitionPB {
	  // Hash buckets of the partition. There must be as many entries as there
	  // are hash dimensions in the partition's schema.
	  repeated int32 hash_buckets = 1 [packed = true];

	  // Encoded partition key at which the partition starts (inclusive).
	  optional bytes partition_key_start = 2;

	  // Encoded partition key at which the partition ends (exclusive).
	  optional bytes partition_key_end = 3;
	}

	// A predicate applicable to a single Kudu column.
	message ColumnPredicatePB {
	  // Name of the column the predicate applies to.
	  optional string column = 1;

	  message Range {
	    // Encoding of the bounds:
	    //   - STRING/BINARY values: the exact string value of the bound.
	    //   - any other type: the canonical x86 in-memory representation, e.g.
	    //     a little-endian value for uint32s.
	    //
	    // This predicate type should not be used for NULL data: for the
	    // comparison operator, NULL is defined to be neither greater than nor
	    // less than other values.

	    // Lower bound (inclusive).
	    optional bytes lower = 1 [(kudu.REDACT) = true];

	    // Upper bound (exclusive).
	    optional bytes upper = 2 [(kudu.REDACT) = true];
	  }

	  message Equality {
	    // The value to match. See the comment in Range for notes on the
	    // encoding.
	    optional bytes value = 1 [(kudu.REDACT) = true];
	  }

	  message InList {
	    // Values for the field. See the comment in Range for notes on the
	    // encoding.
	    repeated bytes values = 1 [(kudu.REDACT) = true];
	  }

	  // Carries no fields.
	  message IsNotNull {}

	  // Carries no fields.
	  message IsNull {}

	  message InBloomFilter {
	    // Bloom filters for the field.
	    repeated BlockBloomFilterPB bloom_filters = 1;

	    // 'lower' and 'upper' are optional for InBloomFilter. When both an
	    // InBloomFilter and a Range predicate are used on the same column, the
	    // merged result can be an InBloomFilter within the specified range.

	    // Lower bound (inclusive).
	    optional bytes lower = 2 [(kudu.REDACT) = true];

	    // Upper bound (exclusive).
	    optional bytes upper = 3 [(kudu.REDACT) = true];
	  }

	  // Exactly one kind of predicate can be set at a time.
	  oneof predicate {
	    Range range = 2;
	    Equality equality = 3;
	    IsNotNull is_not_null = 4;
	    InList in_list = 5;
	    IsNull is_null = 6;
	    InBloomFilter in_bloom_filter = 7;
	  }
	}

	// A range of primary keys within a Kudu tablet.
	message KeyRangePB {
	  // Encoded primary key at which scanning begins (inclusive).
	  optional bytes start_primary_key = 1 [(kudu.REDACT) = true];

	  // Encoded primary key at which scanning stops (exclusive).
	  optional bytes stop_primary_key = 2 [(kudu.REDACT) = true];

	  // Number of bytes in the chunk.
	  required uint64 size_bytes_estimates = 3;
	}

	// Optional per-table configuration settings.
	message TableExtraConfigPB {
	  // How many seconds of history to retain for the tablets of this table,
	  // including the history needed for diff scans and incremental backups.
	  // A read initiated at a snapshot older than this age is rejected.
	  // Equivalent to --tablet_history_max_age_sec.
	  optional int32 history_max_age_sec = 1;

	  // Maintenance priority level of the table. The value is clamped into the
	  // range [-FLAGS_max_priority_range, FLAGS_max_priority_range] when the
	  // maintenance priority score is calculated.
	  optional int32 maintenance_priority = 2;

	  // When set to true, the table's data on disk is not compacted.
	  optional bool disable_compaction = 3;
	}

	// Type of a table. It helps tell a table/tablet that stores user-specified
	// data apart from a Kudu-internal system table.
	enum TableTypePB {
	  // A table holding user data.
	  DEFAULT_TABLE = 0;

	  // A table holding transaction status management metadata.
	  TXN_STATUS_TABLE = 1;
	}