blob: a3505bb0799af2c5b763af617f05a397f0eddd99 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// Protobufs which are common throughout Kudu.
//
// This file may contain protobufs which are persisted on disk
// as well as sent on the wire. If a particular protobuf is only
// used as part of the client-server wire protocol, it should go
// in common/wire_protocol.proto instead. If it is only used within
// the server(s), it should go in cfile/cfile.proto, server/metadata.proto,
// etc, as appropriate.
package kudu;
option java_package = "org.kududb";
// Data types a column can have (see ColumnSchemaPB.type).
//
// When adding a new type, either append it after the last existing entry or
// update the AddMapping() functions (such as the one in key_encoder.cc)
// whose vector maps the protobuf tag to an index.
enum DataType {
  // Declared first, which makes it the proto2 default for any DataType
  // field that does not specify its own default. Do not reorder.
  UNKNOWN_DATA = 999;

  UINT8 = 0;
  INT8 = 1;
  UINT16 = 2;
  INT16 = 3;
  UINT32 = 4;
  INT32 = 5;
  UINT64 = 6;
  INT64 = 7;
  STRING = 8;
  BOOL = 9;
  FLOAT = 10;
  DOUBLE = 11;
  BINARY = 12;
  TIMESTAMP = 13;
}
// Encodings that can be named for a column (see ColumnSchemaPB.encoding).
enum EncodingType {
  // Declared first, so it is the proto2 default when a field of this type
  // gives no explicit default. Do not reorder.
  UNKNOWN_ENCODING = 999;

  AUTO_ENCODING = 0;
  PLAIN_ENCODING = 1;
  PREFIX_ENCODING = 2;
  GROUP_VARINT = 3;
  RLE = 4;
  DICT_ENCODING = 5;
  BIT_SHUFFLE = 6;
}
// Compression choices that can be named for a column
// (see ColumnSchemaPB.compression).
enum CompressionType {
  // Declared first, so it is the proto2 default when a field of this type
  // gives no explicit default. Do not reorder.
  UNKNOWN_COMPRESSION = 999;

  DEFAULT_COMPRESSION = 0;
  NO_COMPRESSION = 1;
  SNAPPY = 2;
  LZ4 = 3;
  ZLIB = 4;
}
// Schema of a single column.
//
// TODO: Separate the schema attributes that are relevant only to the server
// (e.g., encoding and compression) from those that also matter to the
// client.
message ColumnSchemaPB {
  optional uint32 id = 1;
  required string name = 2;
  required DataType type = 3;
  optional bool is_key = 4 [default = false];
  optional bool is_nullable = 5 [default = false];
  optional bytes read_default_value = 6;
  optional bytes write_default_value = 7;

  // The remaining attributes describe the on-disk storage of the column.
  // Depending on context, they are not always set.
  optional EncodingType encoding = 8 [default = AUTO_ENCODING];
  optional CompressionType compression = 9 [default = DEFAULT_COMPRESSION];
  optional int32 cfile_block_size = 10 [default = 0];
}
// A schema, expressed as a list of column schemas.
message SchemaPB {
  repeated ColumnSchemaPB columns = 1;
}
// A host/port pair. Both parts are required.
message HostPortPB {
  required string host = 1;
  required uint32 port = 2;
}
// The external consistency mode for client requests.
// This defines how transactions and/or sequences of operations that touch
// several TabletServers, in different machines, can be observed by external
// clients.
//
// Note that ExternalConsistencyMode makes no guarantee on atomicity, i.e.
// no sequence of operations is made atomic (or transactional) just because
// an external consistency mode is set.
// Note also that ExternalConsistencyMode has no implication on the
// consistency between replicas of the same tablet.
enum ExternalConsistencyMode {
  // Declared first, so it is the proto2 default for a field of this type
  // that gives no explicit default.
  UNKNOWN_EXTERNAL_CONSISTENCY_MODE = 0;

  // The response to any write will contain a timestamp.
  // Any further calls from the same client to other servers will update
  // those servers with that timestamp. The user will make sure that the
  // timestamp is propagated through back-channels to other
  // KuduClients.
  //
  // WARNING: Failure to propagate timestamp information through
  // back-channels will negate any external consistency guarantee under this
  // mode.
  //
  // Example:
  // 1 - Client A executes operation X in Tablet A
  // 2 - Afterwards, Client A executes operation Y in Tablet B
  //
  // Client B may observe the following operation sequences:
  // {}, {X}, {X Y}
  //
  // This is the default mode.
  CLIENT_PROPAGATED = 1;

  // The server will guarantee that each transaction is externally
  // consistent by making sure that none of its results are visible
  // until every Kudu server agrees that the transaction is in the past.
  // The client is not obligated to forward timestamp information
  // through back-channels.
  //
  // WARNING: Depending on the clock synchronization state of TabletServers
  // this may imply considerable latency. Moreover, operations with
  // COMMIT_WAIT requested external consistency will outright fail if
  // TabletServer clocks are either unsynchronized or synchronized but
  // with a maximum error which surpasses a pre-configured one.
  //
  // Example:
  // - Client A executes operation X in Tablet A
  // - Afterwards, Client A executes operation Y in Tablet B
  //
  // Client B may observe the following operation sequences:
  // {}, {X}, {X Y}
  COMMIT_WAIT = 2;
}
// The possible read modes for clients.
// Clients set these in Scan requests.
// The server keeps 2 snapshot boundaries:
// - The earliest snapshot: this corresponds to the earliest kept undo records
//   in the tablet, meaning the current state (Base) can be undone up to
//   this snapshot.
// - The latest snapshot: this corresponds to the instant beyond which no
//   transaction will have an earlier timestamp. Usually this corresponds
//   to whatever clock->Now() returns, but can be higher if the client
//   propagates a timestamp (see below).
enum ReadMode {
  // Declared first, so it is the proto2 default for a field of this type
  // that gives no explicit default.
  UNKNOWN_READ_MODE = 0;

  // When READ_LATEST is specified the server will execute the read
  // independently of the clock and will always return all visible writes at
  // the time the request was received. This type of read does not return a
  // snapshot timestamp since it might not be repeatable, i.e. a later read
  // executed at the same snapshot timestamp might yield rows that were
  // committed by in-flight transactions.
  //
  // This is the default mode.
  READ_LATEST = 1;

  // When READ_AT_SNAPSHOT is specified the server will attempt to perform a
  // read at the required snapshot. If no snapshot is defined the server will
  // take the current time as the snapshot timestamp. Snapshot reads are
  // repeatable, i.e. all future reads at the same timestamp will yield the
  // same rows. This is performed at the expense of waiting for in-flight
  // transactions whose timestamp is lower than the snapshot's timestamp to
  // complete.
  //
  // When mixing reads and writes, clients that specify COMMIT_WAIT as their
  // external consistency mode and then use the returned write_timestamp to
  // perform snapshot reads are guaranteed that the snapshot time is
  // considered in the past by all servers and no additional action is
  // necessary. Clients using CLIENT_PROPAGATED however must forcibly
  // propagate the timestamps even at read time, so that the server will not
  // generate any more transactions before the snapshot requested by the
  // client.
  //
  // The latter option is implemented by allowing the client to specify one
  // or two timestamps. The first one, obtained from the previous
  // CLIENT_PROPAGATED write, directly or through back-channels, must be
  // signed and will be checked by the server. The second one, if defined, is
  // the actual snapshot read time. When selecting both, the latter must be
  // lower than or equal to the former.
  //
  // TODO implement actually signing the propagated timestamp.
  READ_AT_SNAPSHOT = 2;
}
// The possible order modes for clients.
// Clients specify these in new scan requests.
//
// Ordered scans are fault-tolerant, and can be retried elsewhere in the case
// of tablet server failure. However, ordered scans impose additional
// overhead since the tablet server needs to sort the result rows.
enum OrderMode {
  // Declared first, so it is the proto2 default for a field of this type
  // that gives no explicit default.
  UNKNOWN_ORDER_MODE = 0;

  // This is the default order mode.
  UNORDERED = 1;

  ORDERED = 2;
}
// The serialized format of a Kudu table partition schema.
message PartitionSchemaPB {
  // Identifies a column within a partition schema, either by numeric ID or
  // by name. In general, the name is used when a client creates the table,
  // because column IDs are assigned by the master. All other uses of
  // partition schemas use the numeric column ID.
  message ColumnIdentifierPB {
    oneof identifier {
      int32 id = 1;
      string name = 2;
    }
  }

  message RangeSchemaPB {
    // Column identifiers of columns included in the range. All columns must
    // be a component of the primary key.
    repeated ColumnIdentifierPB columns = 1;
  }

  message HashBucketSchemaPB {
    // Column identifiers of columns included in the hash. Every column must
    // be a component of the primary key.
    repeated ColumnIdentifierPB columns = 1;

    // Number of buckets into which columns will be hashed. Must be at
    // least 2.
    required int32 num_buckets = 2;

    // Seed value for hash calculation. Administrators may set a seed value
    // on a per-table basis in order to randomize the mapping of rows to
    // buckets. Setting a seed provides some amount of protection against
    // denial of service attacks when the hash bucket columns contain user
    // provided input.
    optional uint32 seed = 3;

    enum HashAlgorithm {
      UNKNOWN = 0;
      MURMUR_HASH_2 = 1;
    }

    // The hash algorithm to use for calculating the hash bucket.
    optional HashAlgorithm hash_algorithm = 4;
  }

  repeated HashBucketSchemaPB hash_bucket_schemas = 1;
  optional RangeSchemaPB range_schema = 2;
}
// The serialized format of a Kudu table partition.
message PartitionPB {
  // The hash buckets of the partition. The number of hash buckets must
  // match the number of hash bucket components in the partition's schema.
  repeated int32 hash_buckets = 1 [packed = true];

  // The encoded start partition key (inclusive).
  optional bytes partition_key_start = 2;

  // The encoded end partition key (exclusive).
  optional bytes partition_key_end = 3;
}
// A predicate that can be applied on a Kudu column.
message ColumnPredicatePB {
  // The predicate column name.
  optional string column = 1;

  // A range predicate over the column.
  //
  // Bounds should be encoded as follows:
  // - STRING/BINARY values: simply the exact string value for the bound.
  // - other type: the canonical x86 in-memory representation -- e.g. for
  //   uint32s, a little-endian value.
  //
  // Note that this predicate type should not be used for NULL data --
  // NULL is defined to be neither greater than nor less than other values
  // for the comparison operator. We will eventually add a special
  // predicate type for null-ness.
  message Range {
    // The inclusive lower bound.
    optional bytes lower = 1;

    // The exclusive upper bound.
    optional bytes upper = 2;
  }

  message Equality {
    // The value to match. See the comment in Range for notes on the
    // encoding.
    optional bytes value = 1;
  }

  // Carries no fields; selecting it in the oneof below is the entire
  // predicate.
  message IsNotNull {}

  // At most one predicate kind may be set.
  oneof predicate {
    Range range = 2;
    Equality equality = 3;
    IsNotNull is_not_null = 4;
  }
}