blob: 01e5cb6a5d12e2e56449fb758c8c6866e20bb04f [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
import "kv.proto";
import "storage.proto";
package bookkeeper.proto.kv.rpc;
option java_multiple_files = true;
option java_package = "org.apache.bookkeeper.stream.proto.kv.rpc";
// Routing Header added to identify the range & storage container information (used in the stream store)
message RoutingHeader {
// stream id
int64 stream_id = 1;
// range id
int64 range_id = 2;
// routing key
bytes r_key = 3;
}
service TableService {
// Range gets the keys in the range from the key-value store.
// NOT supported yet.
rpc Range(RangeRequest) returns (RangeResponse) {}
// Put puts the given key into the key-value store.
// A put request increments the revision of the key-value store
// and generates one event in the event history.
rpc Put(PutRequest) returns (PutResponse) {}
// DeleteRange deletes the given range from the key-value store.
// A delete request increments the revision of the key-value store
// and generates a delete event in the event history for every deleted key.
rpc Delete(DeleteRangeRequest) returns (DeleteRangeResponse) {}
// Txn processes multiple requests in a single transaction.
// A txn request increments the revision of the key-value store
// and generates events with the same revision for every completed request.
// It is not allowed to modify the same key several times within one txn.
rpc Txn(TxnRequest) returns (TxnResponse) {}
// Increment increments the amount associated with the keys
rpc Increment(IncrementRequest) returns (IncrementResponse) {}
}
message ResponseHeader {
// Status Code
storage.StatusCode code = 1;
// routing header
RoutingHeader routing_header = 99;
}
message RangeRequest {
enum SortOrder {
NONE = 0; // default, no sorting
ASCEND = 1; // lowest target value first
DESCEND = 2; // highest target value first
}
enum SortTarget {
KEY = 0;
VERSION = 1;
CREATE = 2;
MOD = 3;
VALUE = 4;
}
// key is the first key for the range. If range_end is not given, the request only looks up key.
bytes key = 1;
// range_end is the upper bound on the requested range [key, range_end).
// If range_end is '\0', the range is all keys >= key.
// If range_end is key plus one (e.g., "aa"+1 == "ab", "a\xff"+1 == "b"),
// then the range request gets all keys prefixed with key.
// If both key and range_end are '\0', then the range request returns all keys.
bytes range_end = 2;
// limit is a limit on the number of keys returned for the request.
int64 limit = 3;
// revision is the point-in-time of the key-value store to use for the range.
// If revision is less or equal to zero, the range is over the newest key-value store.
// If the revision has been compacted, ErrCompacted is returned as a response.
int64 revision = 4;
// sort_order is the order for returned sorted results.
SortOrder sort_order = 5;
// sort_target is the key-value field to use for sorting.
SortTarget sort_target = 6;
// serializable sets the range request to use serializable member-local reads.
// Range requests are linearizable by default; linearizable requests have higher
// latency and lower throughput than serializable requests but reflect the current
// consensus of the cluster. For better performance, in exchange for possible stale reads,
// a serializable range request is served locally without needing to reach consensus
// with other nodes in the cluster.
bool serializable = 7;
// keys_only when set returns only the keys and not the values.
bool keys_only = 8;
// count_only when set returns only the count of the keys in the range.
bool count_only = 9;
// min_mod_revision is the lower bound for returned key mod revisions; all keys with
// lesser mod revisions will be filtered away.
int64 min_mod_revision = 10;
// max_mod_revision is the upper bound for returned key mod revisions; all keys with
// greater mod revisions will be filtered away.
int64 max_mod_revision = 11;
// min_create_revision is the lower bound for returned key create revisions; all keys with
// lesser create trevisions will be filtered away.
int64 min_create_revision = 12;
// max_create_revision is the upper bound for returned key create revisions; all keys with
// greater create revisions will be filtered away.
int64 max_create_revision = 13;
// header
RoutingHeader header = 99;
}
message RangeResponse {
ResponseHeader header = 1;
// kvs is the list of key-value pairs matched by the range request.
// kvs is empty when count is requested.
repeated kv.KeyValue kvs = 2;
// more indicates if there are more keys to return in the requested range.
bool more = 3;
// count is set to the number of keys within the range when requested.
int64 count = 4;
}
message PutRequest {
// key is the key, in bytes, to put into the key-value store.
bytes key = 1;
// value is the value, in bytes, to associate with the key in the key-value store.
bytes value = 2;
// lease is the lease ID to associate with the key in the key-value store. A lease
// value of 0 indicates no lease.
int64 lease = 3;
// If prev_kv is set, etcd gets the previous key-value pair before changing it.
// The previous key-value pair will be returned in the put response.
bool prev_kv = 4;
// If ignore_value is set, etcd updates the key using its current value.
// Returns an error if the key does not exist.
bool ignore_value = 5;
// If ignore_lease is set, etcd updates the key using its current lease.
// Returns an error if the key does not exist.
bool ignore_lease = 6;
// header
RoutingHeader header = 99;
int64 expected_version = 100;
}
message PutResponse {
ResponseHeader header = 1;
// if prev_kv is set in the request, the previous key-value pair will be returned.
kv.KeyValue prev_kv = 2;
}
message IncrementRequest {
// key is the key, in bytes, to increment the value
bytes key = 1;
// amount is a long number to increment
int64 amount = 2;
// return the total amount after increment
bool get_total = 3;
// header
RoutingHeader header = 99;
}
message IncrementResponse {
ResponseHeader header = 1;
// if get_total is set in the request, the total amount will be returned after this increment.
int64 total_amount = 2;
}
message DeleteRangeRequest {
// key is the first key to delete in the range.
bytes key = 1;
// range_end is the key following the last key to delete for the range [key, range_end).
// If range_end is not given, the range is defined to contain only the key argument.
// If range_end is one bit larger than the given key, then the range is all
// the all keys with the prefix (the given key).
// If range_end is '\0', the range is all keys greater than or equal to the key argument.
bytes range_end = 2;
// If prev_kv is set, etcd gets the previous key-value pairs before deleting it.
// The previous key-value pairs will be returned in the delte response.
bool prev_kv = 3;
// header
RoutingHeader header = 99;
}
message DeleteRangeResponse {
ResponseHeader header = 1;
// deleted is the number of keys deleted by the delete range request.
int64 deleted = 2;
// if prev_kv is set in the request, the previous key-value pairs will be returned.
repeated kv.KeyValue prev_kvs = 3;
}
message RequestOp {
// request is a union of request types accepted by a transaction.
oneof request {
RangeRequest request_range = 1;
PutRequest request_put = 2;
DeleteRangeRequest request_delete_range = 3;
}
}
message ResponseOp {
// response is a union of response types returned by a transaction.
oneof response {
RangeResponse response_range = 1;
PutResponse response_put = 2;
DeleteRangeResponse response_delete_range = 3;
}
}
message Compare {
enum CompareResult {
EQUAL = 0;
GREATER = 1;
LESS = 2;
NOT_EQUAL = 3;
}
enum CompareTarget {
VERSION = 0;
CREATE = 1;
MOD = 2;
VALUE= 3;
}
// result is logical comparison operation for this comparison.
CompareResult result = 1;
// target is the key-value field to inspect for the comparison.
CompareTarget target = 2;
// key is the subject key for the comparison operation.
bytes key = 3;
oneof target_union {
// version is the version of the given key
int64 version = 4;
// create_revision is the creation revision of the given key
int64 create_revision = 5;
// mod_revision is the last modified revision of the given key.
int64 mod_revision = 6;
// value is the value of the given key, in bytes.
bytes value = 7;
}
}
// From google paxosdb paper:
// Our implementation hinges around a powerful primitive which we call MultiOp. All other database
// operations except for iteration are implemented as a single call to MultiOp. A MultiOp is applied atomically
// and consists of three components:
// 1. A list of tests called guard. Each test in guard checks a single entry in the database. It may check
// for the absence or presence of a value, or compare with a given value. Two different tests in the guard
// may apply to the same or different entries in the database. All tests in the guard are applied and
// MultiOp returns the results. If all tests are true, MultiOp executes t op (see item 2 below), otherwise
// it executes f op (see item 3 below).
// 2. A list of database operations called t op. Each operation in the list is either an insert, delete, or
// lookup operation, and applies to a single database entry. Two different operations in the list may apply
// to the same or different entries in the database. These operations are executed
// if guard evaluates to
// true.
// 3. A list of database operations called f op. Like t op, but executed if guard evaluates to false.
message TxnRequest {
// compare is a list of predicates representing a conjunction of terms.
// If the comparisons succeed, then the success requests will be processed in order,
// and the response will contain their respective responses in order.
// If the comparisons fail, then the failure requests will be processed in order,
// and the response will contain their respective responses in order.
repeated Compare compare = 1;
// success is a list of requests which will be applied when compare evaluates to true.
repeated RequestOp success = 2;
// failure is a list of requests which will be applied when compare evaluates to false.
repeated RequestOp failure = 3;
// header
RoutingHeader header = 99;
}
message TxnResponse {
ResponseHeader header = 1;
// succeeded is set to true if the compare evaluated to true or false otherwise.
bool succeeded = 2;
// responses is a list of responses corresponding to the results from applying
// success if succeeded is true or failure if succeeded is false.
repeated ResponseOp responses = 3;
}