| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| syntax = "proto2"; |
| package kudu.tserver; |
| |
| option java_package = "org.apache.kudu.tserver"; |
| |
| import "kudu/common/common.proto"; |
| import "kudu/common/row_operations.proto"; |
| import "kudu/common/wire_protocol.proto"; |
| import "kudu/consensus/metadata.proto"; |
| import "kudu/security/token.proto"; |
| import "kudu/tablet/tablet.proto"; |
| import "kudu/util/pb_util.proto"; |
| |
// Error payload used for failures that are specific to the tablet server.
message TabletServerErrorPB {
  // Tablet-server-specific error codes.
  enum Code {
    // Catch-all for an error that has no more specific code. The code and
    // message carried in 'status' may reveal more details.
    //
    // RPCs should avoid returning this value, since callers cannot easily
    // parse such an error.
    UNKNOWN_ERROR = 1;

    // The schema supplied with the request was not well-formed.
    INVALID_SCHEMA = 2;

    // The row data supplied with the request was not well-formed.
    INVALID_ROW_BLOCK = 3;

    // The mutations or mutation keys supplied with the request were not
    // well-formed.
    INVALID_MUTATION = 4;

    // The schema supplied with the request did not match the tablet's actual
    // schema.
    MISMATCHED_SCHEMA = 5;

    // This server does not currently host the requested tablet_id.
    TABLET_NOT_FOUND = 6;

    // The request referenced a scanner ID that was either never created or
    // has expired.
    SCANNER_EXPIRED = 7;

    // The scan specification was invalid. Examples include invalid or
    // unimplemented combinations of scan options, and predicate values of
    // incorrect size.
    INVALID_SCAN_SPEC = 8;

    // The supplied configuration was not well-formed and/or carried a
    // sequence number below that of the current config.
    INVALID_CONFIG = 9;

    // Returned for a create tablet request when the tablet already exists.
    TABLET_ALREADY_EXISTS = 10;

    // An "alter" request is rejected with this code when the tablet has a
    // newer schema than the requested one.
    TABLET_HAS_A_NEWER_SCHEMA = 11;

    // This server hosts the tablet, but the tablet is not in RUNNING state.
    TABLET_NOT_RUNNING = 12;

    // The client asked for a snapshot read, but the snapshot was invalid.
    // The client may be able to recover by choosing a different start or end
    // timestamp for the snapshot.
    INVALID_SNAPSHOT = 13;

    // The scan call sequence ID in the request was invalid.
    INVALID_SCAN_CALL_SEQ_ID = 14;

    // This tserver is not the leader of the consensus configuration.
    NOT_THE_LEADER = 15;

    // The destination UUID named in the request is not this server's.
    WRONG_SERVER_UUID = 16;

    // The compare-and-swap specified by an atomic RPC operation failed.
    CAS_FAILED = 17;

    // The requested operation is already in progress, e.g. TabletCopy.
    ALREADY_INPROGRESS = 18;

    // The request is throttled.
    THROTTLED = 19;

    // The tablet needs to be evicted and reassigned.
    TABLET_FAILED = 20;

    // The given user is not allowed to make this request.
    NOT_AUTHORIZED = 21;

    // The requested transaction is not in the appropriate state.
    TXN_ILLEGAL_STATE = 22;

    // The requested transaction participant op was already applied.
    TXN_OP_ALREADY_APPLIED = 23;

    // The requested transaction must be aborted for deadlock prevention.
    TXN_LOCKED_ABORT = 24;

    // The requested transaction participant op or write op must be retried,
    // because another transaction holds the required lock.
    TXN_LOCKED_RETRY_OP = 25;
  }

  // The error code.
  required Code code = 1 [default = UNKNOWN_ERROR];

  // The Status object for the error. It includes a textual message that may
  // be more useful in log messages and the like, although its own error code
  // is less specific than 'code'.
  required AppStatusPB status = 2;
}
| |
| |
// Ping request. The message declares no fields.
message PingRequestPB {}
| |
// Ping response. The message declares no fields.
message PingResponsePB {}
| |
// A batch of insert/mutate requests addressed to a single tablet.
message WriteRequestPB {
  required bytes tablet_id = 1;

  // The schema as the client sees it. It may be out of date; in that case it
  // is automatically projected onto the current schema, with defaults/NULLs
  // filled in.
  optional SchemaPB schema = 2;

  // The operations to perform (insert/update/delete).
  optional RowOperationsPB row_operations = 3;

  // The consistency mode required for this write.
  optional ExternalConsistencyMode external_consistency_mode = 4
      [default = CLIENT_PROPAGATED];

  // A timestamp the client obtained from a previous request.
  // TODO crypto sign this and propagate the signature along with
  // the timestamp.
  optional fixed64 propagated_timestamp = 5;

  // Authorization token used to authorize this request.
  optional security.SignedTokenPB authz_token = 6;

  // ID of the transaction this write request belongs to, if any.
  optional int64 txn_id = 7;
}
| |
// Response to a WriteRequestPB.
message WriteResponsePB {
  // Set when the write request failed as a whole; holds the error status
  // that caused the failure. This kind of error is triggered by cases such
  // as the tablet not being on this server, or the schema not matching.
  // Errors specific to a given row are instead recorded in 'per_row_errors'
  // below, even if all rows failed.
  optional TabletServerErrorPB error = 1;

  // An error tied to one particular row operation of the batch.
  message PerRowErrorPB {
    // Index of the row within the incoming batch.
    required int32 row_index = 1;

    // The error that occurred for that row.
    required AppStatusPB error = 2;
  }

  // Errors that occurred with particular row operations, if any.
  repeated PerRowErrorPB per_row_errors = 2;

  // The timestamp the server chose for this write.
  // TODO KUDU-611 propagate timestamps with server signature.
  optional fixed64 timestamp = 3;

  // Write operation metrics of this RPC.
  // They contain the number of successful/unsuccessful operations (fields
  // 'successful_inserts' to 'delete_ignore_errors' of the ResourceMetricsPB
  // message) related to the associated write request.
  // They additionally contain the 'commit_wait_duration_usec' metric if the
  // responding server is a LEADER and the request's
  // external_consistency_mode is COMMIT_WAIT.
  optional ResourceMetricsPB resource_metrics = 4;
}
| |
// Request to list tablets.
message ListTabletsRequestPB {
  // Whether the response should carry schema information. The schema fields
  // can be relatively large, so leaving them out makes this call less
  // heavy-weight.
  optional bool need_schema_info = 1 [default = true];
}
| |
// Response to a ListTabletsRequestPB.
message ListTabletsResponsePB {
  optional TabletServerErrorPB error = 1;

  // One listed tablet: its status plus, optionally, schema information.
  message StatusAndSchemaPB {
    required tablet.TabletStatusPB tablet_status = 1;

    // 'schema' and 'partition_schema' are only included if the original
    // request set 'need_schema_info'.
    optional SchemaPB schema = 2;
    optional PartitionSchemaPB partition_schema = 3;

    optional uint32 schema_version = 4;
    optional consensus.RaftPeerPB.Role role = 5;
  }

  repeated StatusAndSchemaPB status_and_schema = 2;
}
| |
// DEPRECATED: Use ColumnPredicatePB instead.
//
// A range predicate on one of the columns in the underlying data.
message ColumnRangePredicatePB {
  required ColumnSchemaPB column = 1;

  // Encoding of the bounds:
  // - STRING values: simply the exact string value for the bound.
  // - any other type: the canonical x86 in-memory representation -- e.g. a
  //   little-endian value for uint32s.
  //
  // This predicate type should not be used for NULL data: NULL is defined to
  // be neither greater than nor less than other values for the comparison
  // operator. A special predicate type for null-ness will eventually be
  // added.
  //
  // Both bounds are inclusive.
  optional bytes lower_bound = 2 [(kudu.REDACT) = true];
  optional bytes inclusive_upper_bound = 3 [(kudu.REDACT) = true];
}
| |
// A list of predicates, used by the Java client. Expected to evolve rapidly
// into something more reusable as a way to pass scanner configurations.
message ColumnRangePredicateListPB {
  repeated ColumnRangePredicatePB range_predicates = 1;
}
| |
// Row format flags. NewScanRequestPB.row_format_flags carries these on/off
// modifiers encoded as a bitset.
enum RowFormatFlags {
  // No modifier set.
  NO_FLAGS = 0;

  // NOTE(review): presumably asks the server to pad UNIXTIME_MICROS slots to
  // 16 bytes (cf. TabletServerFeatures.PAD_UNIXTIME_MICROS_TO_16_BYTES) --
  // confirm against the server implementation.
  PAD_UNIX_TIME_MICROS_TO_16_BYTES = 1;

  // Return a ColumnarRowBlockPB rather than a RowwiseRowBlockPB.
  // Incompatible with PAD_UNIX_TIME_MICROS_TO_16_BYTES.
  COLUMNAR_LAYOUT = 2;
}
| |
// Parameters for opening a new scanner on a tablet.
message NewScanRequestPB {
  // The tablet to scan.
  required bytes tablet_id = 1;

  // The maximum number of rows to scan with the new scanner.
  //
  // Once this many result rows have been produced, the scanner automatically
  // stops yielding results and closes itself.
  optional uint64 limit = 2;

  // DEPRECATED: use the 'column_predicates' field instead.
  //
  // Any column range predicates to enforce. Marked with the 'deprecated'
  // option so that generated code flags remaining uses; the field number and
  // wire format are unchanged.
  repeated ColumnRangePredicatePB DEPRECATED_range_predicates = 3
      [deprecated = true];

  // Column predicates to enforce.
  repeated ColumnPredicatePB column_predicates = 13;

  // Encoded primary key to begin scanning at (inclusive).
  optional bytes start_primary_key = 8 [(kudu.REDACT) = true];
  // Encoded primary key to stop scanning at (exclusive).
  optional bytes stop_primary_key = 9 [(kudu.REDACT) = true];

  // Which columns to select.
  // If this is an empty list, no data is returned, but the num_rows field of
  // the returned RowBlock indicates how many rows passed the predicates.
  // Note that in some cases the scan may still require multiple round-trips,
  // and the caller must aggregate the counts.
  repeated ColumnSchemaPB projected_columns = 4;

  // The read mode for this scan request.
  // See common.proto for further information about read modes.
  optional ReadMode read_mode = 5 [default = READ_LATEST];

  // The start timestamp of a diff scan. If set, 'snap_timestamp' must also
  // be specified and the read mode must be set to READ_AT_SNAPSHOT.
  // Specifying this field turns the scan into a diff scan, where all changes
  // between the start and end timestamp are returned.
  //
  // Diff Scans
  // ==========
  //
  // When starting a diff scan, a column of type IS_DELETED (known as a
  // virtual column) must also be included in the scan projection schema. The
  // results of a diff scan include all values in the projection.
  // Any row deleted between the start and end timestamps will have its
  // IS_DELETED column set to true. In this case, any non-primary key column
  // values in the result are undefined (they are not guaranteed to be the
  // latest value before the row was deleted) and should be ignored. If the
  // IS_DELETED column is set to false, then the row was inserted or updated
  // between the start and end timestamps. There is currently no way to tell
  // whether a row returned from a diff scan with the IS_DELETED column set
  // to false was included with the results because it was inserted or
  // because it was updated.
  optional fixed64 snap_start_timestamp = 16;

  // The requested snapshot timestamp. Only used when the read mode is set to
  // READ_AT_SNAPSHOT. When 'snap_start_timestamp' is specified, this is the
  // "end" timestamp of a diff scan.
  optional fixed64 snap_timestamp = 6;

  // Sent by clients which previously executed CLIENT_PROPAGATED writes.
  // This updates the server's time so that no op will be assigned a
  // timestamp lower than or equal to the value of this field.
  optional fixed64 propagated_timestamp = 7;

  // Whether data blocks are cached when read from the files, or discarded
  // after use. Disable this to lower cache churn when doing large scans.
  optional bool cache_blocks = 10 [default = true];

  // Whether to order the returned rows by primary key.
  // This is used for scanner fault-tolerance.
  optional OrderMode order_mode = 11 [default = UNORDERED];

  // If retrying a scan, the final primary key retrieved in the previous scan
  // attempt. If set, this takes precedence over the 'start_primary_key'
  // field, and functions as an exclusive start primary key.
  optional bytes last_primary_key = 12 [(kudu.REDACT) = true];

  // Row format flags.
  //
  // The client may pass "row format modifier" flags that change the way the
  // server encodes the returned row data in some way. Only on/off modifiers
  // are supported, which are encoded as a bitset in this uint64.
  //
  // The default value corresponds to RowFormatFlags::NO_FLAGS, which can't
  // be set as the actual default since the types differ.
  optional uint64 row_format_flags = 14 [default = 0];

  // An authorization token with which to authorize this request.
  optional security.SignedTokenPB authz_token = 15;
}
| |
// A scan request. The first request should specify a scan; afterwards, the
// scanner id that was returned can be used to fetch result batches with a
// different scan request.
//
// The scanner stays open as long as there are more results and it has not
// been explicitly asked to close. Some errors on the Tablet Server may close
// the scanner automatically if the scanner state becomes inconsistent.
//
// Clients may choose to retry scan requests that fail to complete (due to,
// for example, a timeout or network error). If a scan request completes with
// an error result, the client should close the scanner.
//
// To save a trip when the remaining results are of no interest, a single
// request can both fetch results and ask for the scanner to be closed.
//
// This is modeled somewhat after HBase's scanner API.
message ScanRequestPB {
  // Set 'scanner_id' when continuing an existing scan; otherwise set
  // 'new_scan_request'.
  optional bytes scanner_id = 1;
  optional NewScanRequestPB new_scan_request = 2;

  // The sequence ID of this call. It should start at 0 with the request for
  // a new scanner, and the client should increment it by 1 after each
  // successful request. When retrying a request, the client should _not_
  // increment this value. If the server detects that the client missed a
  // chunk of rows from the middle of a scan, it responds with an error.
  optional uint32 call_seq_id = 3;

  // The maximum number of bytes to send in the response.
  // This is a hint, not a requirement: the server may send arbitrarily fewer
  // or more bytes than requested.
  optional uint32 batch_size_bytes = 4;

  // If set, the server closes the scanner after responding to this request,
  // regardless of whether all rows have been delivered.
  // To simply close a scanner without selecting any rows, set
  // batch_size_bytes to 0 in conjunction with setting this flag.
  optional bool close_scanner = 5;
}
| |
// Resource metrics of an RPC.
//
// All metrics MUST be of type int64.
message ResourceMetricsPB {
  // Bytes read because of a block cache miss.
  optional int64 cfile_cache_miss_bytes = 1;

  // Bytes read from the block cache because of a hit.
  optional int64 cfile_cache_hit_bytes = 2;

  // Bytes read from disk (or cache) by the scanner.
  optional int64 bytes_read = 3;

  // Total time, in nanoseconds, between scan rpc requests being accepted and
  // being handled, for this scanner.
  optional int64 queue_duration_nanos = 4;

  // Total time, in nanoseconds, taken for all scan rpc requests to complete,
  // for this scanner.
  optional int64 total_duration_nanos = 5;

  // Total elapsed CPU user time, in nanoseconds, for all scan rpc requests
  // for this scanner.
  optional int64 cpu_user_nanos = 6;

  // Total elapsed CPU system time, in nanoseconds, for all scan rpc requests
  // for this scanner.
  optional int64 cpu_system_nanos = 7;

  // Total number of successful INSERT/INSERT_IGNORE operations.
  optional int64 successful_inserts = 8;

  // Total number of unsuccessful INSERT_IGNORE operations.
  optional int64 insert_ignore_errors = 9;

  // Total number of successful UPSERT operations.
  optional int64 successful_upserts = 10;

  // Total number of successful UPDATE/UPDATE_IGNORE operations.
  optional int64 successful_updates = 11;

  // Total number of unsuccessful UPDATE_IGNORE operations.
  optional int64 update_ignore_errors = 12;

  // Total number of successful DELETE operations.
  optional int64 successful_deletes = 13;

  // Total number of unsuccessful DELETE_IGNORE operations.
  optional int64 delete_ignore_errors = 14;

  // Total observed commit wait duration, in microseconds.
  optional int64 commit_wait_duration_usec = 15;

  // Total number of UPSERT_IGNORE operations with error.
  optional int64 upsert_ignore_errors = 16;
}
| |
// Response to a ScanRequestPB.
message ScanResponsePB {
  // The error, if one occurred with this request.
  optional TabletServerErrorPB error = 1;

  // Returned when a scanner is created: the scanner ID which may be used to
  // pull new rows from the scanner.
  optional bytes scanner_id = 2;

  // True when there may be further results to fetch from this scanner. Once
  // the scanner has no more results, the scanner ID becomes invalid and
  // cannot continue to be used.
  //
  // Note that if a scan returns no results, the initial response from the
  // first RPC may return false in this flag, in which case no scanner ID is
  // assigned.
  optional bool has_more_results = 3;

  // The block of returned rows.
  //
  // NOTE: the schema-related fields are not present in this row block. The
  // schema matches the one the client requested when it created the scanner.
  optional RowwiseRowBlockPB data = 4;

  // Set instead of 'data' if COLUMNAR_LAYOUT is passed.
  optional ColumnarRowBlockPB columnar_data = 5;

  // The snapshot timestamp at which the scan was executed. Only set in the
  // first response (i.e. the response to the request that had
  // 'new_scan_request' set), and only for READ_AT_SNAPSHOT scans.
  optional fixed64 snap_timestamp = 6;

  // For a fault-tolerant scanner, the encoded primary key of the last row
  // returned in the response.
  optional bytes last_primary_key = 7 [(kudu.REDACT) = true];

  // The resource usage of this RPC.
  optional ResourceMetricsPB resource_metrics = 8;

  // The server's time upon sending out the scan response. Should always be
  // greater than the scan timestamp.
  optional fixed64 propagated_timestamp = 9;
}
| |
// Keep-alive request for a scanner: updates the scanner's access time, which
// increases its time-to-live.
message ScannerKeepAliveRequestPB {
  required bytes scanner_id = 1;
}
| |
// Response to a ScannerKeepAliveRequestPB.
message ScannerKeepAliveResponsePB {
  // The error, if one occurred with this request.
  optional TabletServerErrorPB error = 1;
}
| |
// Request to split a tablet into key ranges. The request does not change the
// layout of the tablet.
message SplitKeyRangeRequestPB {
  required bytes tablet_id = 1;

  // Encoded primary key to begin scanning at (inclusive).
  optional bytes start_primary_key = 2 [(kudu.REDACT) = true];
  // Encoded primary key to stop scanning at (exclusive).
  optional bytes stop_primary_key = 3 [(kudu.REDACT) = true];

  // Number of bytes to try to return in each chunk. This is only a hint: the
  // tablet server may return chunks larger or smaller than this value.
  optional uint64 target_chunk_size_bytes = 4;

  // The columns to consider when chunking.
  // If specified, the size estimate used for 'target_chunk_size_bytes'
  // should only include these columns. Useful when a query will only scan a
  // certain subset of the columns.
  repeated ColumnSchemaPB columns = 5;

  // Authorization token used to authorize this request.
  optional security.SignedTokenPB authz_token = 6;
}
| |
// Response to a SplitKeyRangeRequestPB.
message SplitKeyRangeResponsePB {
  // The error, if one occurred with this request.
  optional TabletServerErrorPB error = 1;

  // The resulting key ranges.
  repeated KeyRangePB ranges = 2;
}
| |
// Features that a tablet server may support.
enum TabletServerFeatures {
  UNKNOWN_FEATURE = 0;
  COLUMN_PREDICATES = 1;

  // Whether the server supports padding UNIXTIME_MICROS slots to 16 bytes.
  PAD_UNIXTIME_MICROS_TO_16_BYTES = 2;

  QUIESCING = 3;

  // Deprecated: see the note on BLOOM_FILTER_PREDICATE_V2 below. Marked with
  // the 'deprecated' option so that generated code flags remaining uses; the
  // numeric value is unchanged.
  BLOOM_FILTER_PREDICATE = 4 [deprecated = true];

  // Whether the server supports the COLUMNAR_LAYOUT format flag.
  COLUMNAR_LAYOUT_FEATURE = 5;

  // The implementation of the fast hash used for Bloom filter predicates was
  // updated; using incompatible client and server versions leads to
  // incorrect results.
  BLOOM_FILTER_PREDICATE_V2 = 6;
}