blob: cae4daed74359eb43d88b8ee589bbffabdc8483e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This schema relies on proto2-only features ('required' fields and explicit
// field defaults), so the dialect is declared explicitly rather than left to
// the compiler's implicit fallback.
syntax = "proto2";

package kudu.master;

option java_package = "org.apache.kudu.master";

import "kudu/common/common.proto";
import "kudu/common/wire_protocol.proto";
import "kudu/consensus/metadata.proto";
import "kudu/tablet/metadata.proto";
////////////////////////////////////////////////////////////
// Common data structures
////////////////////////////////////////////////////////////
// Error payload used for master-specific failures.
message MasterErrorPB {
  // Master-specific error codes.
  enum Code {
    // Catch-all for errors that have no more specific code. The code and
    // message carried in 'status' may reveal more details.
    //
    // RPCs should avoid returning this value, since callers cannot easily
    // parse the error.
    UNKNOWN_ERROR = 1;

    // The schema supplied with the request was not well-formed.
    INVALID_SCHEMA = 2;

    // The requested table does not exist.
    TABLE_NOT_FOUND = 3;

    // The name requested for the table is already in use.
    TABLE_ALREADY_PRESENT = 4;

    // The number of tablets requested for a new table exceeds the
    // per-tablet-server limit.
    TOO_MANY_TABLETS = 5;

    // The catalog manager has not been initialized yet.
    CATALOG_MANAGER_NOT_INITIALIZED = 6;

    // The attempted operation may only be invoked against the leader or a
    // single non-distributed master, and this node is neither.
    NOT_THE_LEADER = 7;

    // More replicas were requested than there are live servers in the
    // cluster.
    REPLICATION_FACTOR_TOO_HIGH = 8;

    // The request or response involved a tablet that is not yet running.
    TABLET_NOT_RUNNING = 9;
  }

  // The master-specific error code.
  required Code code = 1;

  // The Status object for the error. It includes a textual message that may
  // be more useful in log messages and the like, though its own error code
  // is less specific than 'code'.
  required AppStatusPB status = 2;
}
// Information included in every request a tablet server sends to the
// master.
message TSToMasterCommonPB {
  // Instance of the tablet server that is sending the heartbeat.
  required NodeInstancePB ts_instance = 1;
}
// Identifies a table by ID and/or by name. Both fields are optional in the
// schema.
message TableIdentifierPB {
  // ID of the table to fetch info for.
  optional bytes table_id = 1;

  // Name of the table to fetch info for.
  optional string table_name = 2;
}
////////////////////////////////////////////////////////////
// Sys Tables Metadata
////////////////////////////////////////////////////////////
// On-disk representation of a tablet entry, stored in the "metadata" column
// of the sys.catalog table.
message SysTabletsEntryPB {
  // Tablet states recorded in the catalog. UNKNOWN is declared first and is
  // also the explicit default of the 'state' field below.
  enum State {
    UNKNOWN = 999;
    PREPARING = 0;
    CREATING = 1;
    RUNNING = 2;
    REPLACED = 3;
    DELETED = 4;
  }

  // DEPRECATED key range. Replaced by 'partition'.
  optional bytes DEPRECATED_start_key = 1;
  optional bytes DEPRECATED_end_key = 2;

  // The tablet's partition.
  optional PartitionPB partition = 7;

  // Most recent committed consensus configuration reported to the Master.
  optional consensus.ConsensusStatePB committed_consensus_state = 3;

  // Debug state for the tablet, plus an accompanying message.
  optional State state = 4 [default = UNKNOWN];
  optional bytes state_msg = 5;

  // ID of the table this tablet belongs to.
  required bytes table_id = 6;
}
// On-disk representation of a table entry, stored in the "metadata" column
// of the sys.catalog table.
message SysTablesEntryPB {
  // Table states recorded in the catalog.
  enum State {
    UNKNOWN = 0;
    PREPARING = 1;
    RUNNING = 2;
    ALTERING = 3;
    REMOVED = 4;
  }

  // Name of the table.
  required bytes name = 1;

  // Sequence ID of the table metadata. Used on tablet report to avoid
  // sending "alter-table" notifications.
  required uint32 version = 2;

  // The newest table schema; every TS will eventually have it.
  required SchemaPB schema = 3;

  // The last table schema that is guaranteed to have reached every TS. It is
  // not necessarily the newest schema.
  //
  // This is the schema handed to the user on client->GetSchema(tableName).
  optional SchemaPB fully_applied_schema = 4;

  // Partitioning schema of the table.
  optional PartitionSchemaPB partition_schema = 9;

  // Next column ID to hand out to a newly added column of this table.
  // Tracking it here prevents column IDs from being reused.
  optional int32 next_column_id = 8;

  // Number of TS replicas.
  required int32 num_replicas = 5;

  // Debug state for the table, plus an accompanying message.
  optional State state = 6 [default = UNKNOWN];
  optional bytes state_msg = 7;
}
////////////////////////////////////////////////////////////
// RPCs
////////////////////////////////////////////////////////////
// Request for the Ping RPC. Carries no fields.
message PingRequestPB {}
// Response for the Ping RPC. Carries no fields.
message PingResponsePB {}
// Registration sent by a TS with its first heartbeat to a master. It gives
// the master all of the necessary information about the current instance of
// the TS.
message TSRegistrationPB {
  // RPC host/port addresses of the tablet server.
  repeated HostPortPB rpc_addresses = 1;

  // HTTP host/port addresses of the tablet server.
  repeated HostPortPB http_addresses = 2;

  // Software version string of the tablet server.
  optional string software_version = 3;
}
// A single tablet entry inside a TabletReportPB.
message ReportedTabletPB {
  // ID of the reported tablet.
  required bytes tablet_id = 1;

  // Tablet state and tablet data state; both default to their respective
  // "unknown" values when unset.
  optional tablet.TabletStatePB state = 2 [default = UNKNOWN];
  optional tablet.TabletDataState tablet_data_state = 6 [default = TABLET_DATA_UNKNOWN];

  // The latest _committed_ consensus state. Absent when the tablet is not in
  // a RUNNING state (for example while it is BOOTSTRAPPING).
  optional consensus.ConsensusStatePB committed_consensus_state = 3;

  // NOTE(review): presumably the error associated with this tablet, if any;
  // confirm against the tablet server's report generation.
  optional AppStatusPB error = 4;

  // NOTE(review): presumably the schema version the tablet currently has;
  // confirm against the tablet server's report generation.
  optional uint32 schema_version = 5;
}
// Report from a tablet server describing the set of tablets it hosts.
message TabletReportPB {
  // When false, this is a full report and any prior information about
  // tablets hosted by this server should be dropped.
  required bool is_incremental = 1;

  // Tablets whose information should be updated. When 'is_incremental' is
  // false this is the complete set of tablets on the server: any tablet the
  // master knows about that is not listed here should be assumed to have
  // been removed from this server.
  repeated ReportedTabletPB updated_tablets = 2;

  // IDs of tablets that the tablet server has removed and that should no
  // longer be considered hosted here. Always empty in a non-incremental
  // report.
  repeated bytes removed_tablet_ids = 3;

  // Sequence number created each time the TS generates a tablet report.
  // Useful for debugging and for determining which changes have not yet been
  // reported to the master. The first (non-incremental) tablet report has
  // sequence number 0.
  required int32 sequence_number = 4;
}
// Per-tablet entry of a TabletReportUpdatesPB.
message ReportedTabletUpdatesPB {
  // ID of the tablet this update refers to.
  required bytes tablet_id = 1;

  // Optional state message for the tablet.
  optional string state_msg = 2;
}
// Master's reply to the tablet report that a TS includes in its heartbeats.
message TabletReportUpdatesPB {
  // One entry per tablet the master is sending an update for.
  repeated ReportedTabletUpdatesPB tablets = 1;
}
// Heartbeat that a tablet server sends to the master in order to establish
// liveness and to report back any status changes.
message TSHeartbeatRequestPB {
  // Common tablet-server-to-master information.
  required TSToMasterCommonPB common = 1;

  // Sent when the TS starts up, or in response to 'needs_reregister' on a
  // heartbeat response.
  optional TSRegistrationPB registration = 2;

  // Sent when the tablet information has changed, or in response to
  // 'needs_full_tablet_report'.
  optional TabletReportPB tablet_report = 3;

  // TODO: add a heartbeat sequence number?
  // TODO: perhaps add some very basic metrics reporting here, like
  // free space, reqs/sec, etc?

  // Number of tablets that are BOOTSTRAPPING or RUNNING. The master uses it
  // to determine load when creating new tablet replicas.
  optional int32 num_live_tablets = 4;
}
// Master's response to a TSHeartbeatRequestPB.
message TSHeartbeatResponsePB {
  // The error, if one occurred.
  optional MasterErrorPB error = 1;

  // As with most other master RPC responses (e.g. ListTablesResponsePB),
  // every field below is optional because it may not be set if there is an
  // error.
  optional NodeInstancePB master_instance = 2;

  // Tells the heartbeating server that it must re-register with the master,
  // i.e. send a heartbeat with the 'registration' field filled in.
  optional bool needs_reregister = 3 [default = false];

  // Counterpart of the 'needs_full_tablet_report' condition mentioned on
  // TSHeartbeatRequestPB.tablet_report.
  optional bool needs_full_tablet_report = 4 [default = false];

  // Sent when the master receives a TabletReport.
  optional TabletReportUpdatesPB tablet_report = 5;

  // Whether or not this node is the leader master.
  optional bool leader_master = 6;
}
//////////////////////////////
// GetTabletLocations
//////////////////////////////
// Location information for a single tablet.
message TabletLocationsPB {
  // One replica of the tablet: the hosting tablet server and its Raft role.
  message ReplicaPB {
    required TSInfoPB ts_info = 1;
    required consensus.RaftPeerPB.Role role = 2;
  }

  // ID of the tablet.
  required bytes tablet_id = 1;

  // DEPRECATED key range.
  optional bytes start_key = 2;
  optional bytes end_key = 3;

  // The tablet's partition.
  optional PartitionPB partition = 6;

  // Replicas of the tablet.
  repeated ReplicaPB replicas = 4;

  // DEPRECATED. Servers still set it, but clients should ignore it.
  optional bool DEPRECATED_stale = 5;
}
// Information about one tablet server, handed to the client as part of the
// GetTabletLocations response. The client can use it to refresh its local
// cache of where each TS UUID is located. In the future we may also want to
// transmit software version info, load info, topology, etc.
message TSInfoPB {
  // Permanent UUID of the tablet server.
  required bytes permanent_uuid = 1;

  // RPC host/port addresses of the tablet server.
  repeated HostPortPB rpc_addresses = 2;
}
// Request for the GetTabletLocations RPC.
message GetTabletLocationsRequestPB {
  // IDs of the tablets to fetch info about.
  repeated bytes tablet_ids = 1;
}
// Response for the GetTabletLocations RPC.
message GetTabletLocationsResponsePB {
  // The error, if an error occurred with this request.
  optional MasterErrorPB error = 1;

  // Locations of the tablets.
  repeated TabletLocationsPB tablet_locations = 2;

  // Per-tablet failure: the tablet ID together with a status describing
  // what went wrong for it.
  message Error {
    required bytes tablet_id = 1;
    required AppStatusPB status = 2;
  }

  // Per-tablet errors.
  repeated Error errors = 3;
}
// ============================================================================
// Catalog
// ============================================================================
// Request for the CreateTable RPC.
message CreateTableRequestPB {
  // Name of the table to create.
  required string name = 1;

  // Schema of the table to create.
  required SchemaPB schema = 2;

  // Field numbers 3 and 5 are not declared; the commented-out declarations
  // below are kept for reference. NOTE(review): they appear to be retired
  // fields, so these numbers should not be reused -- confirm.
  // repeated bytes pre_split_keys = 3;
  // repeated PartialRowPB split_rows = 5;

  // Holds the split rows, the range bounds, or both, of the table.
  optional RowOperationsPB split_rows_range_bounds = 6;

  // Partitioning schema of the table.
  optional PartitionSchemaPB partition_schema = 7;

  // Number of replicas for the table.
  optional int32 num_replicas = 4;
}
// Response for the CreateTable RPC.
message CreateTableResponsePB {
  // Set if an error occurred while handling the request.
  optional MasterErrorPB error = 1;

  // ID of the table.
  optional bytes table_id = 2;
}
// Request for the IsCreateTableDone RPC.
message IsCreateTableDoneRequestPB {
  // The table to check.
  required TableIdentifierPB table = 1;
}
// Response for the IsCreateTableDone RPC.
message IsCreateTableDoneResponsePB {
  // Set if an error occurred while handling the request.
  optional MasterErrorPB error = 1;

  // True once the create operation has completed, false otherwise.
  optional bool done = 3;
}
// Request for the DeleteTable RPC.
message DeleteTableRequestPB {
  // The table to delete.
  required TableIdentifierPB table = 1;
}
// Response for the DeleteTable RPC.
message DeleteTableResponsePB {
  // Set if an error occurred while handling the request.
  optional MasterErrorPB error = 1;
}
// Request for the ListTables RPC.
message ListTablesRequestPB {
  // When set, only tables whose name satisfies a substring match on
  // 'name_filter' are returned.
  optional string name_filter = 1;
}
// Response for the ListTables RPC.
message ListTablesResponsePB {
  // Set if an error occurred while handling the request.
  optional MasterErrorPB error = 1;

  // ID and name of one listed table.
  message TableInfo {
    required bytes id = 1;
    required string name = 2;
  }

  // The listed tables.
  repeated TableInfo tables = 2;
}
// Request for the GetTableLocations RPC.
message GetTableLocationsRequestPB {
  // The table whose tablet locations are requested.
  required TableIdentifierPB table = 1;

  // Partition-key range to look up.
  optional bytes partition_key_start = 3;
  optional bytes partition_key_end = 4;

  // Upper bound on the number of locations returned; defaults to 10.
  optional uint32 max_returned_locations = 5 [default = 10];
}
// The response to a GetTableLocations RPC. The master guarantees that:
//
// * The response contains a location for all tablets in the requested range,
//   limited by the request's 'max_returned_locations'.
// * The tablet locations are returned in sorted order by the partition key
//   range.
// * If *any* tablet in the response is not running, then the entire response
//   will fail with MasterErrorPB::TABLET_NOT_RUNNING, and the
//   tablet_locations field will be empty.
// * A gap between the partition key ranges of consecutive tablets indicates a
//   non-covered partition range.
// * If the request's start partition key falls in a non-covered partition
//   range, the response will contain the tablet immediately before the
//   non-covered range, if it exists.
message GetTableLocationsResponsePB {
  // The error, if an error occurred with this request.
  optional MasterErrorPB error = 1;

  // Locations of the tablets in the requested range.
  repeated TabletLocationsPB tablet_locations = 2;

  // If the client caches table locations, the entries should not live longer
  // than this timeout, expressed in milliseconds.
  //
  // The schema default is 36000000 ms, which is ten hours. This comment
  // previously said "one hour", but one hour would be 3600000 ms. The
  // default value is deliberately left unchanged because it is compiled into
  // existing clients; only the documentation is corrected.
  // NOTE(review): if one hour was the intended default, changing the value
  // needs a compatibility review.
  optional uint32 ttl_millis = 3 [default = 36000000];
}
// Request for the AlterTable RPC.
message AlterTableRequestPB {
  // Kind of alteration performed by a Step.
  enum StepType {
    UNKNOWN = 0;
    ADD_COLUMN = 1;
    DROP_COLUMN = 2;
    RENAME_COLUMN = 3;

    // TODO(KUDU-861): this will subsume RENAME_COLUMN, but not yet
    // implemented on the master side.
    ALTER_COLUMN = 4;

    ADD_RANGE_PARTITION = 5;
    DROP_RANGE_PARTITION = 6;
  }

  // Payload of an ADD_COLUMN step.
  message AddColumn {
    // Schema of the column to add.
    // NOTE: do not provide the 'id' field of the schema here -- the server
    // will assign an ID.
    required ColumnSchemaPB schema = 1;
  }

  // Payload of a DROP_COLUMN step.
  message DropColumn {
    // Name of the column to drop.
    required string name = 1;
  }

  // Payload of a RENAME_COLUMN step.
  message RenameColumn {
    // Current name of the column to rename.
    required string old_name = 1;

    // New name for the column.
    required string new_name = 2;
  }

  // Payload of an ADD_RANGE_PARTITION step.
  message AddRangePartition {
    // Row operations containing the lower and upper range bound of the
    // range partition to add.
    optional RowOperationsPB range_bounds = 1;
  }

  // Payload of a DROP_RANGE_PARTITION step.
  message DropRangePartition {
    // Row operations containing the lower and upper range bound of the
    // range partition to drop.
    optional RowOperationsPB range_bounds = 1;
  }

  // A single alteration step.
  message Step {
    optional StepType type = 1 [default = UNKNOWN];

    // Exactly one of the following must be set, based on 'type'.
    optional AddColumn add_column = 2;
    optional DropColumn drop_column = 3;
    optional RenameColumn rename_column = 4;
    optional AddRangePartition add_range_partition = 5;
    optional DropRangePartition drop_range_partition = 6;
  }

  // The table to alter.
  required TableIdentifierPB table = 1;

  // The alteration steps.
  repeated Step alter_schema_steps = 2;

  // New name for the table.
  optional string new_table_name = 3;

  // Table schema used when decoding the range bound row operations. Only
  // necessary when partitions are being added or dropped.
  optional SchemaPB schema = 4;
}
// Response for the AlterTable RPC.
message AlterTableResponsePB {
  // Set if an error occurred while handling the request.
  optional MasterErrorPB error = 1;

  // Schema version of the table.
  optional uint32 schema_version = 2;
}
// Request for the IsAlterTableDone RPC.
message IsAlterTableDoneRequestPB {
  // The table to check.
  required TableIdentifierPB table = 1;
}
// Response for the IsAlterTableDone RPC.
message IsAlterTableDoneResponsePB {
  // Set if an error occurred while handling the request.
  optional MasterErrorPB error = 1;

  // The current schema version, or the 'new' schema version if an alter is
  // in progress.
  optional uint32 schema_version = 2;

  // True once the alter operation has completed, false otherwise.
  optional bool done = 3;
}
// Request for the GetTableSchema RPC.
message GetTableSchemaRequestPB {
  // The table whose schema is requested.
  required TableIdentifierPB table = 1;
}
// Response for the GetTableSchema RPC.
message GetTableSchemaResponsePB {
  // Set if an error occurred while handling the request.
  optional MasterErrorPB error = 1;

  // The schema that every TS should be able to understand, provided your
  // alter keeps the schema compatible. While an alter table is in progress
  // this is the previous schema; otherwise it is the latest schema.
  optional SchemaPB schema = 2;

  // Partition schema of the table.
  optional PartitionSchemaPB partition_schema = 5;

  // Number of replicas of the table.
  optional int32 num_replicas = 3;

  // ID of the table.
  optional bytes table_id = 4;

  // True once the create operation has completed, false otherwise.
  optional bool create_table_done = 6;

  // Name of the table.
  optional string table_name = 7;
}
// ============================================================================
// Administration/monitoring
// ============================================================================
// Request for the ListTabletServers RPC. Carries no fields.
message ListTabletServersRequestPB {}
// Response for the ListTabletServers RPC.
message ListTabletServersResponsePB {
  // The error, if one occurred.
  optional MasterErrorPB error = 1;

  // One tablet server: its instance ID, its registration, and the number of
  // milliseconds since its heartbeat.
  message Entry {
    required NodeInstancePB instance_id = 1;
    optional TSRegistrationPB registration = 2;
    optional int32 millis_since_heartbeat = 3;
  }

  // The listed tablet servers.
  repeated Entry servers = 2;
}
// GetMasterRegistrationRequest/Response: fetch the instance ID and the
// HTTP/RPC addresses of this Master server. The request carries no fields.
message GetMasterRegistrationRequestPB {}
// Response for the GetMasterRegistration RPC.
// TODO: Just use ServerRegistration here.
message GetMasterRegistrationResponsePB {
  // Node instance information; always set.
  required NodeInstancePB instance_id = 1;

  // Optional because it is not set when there is an error retrieving the
  // host/port information.
  optional ServerRegistrationPB registration = 2;

  // Role of this server in the consensus configuration.
  optional consensus.RaftPeerPB.Role role = 3;

  // Set if there is an error retrieving the registration information.
  optional MasterErrorPB error = 4;
}
// ListMastersRequest/Response: fetch information about every known master
// server, including this node. The request carries no fields.
message ListMastersRequestPB {}
// Response for the ListMasters RPC.
message ListMastersResponsePB {
  // One entry per individual master server.
  repeated ServerEntryPB masters = 1;

  // Set only when there is an error retrieving the list of servers or
  // getting this server's own local registration information.
  optional AppStatusPB error = 2;
}
// Features a master may support.
enum MasterFeatures {
  UNKNOWN_FEATURE = 0;

  // The master supports creating tables with non-covering range partitions.
  RANGE_PARTITION_BOUNDS = 1;

  // The master supports adding and dropping range partitions.
  ADD_DROP_RANGE_PARTITIONS = 2;
}
// RPC service exposed by the master.
service MasterService {
  rpc Ping(PingRequestPB) returns (PingResponsePB);

  // TS->Master RPCs
  rpc TSHeartbeat(TSHeartbeatRequestPB) returns (TSHeartbeatResponsePB);

  // Client->Master RPCs
  rpc GetTabletLocations(GetTabletLocationsRequestPB)
      returns (GetTabletLocationsResponsePB);
  rpc CreateTable(CreateTableRequestPB) returns (CreateTableResponsePB);
  rpc IsCreateTableDone(IsCreateTableDoneRequestPB)
      returns (IsCreateTableDoneResponsePB);
  rpc DeleteTable(DeleteTableRequestPB) returns (DeleteTableResponsePB);
  rpc AlterTable(AlterTableRequestPB) returns (AlterTableResponsePB);
  rpc IsAlterTableDone(IsAlterTableDoneRequestPB)
      returns (IsAlterTableDoneResponsePB);
  rpc ListTables(ListTablesRequestPB) returns (ListTablesResponsePB);
  rpc GetTableLocations(GetTableLocationsRequestPB)
      returns (GetTableLocationsResponsePB);
  rpc GetTableSchema(GetTableSchemaRequestPB)
      returns (GetTableSchemaResponsePB);

  // Administrative/monitoring RPCs
  rpc ListTabletServers(ListTabletServersRequestPB)
      returns (ListTabletServersResponsePB);
  rpc ListMasters(ListMastersRequestPB) returns (ListMastersResponsePB);
  rpc GetMasterRegistration(GetMasterRegistrationRequestPB)
      returns (GetMasterRegistrationResponsePB);
}