// blob: 0412128b0d1e1f214bee7643a38b29ed4907bb27 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
syntax = "proto2";
package kudu.tablet;
option java_package = "org.apache.kudu.tablet";
import "kudu/common/common.proto";
import "kudu/consensus/opid.proto";
import "kudu/fs/fs.proto";
// ============================================================================
// Tablet Metadata
// ============================================================================
// Identifies the block that stores the data of a single column within a
// rowset (see RowSetDataPB.columns).
message ColumnDataPB {
  // The block holding this column's data.
  required BlockIdPB block = 2;

  // REMOVED: optional ColumnSchemaPB OBSOLETE_schema = 3;
  // The number and name are reserved so that neither can be reused with a
  // different meaning by a later change.
  reserved 3;
  reserved "OBSOLETE_schema";

  // ID of the column stored in 'block'.
  // NOTE(review): presumably the column ID from the tablet's schema -- confirm.
  optional int32 column_id = 4;
}
// Identifies a single block of delta data belonging to a rowset (see
// RowSetDataPB.redo_deltas and RowSetDataPB.undo_deltas).
message DeltaDataPB {
  // The block holding the deltas.
  required BlockIdPB block = 2;
}
// Describes the blocks that make up one rowset of a tablet: one block per
// column, the redo and undo delta blocks, and the optional bloom filter and
// index blocks.
message RowSetDataPB {
  // ID of this rowset.
  required uint64 id = 1;

  // NOTE(review): presumably the ID of the last delta store that was durably
  // flushed for this rowset ("dms") -- confirm the exact meaning.
  required int64 last_durable_dms_id = 2;

  // Blocks holding the column data, one entry per column.
  repeated ColumnDataPB columns = 3;

  // Blocks holding the REDO deltas.
  repeated DeltaDataPB redo_deltas = 4;

  // Blocks holding the UNDO deltas.
  repeated DeltaDataPB undo_deltas = 5;

  // Block containing the bloom filter, if any.
  optional BlockIdPB bloom_block = 6;

  // NOTE(review): presumably the block containing the compound-key index
  // mentioned in the TabletSuperBlockPB description -- confirm.
  optional BlockIdPB adhoc_index_block = 7;

  // NOTE(review): presumably the smallest and largest encoded keys stored in
  // this rowset -- confirm, including whether the bounds are inclusive.
  optional bytes min_encoded_key = 8;
  optional bytes max_encoded_key = 9;

  // Number of live rows that have been persisted.
  optional int64 live_row_count = 10;
}
// State flags indicating whether the tablet is in the middle of being copied
// and is therefore not possible to bring up, whether it has been deleted, or
// whether the data is in a usable state.
//
// NOTE(review): no enumerator declarations and no closing brace are visible
// for this enum; only the comments that described each value remain. The
// values and their wire numbers must be restored from the canonical copy of
// this file -- do not guess them. TabletSuperBlockPB.tablet_data_state uses
// TABLET_DATA_UNKNOWN as its default, so at least that value must exist.
enum TabletDataState {
// The tablet is set to TABLET_DATA_COPYING state when in the middle of
// copying data files from a remote peer. If a tablet server crashes with
// a tablet in this state, the tablet must be deleted and
// the Tablet Copy process must be restarted for that tablet.
//
// Fresh empty tablets and successfully copied tablets are set to the
// TABLET_DATA_READY state.
// NOTE(review): the end of the sentence above was missing; the state name is
// taken from the TabletSuperBlockPB.data_dir_group comment -- confirm.
//
// This tablet is in the process of being deleted.
// The tablet server should "roll forward" the deletion during boot,
// rather than trying to load the tablet.
//
// The tablet has been deleted, and now just consists of a "tombstone".
// Metadata that indicates the state of a transaction. Instances are stored
// per transaction ID in TabletSuperBlockPB.txn_metadata.
message TxnMetadataPB {
  // Whether the transaction was aborted. If true, 'commit_timestamp' must not
  // be set.
  optional bool aborted = 1;

  // The commit timestamp of the transaction. If set, 'aborted' must not be
  // set.
  optional int64 commit_timestamp = 2;

  // The timestamp used by the MVCC op that tracks the commit of this
  // transaction. This is sent to the transaction status manager in response to
  // a BEGIN_COMMIT request to be used to assign a commit timestamp that is
  // higher than all participants' commit MVCC op timestamps.
  //
  // When iterating through mutations at a specific clean snapshot (as in a
  // READ_AT_SNAPSHOT or diff scan), both this MVCC op timestamp and the commit
  // timestamp must be applied for the mutation to be considered committed in
  // that snapshot.
  //
  // When iterating through mutations at the latest snapshot (as in READ_LATEST
  // or during compactions), this MVCC op timestamp must be applied and there
  // must be a commit timestamp for the mutation to be considered committed --
  // this avoids reading dirty, uncommitted rows.
  optional int64 commit_mvcc_op_timestamp = 3;

  // Whether or not this transaction has flushed its MRS after committing. If
  // set to true, Kudu should not create an MRS for this transaction when
  // bootstrapping.
  optional bool flushed_committed_mrs = 4;

  // TODO(awong): add an owner field to this for uncommitted transactions.
}
// The super-block keeps track of the tablet data blocks.
// A tablet contains one or more RowSets, each of which contains a set of
// blocks (one for each column), a set of delta blocks, and optionally a block
// containing the bloom filter and a block containing the compound-keys.
message TabletSuperBlockPB {
  // Table ID of the table this tablet is part of.
  required bytes table_id = 1;

  // Tablet ID.
  required bytes tablet_id = 2;

  // The type of table this tablet belongs to. If not set, the assumption is
  // this is a user-defined table as opposed to a Kudu-internal system table.
  optional TableTypePB table_type = 19;

  // The latest durable MemRowSet ID.
  required int64 last_durable_mrs_id = 3;

  // NOTE(review): presumably the start and end keys of the tablet, which look
  // superseded by 'partition' below -- confirm whether they are still used.
  optional bytes start_key = 4;
  optional bytes end_key = 5;

  // The partition of the table.
  optional PartitionPB partition = 13;

  // Tablet RowSets.
  repeated RowSetDataPB rowsets = 6;

  // The latest schema, together with the table name and the schema version.
  // TODO: maybe this should be TableSchemaPB? Need to actually put those
  // attributes into use throughout the code. Using the simpler one for now.
  required string table_name = 7;
  required SchemaPB schema = 8;
  required uint32 schema_version = 9;

  // The partition schema of the table.
  optional PartitionSchemaPB partition_schema = 14;

  // The current state of the tablet's data.
  optional TabletDataState tablet_data_state = 10
      [default = TABLET_DATA_UNKNOWN];

  // Blocks that became orphans after flushing this superblock. In other
  // words, the set difference of the blocks belonging to the previous
  // superblock and this one.
  //
  // It's always safe to delete the blocks found here.
  repeated BlockIdPB orphaned_blocks = 11;

  // For tablets that have been tombstoned, stores the last OpId stored in the
  // WAL before tombstoning.
  // Only relevant for TOMBSTONED tablets.
  optional consensus.OpId tombstone_last_logged_opid = 12;

  // Tablet data is spread across a data directory group. If this is not set
  // and the tablet state is TABLET_DATA_READY, it is assumed that the data is
  // from a version of Kudu before 1.5.0. In this case, a new group will be
  // created spanning all data directories.
  optional DataDirGroupPB data_dir_group = 15;

  // Whether the tablet supports counting live rows. If false,
  // 'live_row_count' may be inaccurate and should be ignored. It's only
  // supported for the newly created ones, not for the ancient ones.
  optional bool supports_live_row_count = 16;

  // The table's extra-config.
  optional TableExtraConfigPB extra_config = 17;

  // The dimension label for the tablet. Used by the master to determine load
  // when creating new tablet replicas based on dimension.
  optional string dimension_label = 18;

  // Map from txn ID to metadata associated with the transaction. This is
  // updated on each metadata flush to reflect the current in-memory state of
  // transactions. In between an in-memory state update and a flush,
  // participant ops should be anchored to replay the updates upon restarting.
  //
  // TODO(awong): consider storing these separately from the superblock.
  map<int64, TxnMetadataPB> txn_metadata = 20;
}
// Tablet states represent stages of a TabletReplica's object lifecycle and are
// reported to the master via tablet reports.
//
// Legal state transitions for a single TabletReplica object:
//
// NOTE(review): the row naming the states is missing from the diagram below
// and its column alignment has been lost, so only the arrow skeleton remains.
// Restore the diagram from the canonical copy of this file before relying on
// it.
//
// | | | ^ ^ ^
// | | | | | |
// | | +------------------+ | |
// | +-------------------------------------+ |
// +------------------------------------------------------+
//
// Since a TabletReplica instance is replaced when a Tablet Copy operation
// occurs, from a remote perspective it is possible for a tablet replica to
// appear to transition from SHUTDOWN back to NOT_INITIALIZED.
enum TabletStatePB {
// NOTE(review): presumably a catch-all for a state that is not recognized --
// confirm.
UNKNOWN = 999;
// NOTE(review): the comments below each describe an enum value whose
// declaration is not visible here, and the enum has no closing brace. The
// value names and wire numbers must be restored from the canonical copy of
// this file -- do not guess them.
//
// Tablet has not yet been initialized.
//
// Tablet has been initialized but not yet started.
//
// Indicates the Tablet is bootstrapping, i.e. that the Tablet is not
// available for RPC.
//
// Once the configuration phase is over Peers are in RUNNING state. In this
// state Peers are available for client RPCs.
//
// The tablet failed for some reason. TabletReplica::error() will return
// the reason for the failure.
//
// The Tablet is shutting down, and will not accept further requests.
//
// The tablet has been stopped, possibly because it has been tombstoned.
//
// The Tablet has been completely shut down.
// Statistics for a tablet replica.
message ReportedTabletStatsPB {
// Size of the tablet replica on disk.
// NOTE(review): presumably in bytes -- confirm the unit.
optional uint64 on_disk_size = 1;
// Number of live rows in the tablet replica.
// NOTE(review): presumably only meaningful when the tablet supports live row
// counting (see TabletSuperBlockPB.supports_live_row_count) -- confirm.
optional uint64 live_row_count = 2;