// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
syntax = "proto2";
package kudu.tablet;

option java_package = "org.apache.kudu.tablet";

import "kudu/common/common.proto";
import "kudu/consensus/opid.proto";
import "kudu/fs/fs.proto";

// ============================================================================
//  Tablet Metadata
// ============================================================================

// Identifies the block that holds the data of a single column within a rowset
// (see RowSetDataPB.columns).
message ColumnDataPB {
  // The block containing this column's data.
  // NOTE(review): field number 1 is not declared in this message; if an
  // earlier version used it, it should be added to the 'reserved' list below.
  required BlockIdPB block = 2;

  // Field number 3 used to belong to the removed field recorded below.
  // Reserving both the number and the name makes protoc reject any future
  // attempt to reuse them, which would otherwise cause previously written
  // metadata to be misinterpreted.
  // REMOVED: optional ColumnSchemaPB OBSOLETE_schema = 3;
  reserved 3;
  reserved "OBSOLETE_schema";

  // ID of the column stored in 'block'.
  // NOTE(review): presumably the column ID assigned in the tablet schema --
  // confirm against the code that writes this message.
  optional int32 column_id = 4;
}

// Identifies a block that holds delta data for a rowset (see
// RowSetDataPB.redo_deltas and RowSetDataPB.undo_deltas).
message DeltaDataPB {
  // The block containing the deltas.
  // NOTE(review): numbering starts at 2; if field number 1 was used by an
  // earlier version of this message it should be marked 'reserved' -- confirm.
  required BlockIdPB block = 2;
}

// Metadata describing a single rowset of a tablet: the blocks that make it up
// and, optionally, its key bounds and live row count.
message RowSetDataPB {
  // ID of this rowset.
  required uint64 id = 1;

  // NOTE(review): by analogy with TabletSuperBlockPB.last_durable_mrs_id this
  // is presumably the ID of the latest durable DMS (DeltaMemStore?) of this
  // rowset -- confirm the expansion of "DMS" and the exact semantics.
  required int64 last_durable_dms_id = 2;

  // The column blocks of this rowset, one entry per column.
  repeated ColumnDataPB columns = 3;

  // Delta blocks of this rowset, kept in two separate lists.
  // NOTE(review): the names suggest REDO and UNDO deltas respectively; the
  // ordering of entries within each list is not specified here -- confirm.
  repeated DeltaDataPB redo_deltas = 4;
  repeated DeltaDataPB undo_deltas = 5;

  // The block containing the bloom filter, if the rowset has one.
  optional BlockIdPB bloom_block = 6;

  // NOTE(review): presumably the block containing the compound keys that the
  // TabletSuperBlockPB description mentions -- confirm.
  optional BlockIdPB adhoc_index_block = 7;

  // NOTE(review): presumably the smallest and largest encoded keys stored in
  // this rowset; the key encoding and whether the bounds are inclusive are not
  // visible in this file -- confirm.
  optional bytes min_encoded_key = 8;
  optional bytes max_encoded_key = 9;

  // Number of live rows that have been persisted.
  optional int64 live_row_count = 10;
}

// State flags indicating whether the tablet is in the middle of being copied
// and is therefore not possible to bring up, whether it has been deleted, or
// whether the data is in a usable state.
enum TabletDataState {
  // The data state is not known. This value is declared first, so under
  // proto2 it is the default for any TabletDataState field that does not
  // specify one; do not move another value ahead of it.
  TABLET_DATA_UNKNOWN = 999;

  // The tablet is set to TABLET_DATA_COPYING state when in the middle of
  // copying data files from a remote peer. If a tablet server crashes with
  // a tablet in this state, the tablet must be deleted and the Tablet Copy
  // process must be restarted for that tablet.
  TABLET_DATA_COPYING = 0;

  // Fresh empty tablets and successfully copied tablets are set to the
  // TABLET_DATA_READY state.
  TABLET_DATA_READY = 1;

  // This tablet is in the process of being deleted.
  // The tablet server should "roll forward" the deletion during boot,
  // rather than trying to load the tablet.
  TABLET_DATA_DELETED = 2;

  // The tablet has been deleted, and now just consists of a "tombstone".
  TABLET_DATA_TOMBSTONED = 3;
}

// Metadata that indicates the state of a transaction.
// Note: 'aborted' and 'commit_timestamp' are documented as mutually exclusive,
// but they are declared as two independent optional fields rather than as a
// oneof, so the schema itself does not enforce that; writers must.
message TxnMetadataPB {
  // Whether the transaction was aborted. If true, 'commit_timestamp' must not
  // be set.
  optional bool aborted = 1;

  // The commit timestamp of the transaction. If set, 'aborted' must not be
  // set.
  // NOTE(review): the timestamp encoding/units are not visible in this file --
  // confirm against the clock implementation.
  optional int64 commit_timestamp = 2;

  // The timestamp used by the MVCC op that tracks the commit of this
  // transaction. This is sent to the transaction status manager in response to
  // a BEGIN_COMMIT request to be used to assign a commit timestamp that is
  // higher than all participants' commit MVCC op timestamps.
  //
  // When iterating through mutations at a specific clean snapshot (as in a
  // READ_AT_SNAPSHOT or diff scan), both this MVCC op timestamp and the commit
  // timestamp must be applied for the mutation to be considered committed in
  // that snapshot.
  //
  // When iterating through mutations at the latest snapshot (as in READ_LATEST
  // or during compactions), this MVCC op timestamp must be applied and there
  // must be a commit timestamp for the mutation to be considered committed --
  // this avoids reading dirty, uncommitted rows.
  optional int64 commit_mvcc_op_timestamp = 3;

  // Whether or not this transaction has flushed its MRS after committing. If
  // set to true, Kudu should not create an MRS for this transaction when
  // bootstrapping.
  optional bool flushed_committed_mrs = 4;

  // TODO(awong): add an owner field to this for uncommitted transactions.
}

// The super-block keeps track of the tablet data blocks.
// A tablet contains one or more RowSets, which contain
// a set of blocks (one for each column), a set of delta blocks
// and optionally a block containing the bloom filter
// and a block containing the compound-keys.
message TabletSuperBlockPB {
  // Table ID of the table this tablet is part of.
  required bytes table_id = 1;

  // ID of this tablet.
  required bytes tablet_id = 2;

  // The type of table this tablet belongs to. If not set, the assumption is
  // this is a user-defined table as opposed to a Kudu-internal system table.
  optional TableTypePB table_type = 19;

  // The ID of the latest durable MemRowSet.
  required int64 last_durable_mrs_id = 3;

  // DEPRECATED.
  // NOTE(review): presumably superseded by 'partition' below -- confirm. Keep
  // the declaration so that field number 4 is never reused.
  optional bytes start_key = 4;

  // DEPRECATED.
  // NOTE(review): presumably superseded by 'partition' below -- confirm. Keep
  // the declaration so that field number 5 is never reused.
  optional bytes end_key = 5;

  // The partition of the table.
  optional PartitionPB partition = 13;

  // The rowsets that make up this tablet.
  repeated RowSetDataPB rowsets = 6;

  // The table name, the latest schema, and the version of that schema.
  // TODO: maybe 'schema' should be TableSchemaPB? Need to actually put those
  // attributes into use throughout the code. Using the simpler one for now.
  required string table_name = 7;
  required SchemaPB schema = 8;
  required uint32 schema_version = 9;

  // The partition schema of the table.
  optional PartitionSchemaPB partition_schema = 14;

  // The current state of the tablet's data. Reads as TABLET_DATA_UNKNOWN when
  // the field is absent.
  optional TabletDataState tablet_data_state = 10 [ default = TABLET_DATA_UNKNOWN ];

  // Blocks that became orphans after flushing this superblock. In other
  // words, the set difference of the blocks belonging to the previous
  // superblock and this one.
  //
  // It's always safe to delete the blocks found here.
  repeated BlockIdPB orphaned_blocks = 11;

  // For tablets that have been tombstoned, stores the last OpId stored in the
  // WAL before tombstoning.
  // Only relevant for TOMBSTONED tablets.
  optional consensus.OpId tombstone_last_logged_opid = 12;

  // Tablet data is spread across a data directory group. If this is not set
  // and the tablet state is TABLET_DATA_READY, it is assumed that the data is
  // from a version of Kudu before 1.5.0. In this case, a new group will be
  // created spanning all data directories.
  optional DataDirGroupPB data_dir_group = 15;

  // Whether the tablet supports counting live rows. If false, 'live_row_count'
  // may be inaccurate and should be ignored. Live row counting is only
  // supported for newly created tablets, not for older ones.
  optional bool supports_live_row_count = 16;

  // The table's extra-config.
  optional TableExtraConfigPB extra_config = 17;

  // The dimension label for tablet. Used by the master to determine load when
  // creating new tablet replicas based on dimension.
  optional string dimension_label = 18;

  // Map from txn ID to metadata associated with the transaction. This is
  // updated on each metadata flush to reflect the current in-memory state of
  // transactions. In between an in-memory state update and a flush,
  // participant ops should be anchored to replay the updates upon restarting.
  // As with any protobuf map, the iteration order of entries is unspecified.
  // TODO(awong): consider storing these separately from the superblock.
  map<int64, TxnMetadataPB> txn_metadata = 20;
}

// Tablet states represent stages of a TabletReplica's object lifecycle and are
// reported to the master via tablet reports.
//
// Legal state transitions for a single TabletReplica object:
//
// NOT_INITIALIZED -> INITIALIZED -> BOOTSTRAPPING -> RUNNING -> STOPPING -> STOPPED -> SHUTDOWN
//             |              |                |                  ^ ^ ^
//             |              |                |                  | | |
//             |              |                +------------------+ | |
//             |              +-------------------------------------+ |
//             +------------------------------------------------------+
//
// Since a TabletReplica instance is replaced when a Tablet Copy operation
// occurs, from a remote perspective it is possible for a tablet replica to
// appear to transition from SHUTDOWN back to NOT_INITIALIZED.
// Note: the numeric values below do not follow lifecycle order (for example
// NOT_INITIALIZED = 6 while BOOTSTRAPPING = 0). They are fixed wire values and
// must not be renumbered.
enum TabletStatePB {
  // The state is not known. This value is declared first, so under proto2 it
  // is the default for any TabletStatePB field that does not specify one; do
  // not move another value ahead of it.
  UNKNOWN = 999;

  // Tablet has not yet been initialized.
  NOT_INITIALIZED = 6;

  // Tablet has been initialized but not yet started.
  INITIALIZED = 5;

  // Indicates the Tablet is bootstrapping, i.e. that the Tablet is not
  // available for RPC.
  BOOTSTRAPPING = 0;

  // Once the configuration phase is over Peers are in RUNNING state. In this
  // state Peers are available for client RPCs.
  RUNNING = 1;

  // The tablet failed for some reason. TabletReplica::error() will return
  // the reason for the failure.
  // NOTE(review): FAILED does not appear in the state-transition diagram that
  // precedes this enum -- confirm which states may transition to it.
  FAILED = 2;

  // The Tablet is shutting down, and will not accept further requests.
  STOPPING = 3;

  // The tablet has been stopped, possibly because it has been tombstoned.
  STOPPED = 7;

  // The Tablet has been completely shut down.
  SHUTDOWN = 4;
}

// Statistics for a tablet replica.
message ReportedTabletStatsPB {
  // Size of the tablet replica on disk.
  // NOTE(review): presumably in bytes -- confirm against the code that
  // populates this field.
  optional uint64 on_disk_size = 1;

  // Number of live rows in the tablet replica.
  // NOTE(review): presumably only meaningful when the tablet supports live row
  // counting (see TabletSuperBlockPB.supports_live_row_count) -- confirm.
  optional uint64 live_row_count = 2;
}
