blob: 045b20e981ffa5bc97dca314371308f146a8b3c4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This schema is written against the proto2 language version.
syntax = "proto2";
// All definitions below are declared in the kudu.consensus package.
package kudu.consensus;
// Java package used for the classes generated from this file.
option java_package = "org.apache.kudu.consensus";
// NOTE(review): presumably the source of HostPortPB, which is referenced
// below but not defined in this file -- confirm.
import "kudu/common/common.proto";
// ===========================================================================
// Consensus Metadata
// ===========================================================================
// Attributes tracked individually for each replica.
message RaftPeerAttrsPB {
  // When 'true', the replica is to be promoted once it has caught up with
  // the leader, i.e. its membership type changes from NON_VOTER to VOTER.
  // Only meaningful for NON_VOTER replicas. Reads as 'false' when unset.
  optional bool promote = 1 [default = false];

  // When 'true', the replica has to be replaced no matter what its health
  // report says. Reads as 'false' when unset.
  optional bool replace = 2 [default = false];
}
// Health report for a replica (a.k.a. peer).
message HealthReportPB {
  // The HealthStatus values form a fully-connected state machine: a
  // transition from any state to any other state is allowed.
  enum HealthStatus {
    // Nothing is known about the health status. Being the first value
    // listed, this is what an unset proto2 field of this type reads as.
    UNKNOWN = 999;

    // The replica has failed and needs to be replaced. The failure may be
    // transient, so the replica might become healthy again soon.
    FAILED = 0;

    // The replica is working properly.
    HEALTHY = 1;

    // The replica has failed irreversibly and unrecoverably, and needs to be
    // replaced. The failure is permanent: the replica definitely cannot
    // return to a healthy state.
    FAILED_UNRECOVERABLE = 2;
  }

  // The replica's overall health status. It reflects, at a minimum, the
  // replica's responsiveness (i.e. the time of last contact) and how far the
  // replica's WAL lags behind the leader's.
  optional HealthStatus overall_health = 1;
}
// Describes a single peer that belongs to a configuration.
message RaftPeerPB {
  // Roles a peer can have.
  enum Role {
    // Being the first value listed, this is what an unset proto2 field of
    // this type reads as.
    UNKNOWN_ROLE = 999;

    // The node is a follower in the configuration: it participates in
    // majorities and accepts Consensus::Update() calls.
    FOLLOWER = 0;

    // The node is the current leader of the configuration: it participates
    // in majorities and accepts Consensus::Append() calls.
    LEADER = 1;

    // The node takes part in the configuration in a passive role: it accepts
    // Consensus::Update() calls, but does not participate in elections or
    // majorities.
    LEARNER = 2;

    // The node is not a participant of the configuration: it does not accept
    // Consensus::Update() calls and cannot participate in elections or
    // majorities. A node that leaves the configuration usually ends up in
    // this role.
    // NOTE(review): presumably Consensus::Append() calls are not accepted
    // either -- confirm.
    NON_PARTICIPANT = 3;
  }

  // Membership type of a peer.
  enum MemberType {
    // Being the first value listed, this is what an unset proto2 field of
    // this type reads as.
    UNKNOWN_MEMBER_TYPE = 999;
    NON_VOTER = 0;
    VOTER = 1;
  }

  // Permanent UUID of the peer. It is optional because RaftPeerPB and
  // RaftConfigPB instances may be built before the permanent UUID is known
  // (e.g. when a configuration for Master/CatalogManager is specified
  // manually); the UUID can be fetched later through RPC.
  optional bytes permanent_uuid = 1;

  // Membership type of this peer.
  optional MemberType member_type = 2;

  // Last known address of this peer.
  optional HostPortPB last_known_addr = 3;

  // Attributes of the replica.
  optional RaftPeerAttrsPB attrs = 4;

  // Health report of the replica as seen by the leader. This field exists
  // at run time only: it should neither be persisted nor read back from
  // persistent storage.
  optional HealthReportPB health_report = 5;
}
// The set of peers that serve a single tablet.
message RaftConfigPB {
  // Index of the operation through which this RaftConfigPB was serialized
  // via consensus. The value is assigned once the operation is
  // consensus-committed (replicated to a majority of voters) and before the
  // consensus metadata is updated. For an operation that is not committed
  // the value is left undefined.
  optional int64 opid_index = 1;

  // Retired parameter; obsolete.
  optional bool OBSOLETE_local = 2;

  // Allows unsafe config change operations when 'true'. Reads as 'false'
  // when unset.
  optional bool unsafe_config_change = 4 [default = false];

  // Peers that make up the configuration.
  repeated RaftPeerPB peers = 3;
}
// A point-in-time snapshot of a configuration.
message ConsensusStatePB {
  // The term of the configuration; it is guaranteed to always be known.
  required int64 current_term = 1;

  // UUID of the leader, if any: a configuration does not necessarily have a
  // leader at every moment.
  //
  // Which node the local peer regards as leader changes according to the
  // rules of the Raft specification. Roughly, it happens either when the
  // local peer itself is elected leader, or when an update from another node
  // is accepted, which essentially boils down to a term check on the
  // UpdateConsensus() RPC request.
  //
  // Each time the local peer observes a new term, the leader is cleared and
  // stays so until a new leader is acknowledged by the criteria above.
  // Merely voting for a peer is not enough to conclude that the peer has won
  // the election, so this field is not updated based on the local vote.
  //
  // The leader may belong to the committed configuration, to the pending
  // one, or to both.
  optional string leader_uuid = 2;

  // The committed peers. The initial peership is set when the tablet
  // starts, so this field should always be present.
  required RaftConfigPB committed_config = 3;

  // Peers of the pending configuration, when one exists.
  optional RaftConfigPB pending_config = 4;
}
// Serialized form of all persistent Consensus state that is kept outside of
// the WAL, e.g. the state needed for leader election and for communication
// on startup.
message ConsensusMetadataPB {
  // The peership that was committed last.
  required RaftConfigPB committed_config = 1;

  // The most recent term seen by this server. It is initialized to 0 when a
  // configuration is first created.
  //
  // A candidate starting a new election increments the term by one and
  // requests votes from peers.
  //
  // On receiving any RPC request or response from another node that carries
  // a term higher than this one, the server should step down to FOLLOWER and
  // set its current_term to match the caller's term.
  //
  // A follower receiving an UpdateConsensus RPC with a term lower than this
  // one can infer that the RPC comes from a former LEADER that has not yet
  // realized its term is over. Such an UpdateConsensus() call is rejected
  // with ConsensusErrorPB::INVALID_TERM.
  //
  // A follower receiving a RequestConsensusVote() RPC with an earlier term
  // denies the vote.
  required int64 current_term = 2;

  // Permanent UUID of the candidate this server voted for in 'current_term'.
  // Not present when no vote was made in the current term.
  optional string voted_for = 3;
}