blob: f9cf84c961c971e92df84f1eccc21f90fbd96bb3 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// The definitions in this file use proto2-only constructs ('required' fields
// and explicit '[default = ...]' values), so the dialect is declared
// explicitly instead of relying on protoc's implicit proto2 fallback.
syntax = "proto2";

package kudu.consensus;

// Java classes generated from this file are placed in this Java package.
option java_package = "org.kududb.consensus";

import "kudu/common/common.proto";
// ===========================================================================
// Consensus Metadata
// ===========================================================================
// Describes one peer of a configuration.
message RaftPeerPB {
  // The roles a peer may hold.
  //
  // UNKNOWN_ROLE is declared first on purpose of keeping it the proto2
  // default: in proto2 the first declared enum value is the default for an
  // unset field, so the declaration order below must not be changed.
  enum Role {
    UNKNOWN_ROLE = 999;

    // The node is a follower in the configuration: it participates in
    // majorities and accepts Consensus::Update() calls.
    FOLLOWER = 0;

    // The node is the current leader of the configuration: it participates
    // in majorities and accepts Consensus::Append() calls.
    LEADER = 1;

    // The node takes part in the configuration passively: it accepts
    // Consensus::Update() calls but takes no part in elections or
    // majorities.
    LEARNER = 2;

    // The node is not a participant of the configuration: it does not accept
    // Consensus::Update() calls (nor, presumably, Consensus::Append() calls
    // -- TODO confirm) and cannot take part in elections or majorities.
    // A node that leaves the configuration usually ends up in this role.
    NON_PARTICIPANT = 3;
  }

  // The kind of membership a peer has.
  //
  // As with Role, the first declared value (UNKNOWN_MEMBER_TYPE) is the
  // proto2 default, so the declaration order must be preserved.
  enum MemberType {
    UNKNOWN_MEMBER_TYPE = 999;
    NON_VOTER = 0;
    VOTER = 1;
  }

  // The peer's permanent uuid. It is optional because RaftPeerPB and
  // RaftConfigPB instances may be built before the permanent uuid is known
  // (e.g. when a configuration for Master/CatalogManager is specified
  // manually); the permanent uuid can be fetched later through RPC.
  optional bytes permanent_uuid = 1;

  // The peer's membership type (see MemberType).
  optional MemberType member_type = 2;

  // The last known address of the peer. HostPortPB is not declared in this
  // file; presumably it comes from kudu/common/common.proto.
  optional HostPortPB last_known_addr = 3;
}
// Distinguishes the committed consensus config from the active one.
//
// CONSENSUS_CONFIG_UNKNOWN is declared first, which makes it the proto2
// default for an unset field of this type; keep the declaration order.
enum ConsensusConfigType {
  CONSENSUS_CONFIG_UNKNOWN = 999;

  // The committed consensus config, i.e. the consensus configuration that
  // has been serialized through consensus and committed. Such a config has a
  // valid opid_index field set.
  CONSENSUS_CONFIG_COMMITTED = 1;

  // The active consensus config. This may be a pending consensus config that
  // has not been committed yet; when the config is not committed, its
  // opid_index field is not set.
  CONSENSUS_CONFIG_ACTIVE = 2;
}
// The set of peers that serve a single tablet.
message RaftConfigPB {
  // Index of the operation that serialized this RaftConfigPB through
  // consensus. It is set once the operation is consensus-committed
  // (replicated to a majority of voters) and before the consensus metadata
  // is updated. It stays undefined if the operation isn't committed.
  optional int64 opid_index = 1;

  // Tells whether this is a local or a distributed configuration, i.e.
  // whether a local or a distributed implementation of consensus is used.
  // An unset field reads as true.
  optional bool local = 2 [default = true];

  // The peers that make up the configuration.
  repeated RaftPeerPB peers = 3;
}
// A snapshot of a configuration as of a given moment in time.
message ConsensusStatePB {
  // The current term. A configuration is always guaranteed to have a known
  // term, hence the field is required.
  required int64 current_term = 1;

  // The uuid of the leader, if any. A configuration does not necessarily
  // have a leader at every point in time.
  //
  // Which node the local peer considers to be the leader changes according
  // to the rules of the Raft specification. Roughly, the leader becomes known
  // either when the local peer is itself elected leader, or when an update
  // is accepted from another node, which basically just amounts to a term
  // check on the UpdateConsensus() RPC request.
  //
  // Whenever the local peer sees a new term, the leader flag is cleared
  // until a new leader is acknowledged based on the above criteria. Merely
  // casting a vote for a peer is not enough to assume that the peer has won
  // the election, so this field is not updated based on our own vote.
  //
  // The leader listed here, if any, should always be a member of 'config',
  // and the term that the node is leader of _must_ equal the term listed in
  // the 'current_term' field above. The Master uses the combination of
  // current term and leader uuid to decide when to update its cache of the
  // current leader for client lookup purposes.
  //
  // Raft has a corner case in which a node may be elected leader of a
  // pending (uncommitted) configuration. If, in that case, the leader of the
  // pending configuration is not a member of the committed configuration,
  // and it is the committed configuration that is being reported, then the
  // process filling in the ConsensusStatePB object should clear the
  // leader_uuid field.
  optional string leader_uuid = 2;

  // The peers. In some contexts this is the committed configuration, which
  // always has config.opid_index set. In other contexts it may be a
  // "pending" configuration, i.e. one that is active but still in the
  // process of being committed. Either way, initial peership is set on
  // tablet start, so this field should always be present.
  required RaftConfigPB config = 3;
}
// Serializes all of the persistent state that Consensus needs and that is
// not stored in the WAL, such as the state used for leader election and for
// communication on startup.
message ConsensusMetadataPB {
  // The last-committed peership.
  required RaftConfigPB committed_config = 1;

  // The latest term this server has seen. It is initialized to 0 when a
  // configuration is first created.
  //
  // Each time a new election is started, the candidate increments this value
  // by one and requests votes from peers.
  //
  // When any RPC or RPC response arrives from another node carrying a term
  // higher than this one, the server should step down to FOLLOWER and set
  // its current_term to match the caller's term.
  //
  // When a follower receives an UpdateConsensus RPC carrying a term lower
  // than this one, the RPC must be coming from a former LEADER that has not
  // yet realized that its term is over. In that case the UpdateConsensus()
  // call is rejected with ConsensusErrorPB::INVALID_TERM.
  //
  // When a follower receives a RequestConsensusVote() RPC carrying an
  // earlier term, the vote is denied.
  required int64 current_term = 2;

  // The permanent UUID of the candidate voted for in 'current_term'. Not
  // present if no vote was made in the current term.
  optional string voted_for = 3;
}