#include <cstdint>
#include <ostream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "kudu/common/wire_protocol.pb.h"
#include "kudu/consensus/consensus.pb.h"
#include "kudu/consensus/metadata.pb.h"
#include "kudu/consensus/opid.pb.h"
#include "kudu/consensus/opid_util.h"
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/integration-tests/cluster_itest_util.h"
#include "kudu/integration-tests/cluster_verifier.h"
#include "kudu/integration-tests/mini_cluster_fs_inspector.h"
#include "kudu/integration-tests/raft_consensus-itest-base.h"
#include "kudu/integration-tests/test_workload.h"
#include "kudu/mini-cluster/external_mini_cluster.h"
#include "kudu/tablet/metadata.pb.h"
#include "kudu/tserver/tablet_server-test-base.h"
#include "kudu/tserver/tserver.pb.h"
#include "kudu/util/metrics.h"
#include "kudu/util/monotime.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/status.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
using kudu::cluster::ExternalTabletServer;
using kudu::consensus::COMMITTED_OPID;
using kudu::consensus::ConsensusStatePB;
using kudu::consensus::MakeOpId;
using kudu::consensus::OpId;
using kudu::consensus::RaftPeerPB;
using kudu::itest::AddServer;
using kudu::itest::GetConsensusState;
using kudu::itest::GetLastOpIdForReplica;
using kudu::itest::GetReplicaStatusAndCheckIfLeader;
using kudu::itest::LeaderStepDown;
using kudu::itest::RemoveServer;
using kudu::itest::RequestVote;
using kudu::itest::StartElection;
using kudu::itest::TServerDetails;
using kudu::itest::TabletServerMap;
using kudu::itest::WaitUntilLeader;
using kudu::itest::WriteSimpleTestRow;
using kudu::pb_util::SecureShortDebugString;
using kudu::tablet::TABLET_DATA_COPYING;
using kudu::tablet::TABLET_DATA_TOMBSTONED;
using std::string;
using std::unordered_set;
using std::vector;
using strings::Substitute;
namespace kudu {
namespace tserver {
// Row key and integer value passed to WriteSimpleTestRow() by the tests in
// this file that write a single row.
static const int kTestRowKey = 1234;
static const int kTestRowIntVal = 5678;
// Test fixture for the Raft leader election integration tests in this file.
// It derives from RaftConsensusITestBase and declares the helpers defined
// below.
class RaftConsensusElectionITest : public RaftConsensusITestBase {
// Creates the test cluster, appending 'extra_ts_flags' to the tablet server
// flags used for it.
void CreateClusterForChurnyElectionsTests(const vector<string>& extra_ts_flags);
// Shared body of the ChurnyElections* tests. 'max_rows_to_insert' is the
// number of inserted rows after which the helper stops waiting on 'workload'.
void DoTestChurnyElections(TestWorkload* workload, int max_rows_to_insert);
// Assembles the tablet server flags and creates the cluster named
// "raft_consensus-itest-cluster" with them. The entries of 'extra_ts_flags'
// are appended at the end of the flag list.
void RaftConsensusElectionITest::CreateClusterForChurnyElectionsTests(
const vector<string>& extra_ts_flags) {
vector<string> ts_flags;
// On TSAN builds, we need to be a little bit less churny in order to make
// any progress at all.
ts_flags.insert(ts_flags.end(), extra_ts_flags.cbegin(), extra_ts_flags.cend());
// The local flag list is not used after this call, so it is moved into it.
NO_FATALS(CreateCluster("raft_consensus-itest-cluster", std::move(ts_flags)));
// Shared body of the ChurnyElections* tests. Configures 'workload' with a
// long write timeout, waits while fewer than 'max_rows_to_insert' rows have
// been inserted and less than 30 seconds of wall time have elapsed, requires
// that at least one row was inserted, and uses a ClusterVerifier on the
// cluster.
void RaftConsensusElectionITest::DoTestChurnyElections(TestWorkload* workload,
int max_rows_to_insert) {
// Set a really high write timeout so that even in the presence of many failures we
// can verify an exact number of rows in the end, thanks to exactly-once semantics.
workload->set_write_timeout_millis(60 * 1000 /* 60 seconds */);
// Run for either a prescribed number of writes, or 30 seconds,
// whichever comes first. This prevents test timeouts on slower
// build machines, TSAN builds, etc.
Stopwatch sw;
while (workload->rows_inserted() < max_rows_to_insert &&
sw.elapsed().wall_seconds() < 30) {
// The workload must have made at least some progress despite the churn.
ASSERT_GT(workload->rows_inserted(), 0) << "No rows inserted";
// Ensure that the replicas converge.
// We expect an exact result due to exactly-once semantics and snapshot scans.
ClusterVerifier v(cluster_.get());
// Inserts a batch of rows, explicitly starts an election on the last live
// follower, inserts a second batch of the same size, and verifies that all
// replicas agree on the combined row count.
TEST_F(RaftConsensusElectionITest, RunLeaderElection) {
// Reset consensus RPC timeout to the default value, otherwise the election
// might fail often, making the test flaky.
FLAGS_consensus_rpc_timeout_ms = 1000;
// Insert ten times as many rows when slow tests are allowed.
int num_iters = AllowSlowTests() ? 10 : 1;
FLAGS_client_inserts_per_thread * num_iters,
NO_FATALS(AssertAllReplicasAgree(FLAGS_client_inserts_per_thread * num_iters));
// Select the last follower to be new leader.
vector<TServerDetails*> followers;
GetOnlyLiveFollowerReplicas(tablet_id_, &followers);
// Now shut down the current leader.
TServerDetails* leader = DCHECK_NOTNULL(GetLeaderReplicaOrNull(tablet_id_));
ExternalTabletServer* leader_ets = cluster_->tablet_server_by_uuid(leader->uuid());
TServerDetails* replica = followers.back();
// The chosen follower must be a different server than the current leader.
CHECK_NE(leader->instance_id.permanent_uuid(), replica->instance_id.permanent_uuid());
// Make the new replica leader.
ASSERT_OK(StartElection(replica, tablet_id_, MonoDelta::FromSeconds(10)));
// Insert a bunch more rows.
InsertTestRowsRemoteThread(FLAGS_client_inserts_per_thread * num_iters,
FLAGS_client_inserts_per_thread * num_iters,
// Restart the original replica and make sure they all agree.
// The expected count is twice the size of a single batch because the same
// number of rows was inserted both before and after the election.
NO_FATALS(AssertAllReplicasAgree(FLAGS_client_inserts_per_thread * num_iters * 2));
// This test sets all of the election timers to be very short, resulting
// in a lot of churn. We expect to make some progress and not diverge or
// crash, despite the frequent re-elections and races.
TEST_F(RaftConsensusElectionITest, ChurnyElections) {
// Target ten times as many writes when slow tests are allowed.
const int kNumWrites = AllowSlowTests() ? 10000 : 1000;
TestWorkload workload(cluster_.get());
NO_FATALS(DoTestChurnyElections(&workload, kNumWrites));
// The same test as ChurnyElections, except that it injects artificial latency
// when propagating notifications from the queue back to consensus. This
// previously reproduced bugs like KUDU-1078, which normally only appear under
// high load.
TEST_F(RaftConsensusElectionITest, ChurnyElections_WithNotificationLatency) {
// Target ten times as many writes when slow tests are allowed.
const int kNumWrites = AllowSlowTests() ? 10000 : 1000;
TestWorkload workload(cluster_.get());
NO_FATALS(DoTestChurnyElections(&workload, kNumWrites));
// The same as the ChurnyElections test, except that it inserts many duplicated
// rows. This emulates cases where there are many duplicate keys which, due to
// two-phase locking, may cause deadlocks and other anomalies that cannot be
// observed when keys are unique.
TEST_F(RaftConsensusElectionITest, ChurnyElections_WithDuplicateKeys) {
// Target ten times as many writes when slow tests are allowed.
const int kNumWrites = AllowSlowTests() ? 10000 : 1000;
TestWorkload workload(cluster_.get());
// Increase the number of rows per batch to get a higher chance of key collision.
NO_FATALS(DoTestChurnyElections(&workload, kNumWrites));
// Test automatic leader election by killing leaders.
// Rows are written in rounds; after each of the first rounds the current
// leader is recorded as killed and the test waits for a new leader. At the
// end, the replicas must agree on the total number of rows written.
TEST_F(RaftConsensusElectionITest, AutomaticLeaderElection) {
// Use a larger, 5-replica configuration when slow tests are allowed.
if (AllowSlowTests()) {
FLAGS_num_tablet_servers = 5;
FLAGS_num_replicas = 5;
TServerDetails* leader;
ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &leader));
// Leaders killed so far; used to restart them at the end of the test.
unordered_set<TServerDetails*> killed_leaders;
// Kill only a minority of the replicas (num_replicas / 2), so that a
// majority (num_replicas / 2 + 1) is still alive at the end.
const int kNumLeadersToKill = FLAGS_num_replicas / 2;
const int kFinalNumReplicas = FLAGS_num_replicas / 2 + 1;
// The loop runs kFinalNumReplicas times, i.e. one more write round than the
// number of leaders killed: the last round has no kill after it.
for (int leaders_killed = 0; leaders_killed < kFinalNumReplicas; leaders_killed++) {
LOG(INFO) << Substitute("Writing data to leader of $0-node config ($1 alive)...",
FLAGS_num_replicas, FLAGS_num_replicas - leaders_killed);
InsertTestRowsRemoteThread(leaders_killed * FLAGS_client_inserts_per_thread,
// At this point, the writes are flushed but the commit index may not be
// propagated to all replicas. We kill the leader anyway.
if (leaders_killed < kNumLeadersToKill) {
LOG(INFO) << "Killing current leader " << leader->instance_id.permanent_uuid() << "...";
InsertOrDie(&killed_leaders, leader);
LOG(INFO) << "Waiting for new guy to be elected leader.";
ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &leader));
// Restart every node that was killed, and wait for the nodes to converge.
for (TServerDetails* killed_node : killed_leaders) {
// Verify the data on the remaining replicas.
// One batch of FLAGS_client_inserts_per_thread rows is expected for each of
// the kFinalNumReplicas rounds of the loop above.
NO_FATALS(AssertAllReplicasAgree(FLAGS_client_inserts_per_thread * kFinalNumReplicas));
// Single-replica leader election test.
TEST_F(RaftConsensusElectionITest, AutomaticLeaderElectionOneReplica) {
// Run with exactly one tablet server hosting the only replica.
FLAGS_num_tablet_servers = 1;
FLAGS_num_replicas = 1;
// Ensure that single-node Raft configs elect themselves as leader
// immediately upon Consensus startup.
tablet_id_, MonoDelta::FromMilliseconds(500)));
// Verifies the leader step-down behavior on a 3-replica tablet: a replica
// that is not the leader reports IllegalState; once elected it accepts a
// write; after stepping down its Raft term is higher than before, a second
// step-down fails with NOT_THE_LEADER, and writes sent to it are rejected.
TEST_F(RaftConsensusElectionITest, LeaderStepDown) {
const auto kTimeout = MonoDelta::FromSeconds(10);
const vector<string> kTsFlags = {
const vector<string> kMasterFlags = {
FLAGS_num_replicas = 3;
FLAGS_num_tablet_servers = 3;
NO_FATALS(BuildAndStart(kTsFlags, kMasterFlags));
// Collect the tablet servers into a vector so they can be accessed by index.
vector<TServerDetails*> tservers;
AppendValuesFromMap(tablet_servers_, &tservers);
// Start with no leader.
const auto* ts = tservers[0];
Status s = GetReplicaStatusAndCheckIfLeader(ts, tablet_id_, kTimeout);
ASSERT_TRUE(s.IsIllegalState()) << "TS #0 should not be leader yet: " << s.ToString();
// Become leader.
ASSERT_OK(StartElection(ts, tablet_id_, kTimeout));
ASSERT_OK(WaitUntilLeader(ts, tablet_id_, kTimeout));
// As the leader, TS #0 must accept a write.
ASSERT_OK(WriteSimpleTestRow(ts, tablet_id_, RowOperationsPB::INSERT,
kTestRowKey, kTestRowIntVal, "foo", kTimeout));
ASSERT_OK(WaitForServersToAgree(kTimeout, tablet_servers_, tablet_id_, 2));
// Get the Raft term from the newly established leader.
ConsensusStatePB cstate_before;
ASSERT_OK(GetConsensusState(ts, tablet_id_, kTimeout,
consensus::EXCLUDE_HEALTH_REPORT, &cstate_before));
// Step down and test that a 2nd stepdown returns the expected result.
ASSERT_OK(LeaderStepDown(ts, tablet_id_, kTimeout));
// Get the Raft term from the leader that has just stepped down.
ConsensusStatePB cstate_after;
ASSERT_OK(GetConsensusState(ts, tablet_id_, kTimeout,
consensus::EXCLUDE_HEALTH_REPORT, &cstate_after));
// The stepped-down leader should increment its run-time Raft term.
EXPECT_GT(cstate_after.current_term(), cstate_before.current_term());
// The second step-down request must fail, reporting NOT_THE_LEADER.
TabletServerErrorPB error;
s = LeaderStepDown(ts, tablet_id_, kTimeout, &error);
ASSERT_TRUE(s.IsIllegalState()) << "TS #0 should not be leader anymore: " << s.ToString();
ASSERT_EQ(TabletServerErrorPB::NOT_THE_LEADER, error.code()) << SecureShortDebugString(error);
// A write sent to the former leader must be rejected as well.
s = WriteSimpleTestRow(ts, tablet_id_, RowOperationsPB::INSERT,
kTestRowKey, kTestRowIntVal, "foo", kTimeout);
ASSERT_TRUE(s.IsIllegalState()) << "TS #0 should not accept writes as follower: "
<< s.ToString();
// Test for KUDU-699: sets the consensus RPC timeout to be long,
// and freezes both followers before asking the leader to step down.
// Prior to fixing KUDU-699, the step-down process would block
// until the pending requests timed out.
TEST_F(RaftConsensusElectionITest, StepDownWithSlowFollower) {
const vector<string> kTsFlags = {
// Bump up the RPC timeout, so that we can verify that the stepdown responds
// quickly even when an outbound request is hung.
const vector<string> kMasterFlags = {
NO_FATALS(BuildAndStart(kTsFlags, kMasterFlags));
// Collect the tablet servers into a vector so they can be accessed by index.
vector<TServerDetails*> tservers;
AppendValuesFromMap(tablet_servers_, &tservers);
// Make the first tablet server the leader.
ASSERT_OK(StartElection(tservers[0], tablet_id_, MonoDelta::FromSeconds(10)));
ASSERT_OK(WaitUntilLeader(tservers[0], tablet_id_, MonoDelta::FromSeconds(10)));
// Stop both followers.
for (int i = 1; i < 3; i++) {
// Sleep a little bit of time to make sure that the leader has outstanding heartbeats
// to the paused followers before requesting the stepdown.
// Step down should respond quickly despite the hung requests.
// The step-down request is given a 3-second timeout.
ASSERT_OK(LeaderStepDown(tservers[0], tablet_id_, MonoDelta::FromSeconds(3)));
// Ensure that we can elect a server that is in the "pending" configuration.
// This is required by the Raft protocol. See Diego Ongaro's PhD thesis, section
// 4.1, where it states that "it is the caller’s configuration that is used in
// reaching consensus, both for voting and for log replication".
// This test also tests the case where a node comes back from the dead to a
// leader that was not in its configuration when it died. That should also work, i.e.
// the revived node should accept writes from the new leader.
TEST_F(RaftConsensusElectionITest, ElectPendingVoter) {
// Test plan:
// 1. Disable failure detection to avoid non-deterministic behavior.
// 2. Start with a configuration size of 5, all servers synced.
// 3. Remove one server from the configuration, wait until committed.
// 4. Pause the 3 remaining non-leaders (SIGSTOP).
// 5. Run a config change to add back the previously-removed server.
// Ensure that, while the op cannot be committed yet due to lack of a
// majority in the new config (only 2 out of 5 servers are alive), the op
// has been replicated to both the local leader and the new member.
// 6. Force the existing leader to step down.
// 7. Resume one of the paused nodes so that a majority (of the 5-node
// configuration, but not the original 4-node configuration) will be available.
// 8. Start a leader election on the new (pending) node. It should win.
// 9. Unpause the two remaining stopped nodes.
// 10. Wait for all nodes to sync to the new leader's log.
const vector<string> kTsFlags = {
const vector<string> kMasterFlags = {
FLAGS_num_tablet_servers = 5;
FLAGS_num_replicas = 5;
NO_FATALS(BuildAndStart(kTsFlags, kMasterFlags));
// Collect the tablet servers into a vector so they can be accessed by index.
vector<TServerDetails*> tservers;
AppendValuesFromMap(tablet_servers_, &tservers);
ASSERT_EQ(FLAGS_num_tablet_servers, tservers.size());
// Elect server 0 as leader and wait for log index 1 to propagate to all servers.
TServerDetails* initial_leader = tservers[0];
ASSERT_OK(StartElection(initial_leader, tablet_id_, MonoDelta::FromSeconds(10)));
ASSERT_OK(WaitUntilCommittedOpIdIndexIs(1, initial_leader, tablet_id_,
// The server we will remove and then bring back.
TServerDetails* final_leader = tservers[4];
// Kill the master, so we can change the config without interference.
// Now remove server 4 from the configuration.
// 'active_tablet_servers' tracks the servers that are expected to agree on
// the log at each step; it starts as a copy of all tablet servers.
TabletServerMap active_tablet_servers = tablet_servers_;
LOG(INFO) << "Removing tserver with uuid " << final_leader->uuid();
ASSERT_OK(RemoveServer(initial_leader, tablet_id_, final_leader,
ASSERT_EQ(1, active_tablet_servers.erase(final_leader->uuid()));
// Log index the active servers are expected to have reached; it is
// incremented before each subsequent wait below.
int64_t cur_log_index = 2;
active_tablet_servers, tablet_id_, cur_log_index));
// Pause tablet servers 1 through 3, so they won't see the operation to add
// server 4 back.
LOG(INFO) << "Pausing 3 replicas...";
for (int i = 1; i <= 3; i++) {
ExternalTabletServer* replica_ts = cluster_->tablet_server_by_uuid(tservers[i]->uuid());
// Now add server 4 back to the peers.
// This operation will time out on the client side.
LOG(INFO) << "Adding back Peer " << final_leader->uuid() << " and expecting timeout...";
Status s = AddServer(initial_leader, tablet_id_, final_leader, RaftPeerPB::VOTER,
ASSERT_TRUE(s.IsTimedOut()) << "Expected AddServer() to time out. Result: " << s.ToString();
LOG(INFO) << "Timeout achieved.";
active_tablet_servers = tablet_servers_; // Reset to the unpaused servers.
for (int i = 1; i <= 3; i++) {
ASSERT_EQ(1, active_tablet_servers.erase(tservers[i]->uuid()));
// Only wait for TS 0 and 4 to agree that the new change config op has been
// replicated.
active_tablet_servers, tablet_id_, ++cur_log_index));
// Now that TS 4 is electable (and pending), have TS 0 step down.
LOG(INFO) << "Forcing Peer " << initial_leader->uuid() << " to step down...";
ASSERT_OK(LeaderStepDown(initial_leader, tablet_id_, MonoDelta::FromSeconds(10)));
// Resume TS 1 so we have a majority of 3 to elect a new leader.
LOG(INFO) << "Resuming Peer " << tservers[1]->uuid() << " ...";
InsertOrDie(&active_tablet_servers, tservers[1]->uuid(), tservers[1]);
// Now try to get TS 4 elected. It should succeed and push a NO_OP.
// Note: tservers[4] is the same server as 'final_leader'.
LOG(INFO) << "Trying to elect Peer " << tservers[4]->uuid() << " ...";
ASSERT_OK(StartElection(final_leader, tablet_id_, MonoDelta::FromSeconds(10)));
active_tablet_servers, tablet_id_, ++cur_log_index));
// Resume the remaining paused nodes.
LOG(INFO) << "Resuming remaining nodes...";
// From here on, all tablet servers are expected to agree again.
active_tablet_servers = tablet_servers_;
// Do one last operation on the new leader: an insert.
ASSERT_OK(WriteSimpleTestRow(final_leader, tablet_id_, RowOperationsPB::INSERT,
kTestRowKey, kTestRowIntVal, "Ob-La-Di, Ob-La-Da",
// Wait for all servers to replicate everything up through the last write op.
active_tablet_servers, tablet_id_, ++cur_log_index));
// Have a replica fall behind the leader's log, then fail a tablet copy. It
// should still be able to vote in leader elections.
// This test is relevant only for the 3-2-3 replica management scheme, where
// the replica with the tombstoned tablet is added back into the tablet as a
// voter (that's why the flag "--raft_prepare_replacement_before_eviction=false"
// is added). In the case of the 3-4-3 replica management scheme, the newly
// added replica is a non-voter and it's irrelevant whether it can or cannot
// vote.
TEST_F(RaftConsensusElectionITest, TombstonedVoteAfterFailedTabletCopy) {
const MonoDelta kTimeout = MonoDelta::FromSeconds(30);
const vector<string> kMasterFlags = {
vector<string> ts_flags {
AddFlagsForLogRolls(&ts_flags); // For CauseFollowerToFallBehindLogGC().
NO_FATALS(BuildAndStart(ts_flags, kMasterFlags));
// Working copy of the tablet server map; the follower that falls behind is
// removed from it below.
TabletServerMap active_tablet_servers = tablet_servers_;
// Sanity check: this scenario assumes three replicas on three tablet servers.
ASSERT_EQ(3, FLAGS_num_replicas);
ASSERT_EQ(3, active_tablet_servers.size());
string leader_uuid;
int64_t orig_term;
string follower_uuid;
active_tablet_servers, &leader_uuid, &orig_term, &follower_uuid));
// Wait for the abandoned follower to be evicted.
ASSERT_OK(WaitUntilCommittedConfigNumVotersIs(2, tablet_servers_[leader_uuid],
tablet_id_, kTimeout));
ASSERT_EQ(1, active_tablet_servers.erase(follower_uuid));
// Wait for the deleted tablet to be tombstoned, meaning the catalog manager
// has already sent the DeleteTablet request and it has been processed. A race
// between adding a new server and processing the DeleteTablet request is
// possible: the DeleteTablet request might be sent/processed _after_
// a new server has been added.
const int follower_idx =
follower_idx, tablet_id_, { TABLET_DATA_TOMBSTONED }, kTimeout));
// Add the evicted follower back to the config and then make it crash because
// of an injected fault during tablet copy.
"tablet_copy_fault_crash_on_fetch_all", "1.0"));
auto* leader_ts = tablet_servers_[leader_uuid];
auto* follower_ts = tablet_servers_[follower_uuid];
ASSERT_OK(AddServer(leader_ts, tablet_id_, follower_ts, RaftPeerPB::VOTER, kTimeout));
// Shut down the rest of the cluster, then only bring back the node we
// crashed, plus one other node. The tombstoned replica should still be able
// to vote and the tablet should come online.
ASSERT_OK(inspect_->CheckTabletDataStateOnTS(follower_idx, tablet_id_,
TestWorkload workload(cluster_.get());
// The workload must have inserted at least 100 rows.
ASSERT_GE(workload.rows_inserted(), 100);
// A test scenario to verify that a disruptive server doesn't start needless
// elections in case it takes a long time to replicate Raft transactions
// from leader to follower replicas (e.g., due to slowness in WAL IO ops).
TEST_F(RaftConsensusElectionITest, DisruptiveServerAndSlowWAL) {
const MonoDelta kTimeout = MonoDelta::FromSeconds(10);
// Shorten the heartbeat interval for faster test run time.
const auto kHeartbeatIntervalMs = 200;
const auto kMaxMissedHeartbeatPeriods = 3;
const vector<string> ts_flags {
Substitute("--raft_heartbeat_interval_ms=$0", kHeartbeatIntervalMs),
// Sanity check: this scenario assumes there are 3 tablet servers. The test
// table is created with RF=FLAGS_num_replicas.
ASSERT_EQ(3, FLAGS_num_replicas);
ASSERT_EQ(3, tablet_servers_.size());
// A convenience array to access each tablet server as TServerDetails.
vector<TServerDetails*> tservers;
AppendValuesFromMap(tablet_servers_, &tservers);
ASSERT_EQ(cluster_->num_tablet_servers(), tservers.size());
// The leadership might fluctuate before shutting down the third tablet
// server, so ASSERT_EVENTUALLY() below is for those rare cases.
// However, after one of the tablet servers is shut down, the leadership should
// not fluctuate because:
// 1) only two voters out of three are alive
// 2) current leader should not be disturbed by any rogue votes -- that's
// the whole premise of this test scenario
// So, for this scenario the leadership can fluctuate only if significantly
// delaying or dropping Raft heartbeats sent from leader to follower replicas.
// However, minicluster components send heartbeats via the loopback interface,
// so no real network layer that might significantly delay heartbeats
// is involved. Also, the consensus RPC queues should not overflow
// in this scenario because the number of consensus RPCs is relatively low.
// Roles assigned below: the current leader, the follower that stays up, and
// the follower that is chosen to be shut down.
TServerDetails* leader_tserver = nullptr;
TServerDetails* other_tserver = nullptr;
TServerDetails* shutdown_tserver = nullptr;
// This is a clean-up in case of retry.
if (shutdown_tserver != nullptr) {
auto* ts = cluster_->tablet_server_by_uuid(shutdown_tserver->uuid());
if (ts->IsShutdown()) {
// Turn off WAL latency injection on every tablet server.
for (auto idx = 0; idx < cluster_->num_tablet_servers(); ++idx) {
auto* ts = cluster_->tablet_server(idx);
ASSERT_OK(cluster_->SetFlag(ts, "log_inject_latency", "false"));
// Forget the role assignments made by a previous attempt.
leader_tserver = nullptr;
other_tserver = nullptr;
shutdown_tserver = nullptr;
ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &leader_tserver));
ASSERT_OK(WriteSimpleTestRow(leader_tserver, tablet_id_,
RowOperationsPB::UPSERT, 0, 0, "", kTimeout));
OpId op_id;
ASSERT_OK(GetLastOpIdForReplica(tablet_id_, leader_tserver,
consensus::COMMITTED_OPID, kTimeout,
ASSERT_OK(WaitForServersToAgree(kTimeout, tablet_servers_, tablet_id_,
// Shut down one tablet server that doesn't host the leader replica of the
// target tablet and inject WAL latency to others.
for (const auto& server : tservers) {
auto* ts = cluster_->tablet_server_by_uuid(server->uuid());
if (server->uuid() != leader_tserver->uuid() && shutdown_tserver == nullptr) {
shutdown_tserver = server;
if (server->uuid() != leader_tserver->uuid() && other_tserver == nullptr) {
other_tserver = server;
// For this scenario it's important to reserve some inactivity intervals
// for the follower between processing Raft messages from the leader.
// If a vote request arrives while replica is busy with processing
// Raft message from leader, it is rejected with 'busy' status before
// evaluating the vote withholding interval.
// The latency is kHeartbeatIntervalMs * kMaxMissedHeartbeatPeriods, doubled
// for the leader.
const auto mult = (server->uuid() == leader_tserver->uuid()) ? 2 : 1;
const auto latency_ms = mult *
kHeartbeatIntervalMs * kMaxMissedHeartbeatPeriods;
ASSERT_OK(cluster_->SetFlag(ts, "log_inject_latency_ms_mean",
ASSERT_OK(cluster_->SetFlag(ts, "log_inject_latency_ms_stddev", "0"));
ASSERT_OK(cluster_->SetFlag(ts, "log_inject_latency", "true"));
// Shut down the third tablet server.
// Sanity check: make sure the leadership hasn't changed since the leader
// has been determined.
TServerDetails* current_leader;
ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &current_leader));
// Get the Raft term from the established leader.
ConsensusStatePB cstate;
ASSERT_OK(GetConsensusState(leader_tserver, tablet_id_, kTimeout,
consensus::EXCLUDE_HEALTH_REPORT, &cstate));
TestWorkload workload(cluster_.get());
// Make a 'space' for the artificial vote requests (see below) to arrive
// while there is no activity on RaftConsensus::Update().
// Issue rogue vote requests, imitating a disruptive tablet replica.
// The requests carry the UUID of the shut-down server and the term following
// the leader's current term, and are sent to both remaining replicas.
const auto& shutdown_server_uuid = shutdown_tserver->uuid();
const auto next_term = cstate.current_term() + 1;
const auto targets = { leader_tserver, other_tserver };
// 100 rounds, with a pause of a quarter of the heartbeat interval before each.
for (auto i = 0; i < 100; ++i) {
SleepFor(MonoDelta::FromMilliseconds(kHeartbeatIntervalMs / 4));
for (const auto* ts : targets) {
auto s = RequestVote(ts, tablet_id_, shutdown_server_uuid,
next_term, MakeOpId(next_term + i, 0),
/*ignore_live_leader=*/ false,
/*is_pre_election=*/ false,
// Neither leader nor follower replica should grant 'yes' vote
// since the healthy leader is there and doing well, sending Raft
// transactions to be replicated.
ASSERT_TRUE(s.IsInvalidArgument() || s.IsServiceUnavailable())
<< s.ToString();
"because replica is either leader or "
"believes a valid leader to be alive"
"because replica is already servicing an update "
"from a current leader or another vote"
} // namespace tserver
} // namespace kudu