blob: e33b64c5df19564cd1ad283f4fc38ff2abaf0159 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "kudu/tools/ksck.h"
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <map>
#include <memory>
#include <numeric>
#include <optional>
#include <set>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include <google/protobuf/stubs/common.h>
#include <gtest/gtest.h>
#include <rapidjson/document.h>
#include "kudu/common/partition.h"
#include "kudu/common/schema.h"
#include "kudu/consensus/metadata.pb.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/rebalance/cluster_status.h"
#include "kudu/server/server_base.pb.h"
#include "kudu/tablet/metadata.pb.h"
#include "kudu/tablet/tablet.pb.h"
#include "kudu/tools/ksck_checksum.h"
#include "kudu/tools/ksck_results.h"
#include "kudu/transactions/txn_status_tablet.h"
#include "kudu/util/jsonreader.h"
#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/status.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
DECLARE_bool(checksum_scan);
DECLARE_int32(checksum_idle_timeout_sec);
DECLARE_int32(max_progress_report_wait_ms);
DECLARE_string(color);
DECLARE_string(flags_categories_to_check);
DECLARE_string(ksck_format);
DECLARE_uint32(truncate_server_csv_length);
using kudu::cluster_summary::ConsensusConfigType;
using kudu::cluster_summary::ConsensusState;
using kudu::cluster_summary::ConsensusStateMap;
using kudu::cluster_summary::ReplicaSummary;
using kudu::cluster_summary::ServerHealth;
using kudu::cluster_summary::ServerHealthSummary;
using kudu::cluster_summary::TableSummary;
using kudu::cluster_summary::TabletSummary;
using kudu::server::GetFlagsResponsePB;
using kudu::tablet::TabletDataState;
using kudu::transactions::TxnStatusTablet;
using std::make_shared;
using std::nullopt;
using std::optional;
using std::ostringstream;
using std::shared_ptr;
using std::static_pointer_cast;
using std::string;
using std::vector;
using strings::Substitute;
namespace kudu {
namespace tools {
// Test double for a master. FetchInfo() and FetchConsensusState() hand back
// statuses that the tests set directly, and FetchFlags() succeeds or fails
// depending on the 'is_get_flags_available' constructor argument.
class MockKsckMaster : public KsckMaster {
 public:
  explicit MockKsckMaster(const string& address, const string& uuid, bool is_get_flags_available)
      : KsckMaster(address),
        fetch_info_status_(Status::OK()),
        is_get_flags_available_(is_get_flags_available) {
    uuid_ = uuid;
    version_ = "mock-version";
    if (!is_get_flags_available_) {
      return;
    }
    // Emplace a default-constructed 'flags' value for every category.
    for (size_t category = FlagsCategory::MIN; category <= FlagsCategory::MAX; ++category) {
      flags_by_category_[category].flags.emplace();
    }
  }

  Status Init() override {
    return Status::OK();
  }

  // Records FETCHED or FETCH_FAILED according to 'fetch_info_status_' and
  // returns that status.
  Status FetchInfo() override {
    state_ = fetch_info_status_.ok() ? KsckFetchState::FETCHED
                                     : KsckFetchState::FETCH_FAILED;
    return fetch_info_status_;
  }

  Status FetchConsensusState() override {
    return fetch_cstate_status_;
  }

  // Marks each requested category as FETCHED when GetFlags is "available"
  // and as FETCH_FAILED otherwise; the returned status mirrors that outcome.
  Status FetchFlags(const std::vector<FlagsCategory>& categories) override {
    const auto outcome = is_get_flags_available_ ? KsckFetchState::FETCHED
                                                 : KsckFetchState::FETCH_FAILED;
    for (const auto category : categories) {
      flags_by_category_[category].state = outcome;
    }
    if (!is_get_flags_available_) {
      return Status::RemoteError("GetFlags not available");
    }
    return Status::OK();
  }

  // Public because the unit tests mutate these variables directly.
  Status fetch_info_status_;
  Status fetch_cstate_status_;

  using KsckMaster::uuid_;
  using KsckMaster::cstate_;
  using KsckMaster::flags_by_category_;
  using KsckMaster::version_;

 private:
  const bool is_get_flags_available_;
};
// Test double for a tablet server. Fetch results, health, and checksum data
// come from public members that the tests set directly.
class MockKsckTabletServer : public KsckTabletServer {
 public:
  explicit MockKsckTabletServer(const string& uuid, bool is_get_flags_available)
      : KsckTabletServer(uuid),
        fetch_info_status_(Status::OK()),
        fetch_info_health_(ServerHealth::HEALTHY),
        address_("<mock>"),
        is_get_flags_available_(is_get_flags_available) {
    version_ = "mock-version";
    if (!is_get_flags_available_) {
      return;
    }
    // Emplace a default-constructed 'flags' value for every category.
    for (size_t category = FlagsCategory::MIN; category <= FlagsCategory::MAX; ++category) {
      flags_by_category_[category].flags.emplace();
    }
  }

  // Reports 'fetch_info_health_' through 'health', sets a fixed timestamp,
  // records the fetch state, and returns 'fetch_info_status_'.
  Status FetchInfo(ServerHealth* health) override {
    CHECK(health);
    *health = fetch_info_health_;
    timestamp_ = 12345;
    state_ = fetch_info_status_.ok() ? KsckFetchState::FETCHED
                                     : KsckFetchState::FETCH_FAILED;
    return fetch_info_status_;
  }

  Status FetchConsensusState(ServerHealth* /*health*/) override {
    return Status::OK();
  }

  // Marks each requested category as FETCHED when GetFlags is "available"
  // and as FETCH_FAILED otherwise; the returned status mirrors that outcome.
  Status FetchFlags(const std::vector<FlagsCategory>& categories) override {
    const auto outcome = is_get_flags_available_ ? KsckFetchState::FETCHED
                                                 : KsckFetchState::FETCH_FAILED;
    for (const auto category : categories) {
      flags_by_category_[category].state = outcome;
    }
    if (!is_get_flags_available_) {
      return Status::RemoteError("GetFlags not available");
    }
    return Status::OK();
  }

  void FetchCurrentTimestampAsync() override {}

  Status FetchCurrentTimestamp() override {
    return Status::OK();
  }

  void FetchQuiescingInfo() override {}

  // Reports the fake progress to 'manager' and, only when that progress is
  // positive, also reports a successful result carrying the fake checksum.
  void RunTabletChecksumScanAsync(
      const std::string& tablet_id,
      const Schema& /*schema*/,
      const KsckChecksumOptions& /*options*/,
      shared_ptr<KsckChecksumManager> manager) override {
    manager->ReportProgress(checksum_progress_, 2 * checksum_progress_);
    if (checksum_progress_ <= 0) {
      return;
    }
    manager->ReportResult(tablet_id, uuid_, Status::OK(), checksum_);
  }

  std::string address() const override {
    return address_;
  }

  // Public because the unit tests mutate these variables directly.
  Status fetch_info_status_;
  ServerHealth fetch_info_health_;

  // The fake checksum for replicas on this mock server.
  uint64_t checksum_ = 0;

  // The fake progress amount for this mock server, used to mock checksum
  // progress for this server.
  int64_t checksum_progress_ = 10;

  using KsckTabletServer::flags_by_category_;
  using KsckTabletServer::location_;
  using KsckTabletServer::version_;

 private:
  const string address_;
  const bool is_get_flags_available_;
};
// Test double for a cluster. Connect() returns a status the tests can set,
// and every Retrieve*() call succeeds without doing anything.
class MockKsckCluster : public KsckCluster {
 public:
  MockKsckCluster() : fetch_info_status_(Status::OK()) {}

  Status Connect() override { return fetch_info_status_; }

  Status RetrieveTabletServers() override { return Status::OK(); }

  Status RetrieveTablesList() override { return Status::OK(); }

  Status RetrieveAllTablets() override { return Status::OK(); }

  Status RetrieveTabletsList(const shared_ptr<KsckTable>& /* unused */) override {
    return Status::OK();
  }

  // Public because the unit tests mutate these variables directly.
  Status fetch_info_status_;

  using KsckCluster::masters_;
  using KsckCluster::tables_;
  using KsckCluster::tablet_servers_;
  using KsckCluster::txn_sys_table_;
};
class KsckTest : public KuduTest {
public:
KsckTest()
: cluster_(new MockKsckCluster()),
ksck_(new Ksck(cluster_, &err_stream_)) {
FLAGS_color = "never";
}
void SetUp() override {
// Set up the master consensus state.
consensus::ConsensusStatePB cstate;
cstate.set_current_term(0);
cstate.set_leader_uuid("master-id-0");
for (int i = 0; i < 3; i++) {
auto* peer = cstate.mutable_committed_config()->add_peers();
peer->set_member_type(consensus::RaftPeerPB::VOTER);
peer->set_permanent_uuid(Substitute("master-id-$0", i));
}
for (int i = 0; i < 3; i++) {
const string uuid = Substitute("master-id-$0", i);
const string addr = Substitute("master-$0", i);
auto master = make_shared<MockKsckMaster>(addr, uuid, IsGetFlagsAvailable());
master->cstate_ = cstate;
cluster_->masters_.push_back(master);
}
KsckCluster::TSMap tablet_servers;
for (int i = 0; i < 3; i++) {
string name = Substitute("ts-id-$0", i);
auto ts = make_shared<MockKsckTabletServer>(name, IsGetFlagsAvailable());
InsertOrDie(&tablet_servers, ts->uuid(), ts);
}
cluster_->tablet_servers_.swap(tablet_servers);
}
protected:
// Returns the expected summary for a table with the given tablet states.
std::string ExpectedTableSummary(const string& table_name,
int replication_factor,
int healthy_tablets,
int recovering_tablets,
int underreplicated_tablets,
int consensus_mismatch_tablets,
int unavailable_tablets) {
TableSummary table_summary;
table_summary.name = table_name;
table_summary.replication_factor = replication_factor;
table_summary.healthy_tablets = healthy_tablets;
table_summary.recovering_tablets = recovering_tablets;
table_summary.underreplicated_tablets = underreplicated_tablets;
table_summary.consensus_mismatch_tablets = consensus_mismatch_tablets;
table_summary.unavailable_tablets = unavailable_tablets;
std::ostringstream oss;
PrintTableSummaries({ table_summary }, "table", oss);
return oss.str();
}
void CreateDefaultAssignmentPlan(int tablets_count) {
SCOPED_CLEANUP({
// This isn't necessary for correctness, but the tests were all
// written to expect a reversed order and doing that here is more
// convenient than rewriting many ASSERTs.
std::reverse(assignment_plan_.begin(), assignment_plan_.end());
});
while (tablets_count > 0) {
for (const auto& entry : cluster_->tablet_servers_) {
if (tablets_count-- == 0) return;
assignment_plan_.push_back(entry.second->uuid());
}
}
}
void CreateOneTableOneTablet(bool create_txn_status_table = false) {
NO_FATALS(CreateDefaultAssignmentPlan(create_txn_status_table ? 2 : 1));
auto table = CreateAndAddTable("test", 1);
auto tablet(make_shared<KsckTablet>(
table.get(), "tablet-id-1", Partition{}));
NO_FATALS(CreateAndFillTablet(tablet, 1, true, true));
table->set_tablets({ tablet });
if (create_txn_status_table) {
auto sys_table = CreateAndAddTxnStatusTable(1);
auto sys_tablet(make_shared<KsckTablet>(
sys_table.get(), "sys-tablet-id-1", Partition{}));
NO_FATALS(CreateAndFillTablet(sys_tablet, 1, true, true));
sys_table->set_tablets({ sys_tablet });
}
}
void CreateOneSmallReplicatedTable(const string& table_name = "test",
const string& tablet_id_prefix = "") {
int num_replicas = 3;
int num_tablets = 3;
CreateDefaultAssignmentPlan(num_replicas * num_tablets);
auto table = CreateAndAddTable(table_name, num_replicas);
vector<shared_ptr<KsckTablet>> tablets;
for (int i = 0; i < num_tablets; i++) {
auto tablet(make_shared<KsckTablet>(
table.get(),
Substitute("$0tablet-id-$1", tablet_id_prefix, i),
Partition{}));
CreateAndFillTablet(tablet, num_replicas, true, true);
tablets.push_back(std::move(tablet));
}
table->set_tablets(tablets);
}
void CreateOneSmallReplicatedTableWithTabletNotRunning() {
int num_replicas = 3;
int num_tablets = 3;
CreateDefaultAssignmentPlan(num_replicas * num_tablets);
auto table = CreateAndAddTable("test", num_replicas);
vector<shared_ptr<KsckTablet>> tablets;
for (int i = 0; i < num_tablets; i++) {
auto tablet(make_shared<KsckTablet>(
table.get(), Substitute("tablet-id-$0", i), Partition{}));
CreateAndFillTablet(tablet, num_replicas, true, i != 0);
tablets.push_back(std::move(tablet));
}
table->set_tablets(tablets);
}
void CreateOneOneTabletReplicatedBrokenTable() {
// We're placing only two tablets, the 3rd goes nowhere.
CreateDefaultAssignmentPlan(2);
auto table = CreateAndAddTable("test", 3);
auto tablet(make_shared<KsckTablet>(table.get(), "tablet-id-1", Partition{}));
CreateAndFillTablet(tablet, 2, false, true);
table->set_tablets({ tablet });
}
shared_ptr<KsckTable> CreateAndAddTxnStatusTable(int num_replicas) {
auto table(make_shared<KsckTable>(
TxnStatusTablet::kTxnStatusTableName, TxnStatusTablet::kTxnStatusTableName,
TxnStatusTablet::GetSchema(), num_replicas));
cluster_->txn_sys_table_ = table;
return table;
}
shared_ptr<KsckTable> CreateAndAddTable(const string& id_and_name, int num_replicas) {
auto table(make_shared<KsckTable>(
id_and_name, id_and_name, Schema(), num_replicas));
cluster_->tables_.push_back(table);
return table;
}
void CreateAndFillTablet(shared_ptr<KsckTablet>& tablet, int num_replicas,
bool has_leader, bool is_running) {
{
vector<shared_ptr<KsckTabletReplica>> replicas;
if (has_leader) {
NO_FATALS(CreateReplicaAndAdd(&replicas, tablet->id(), true, is_running));
num_replicas--;
}
for (int i = 0; i < num_replicas; i++) {
NO_FATALS(CreateReplicaAndAdd(&replicas, tablet->id(), false, is_running));
}
tablet->set_replicas(std::move(replicas));
}
// Set up the consensus state on each tablet server.
consensus::ConsensusStatePB cstate;
cstate.set_current_term(0);
for (const auto& replica : tablet->replicas()) {
if (replica->is_leader()) {
cstate.set_leader_uuid(replica->ts_uuid());
}
auto* peer = cstate.mutable_committed_config()->add_peers();
peer->set_member_type(consensus::RaftPeerPB::VOTER);
peer->set_permanent_uuid(replica->ts_uuid());
}
for (const auto& replica : tablet->replicas()) {
shared_ptr<MockKsckTabletServer> ts =
static_pointer_cast<MockKsckTabletServer>(cluster_->tablet_servers_.at(replica->ts_uuid()));
InsertIfNotPresent(&ts->tablet_consensus_state_map_,
std::make_pair(replica->ts_uuid(), tablet->id()),
cstate);
}
}
void CreateReplicaAndAdd(vector<shared_ptr<KsckTabletReplica>>* replicas,
const string& tablet_id,
bool is_leader,
bool is_running) {
shared_ptr<KsckTabletReplica> replica(
new KsckTabletReplica(assignment_plan_.back(), is_leader, true));
shared_ptr<MockKsckTabletServer> ts = static_pointer_cast<MockKsckTabletServer>(
cluster_->tablet_servers_.at(assignment_plan_.back()));
assignment_plan_.pop_back();
replicas->push_back(replica);
// Add the equivalent replica on the tablet server.
tablet::TabletStatusPB pb;
pb.set_tablet_id(tablet_id);
pb.set_table_name("fake-table");
pb.set_state(is_running ? tablet::RUNNING : tablet::FAILED);
pb.set_tablet_data_state(TabletDataState::TABLET_DATA_UNKNOWN);
InsertOrDie(&ts->tablet_status_map_, tablet_id, pb);
}
Status RunKsck() {
auto c = MakeScopedCleanup([this]() {
LOG(INFO) << "Ksck output:\n" << err_stream_.str();
});
return ksck_->RunAndPrintResults();
}
const string KsckResultsToJsonString(int sections = PrintSections::ALL_SECTIONS) {
ostringstream json_stream;
ksck_->results().PrintJsonTo(PrintMode::JSON_COMPACT,
sections,
json_stream);
return json_stream.str();
}
virtual bool IsGetFlagsAvailable() const {
return true;
}
shared_ptr<MockKsckCluster> cluster_;
shared_ptr<Ksck> ksck_;
// This is used as a stack. First the unit test is responsible to create a plan to follow, that
// is the order in which each replica of each tablet will be assigned, starting from the end.
// So if you have 2 tablets with num_replicas=3 and 3 tablet servers, then to distribute evenly
// you should have a list that looks like ts1,ts2,ts3,ts3,ts2,ts1 so that the two LEADERS, which
// are assigned first, end up on ts1 and ts3.
vector<string> assignment_plan_;
std::ostringstream err_stream_;
};
// Variant of KsckTest whose IsGetFlagsAvailable() returns false, so the mock
// masters and tablet servers built in KsckTest::SetUp() are constructed with
// 'is_get_flags_available' set to false.
class GetFlagsUnavailableKsckTest : public KsckTest {
protected:
bool IsGetFlagsAvailable() const override {
return false;
}
};
// Helper macros for checking JSON fields against expected values.
// In all cases, the meanings of the parameters are as follows:
// 'reader' is the JsonReader that owns the parsed JSON data.
// 'value' is the rapidjson::Value* containing the field, or, if 'field'
// is nullptr, the field itself.
// 'field' is a const char* naming the field of 'value' to check.
// If it is null, the field value is extracted from 'value' directly.
// 'expected' is the expected value.
//
// In each macro the extraction step is checked with a fatal ASSERT_*
// assertion, while the comparison against 'expected' uses a non-fatal
// EXPECT_EQ.

// Extracts 'field' as a string and compares it with 'expected'.
#define EXPECT_JSON_STRING_FIELD(reader, value, field, expected) do { \
string actual; \
ASSERT_OK((reader).ExtractString((value), (field), &actual)); \
EXPECT_EQ((expected), actual); \
} while (0)
// Extracts 'field' as an int64_t and compares it with 'expected'.
#define EXPECT_JSON_INT_FIELD(reader, value, field, expected) do { \
int64_t actual; \
ASSERT_OK((reader).ExtractInt64((value), (field), &actual)); \
EXPECT_EQ((expected), actual); \
} while (0)
// Extracts 'field' as a bool and compares it with 'expected'.
#define EXPECT_JSON_BOOL_FIELD(reader, value, field, expected) do { \
bool actual; \
ASSERT_OK((reader).ExtractBool((value), (field), &actual)); \
EXPECT_EQ((expected), actual); \
} while (0)
// Asserts that 'field' is absent, i.e. that trying to extract it as an
// int64_t yields a NotFound status.
#define EXPECT_JSON_FIELD_NOT_PRESENT(reader, value, field) do { \
int64_t unused; \
ASSERT_TRUE((reader).ExtractInt64((value), (field), &unused).IsNotFound()); \
} while (0)
// Extracts the object array stored in 'field' and asserts on its size.
// 'array' is a vector<const rapidjson::Value*> into which the array elements
// will be extracted.
// 'exp_size' is the expected size of the vector after extraction.
#define EXTRACT_ARRAY_CHECK_SIZE(reader, value, field, array, exp_size) do { \
ASSERT_OK((reader).ExtractObjectArray((value), (field), &(array))); \
ASSERT_EQ(exp_size, (array).size()); \
} while (0)
void CheckJsonVsServerHealthSummaries(
const JsonReader& r,
const string& key,
const optional<vector<ServerHealthSummary>>& summaries) {
if (!summaries || summaries->empty()) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), key.c_str());
return;
}
vector<const rapidjson::Value*> health;
EXTRACT_ARRAY_CHECK_SIZE(r, r.root(), key.c_str(), health, summaries->size());
for (int i = 0; i < summaries->size(); i++) {
const auto& summary = (*summaries)[i];
const auto* server = health[i];
EXPECT_JSON_STRING_FIELD(r, server, "uuid", summary.uuid);
EXPECT_JSON_STRING_FIELD(r, server, "address", summary.address);
EXPECT_JSON_STRING_FIELD(r, server, "health", ServerHealthToString(summary.health));
EXPECT_JSON_STRING_FIELD(r, server, "status", summary.status.ToString());
if (!summary.ts_location.empty()) {
EXPECT_JSON_STRING_FIELD(r, server, "location", summary.ts_location);
}
}
}
// Returns the name of the given consensus config type ("COMMITTED",
// "PENDING", or "MASTER"). Any other value hits a LOG(FATAL).
const string ConsensusConfigTypeToString(ConsensusConfigType t) {
  if (t == ConsensusConfigType::COMMITTED) {
    return "COMMITTED";
  }
  if (t == ConsensusConfigType::PENDING) {
    return "PENDING";
  }
  if (t == ConsensusConfigType::MASTER) {
    return "MASTER";
  }
  LOG(FATAL) << "unknown ConsensusConfigType";
}
// Checks the JSON object 'cstate' against the reference consensus state
// 'ref_cstate'. The config type is always compared. The leader UUID, term,
// opid index, voter UUIDs, and non-voter UUIDs are compared when set (or
// non-empty) in 'ref_cstate' and are otherwise expected to be absent from
// the JSON.
void CheckJsonVsConsensusState(const JsonReader& r,
                               const rapidjson::Value* cstate,
                               const ConsensusState& ref_cstate) {
  EXPECT_JSON_STRING_FIELD(r, cstate, "type",
                           ConsensusConfigTypeToString(ref_cstate.type));
  if (ref_cstate.leader_uuid) {
    EXPECT_JSON_STRING_FIELD(r, cstate, "leader_uuid", ref_cstate.leader_uuid);
  } else {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, cstate, "leader_uuid");
  }
  if (ref_cstate.term) {
    EXPECT_JSON_INT_FIELD(r, cstate, "term", ref_cstate.term);
  } else {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, cstate, "term");
  }
  if (ref_cstate.opid_index) {
    EXPECT_JSON_INT_FIELD(r, cstate, "opid_index", ref_cstate.opid_index);
  } else {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, cstate, "opid_index");
  }

  // Check voters. The reference UUIDs are copied into a vector so they can
  // be matched against the JSON array by position.
  if (ref_cstate.voter_uuids.empty()) {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, cstate, "voter_uuids");
  } else {
    const vector<string> ref_voter_uuids(ref_cstate.voter_uuids.begin(),
                                         ref_cstate.voter_uuids.end());
    vector<const rapidjson::Value*> voter_uuids;
    EXTRACT_ARRAY_CHECK_SIZE(r, cstate, "voter_uuids",
                             voter_uuids, ref_voter_uuids.size());
    for (size_t j = 0; j < voter_uuids.size(); j++) {
      EXPECT_JSON_STRING_FIELD(r, voter_uuids[j], nullptr, ref_voter_uuids[j]);
    }
  }

  // Check non-voters. The same key must be used for the "absent" and the
  // "present" case; previously the present case looked up "nonvoter_uuids",
  // which disagreed with the key whose absence is asserted above it.
  // NOTE(review): "non_voter_uuids" is assumed to be the key ksck emits --
  // confirm against the JSON writer.
  if (ref_cstate.non_voter_uuids.empty()) {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, cstate, "non_voter_uuids");
  } else {
    const vector<string> ref_non_voter_uuids(ref_cstate.non_voter_uuids.begin(),
                                             ref_cstate.non_voter_uuids.end());
    vector<const rapidjson::Value*> non_voter_uuids;
    EXTRACT_ARRAY_CHECK_SIZE(r, cstate, "non_voter_uuids",
                             non_voter_uuids, ref_non_voter_uuids.size());
    for (size_t j = 0; j < non_voter_uuids.size(); j++) {
      EXPECT_JSON_STRING_FIELD(r, non_voter_uuids[j], nullptr, ref_non_voter_uuids[j]);
    }
  }
}
// Checks the JSON object 'replica' against the reference 'ref_replica':
// the tablet server UUID and address, the leader/voter/health flags, the
// tablet state, and (when present in the reference) the status PB's data
// state and the consensus state.
void CheckJsonVsReplicaSummary(const JsonReader& r,
                               const rapidjson::Value* replica,
                               const ReplicaSummary& ref_replica) {
  EXPECT_JSON_STRING_FIELD(r, replica, "ts_uuid", ref_replica.ts_uuid);
  if (!ref_replica.ts_address) {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, replica, "ts_address");
  } else {
    EXPECT_JSON_STRING_FIELD(r, replica, "ts_address", ref_replica.ts_address);
  }

  EXPECT_JSON_BOOL_FIELD(r, replica, "is_leader", ref_replica.is_leader);
  EXPECT_JSON_BOOL_FIELD(r, replica, "is_voter", ref_replica.is_voter);
  EXPECT_JSON_BOOL_FIELD(r, replica, "ts_healthy", ref_replica.ts_healthy);
  EXPECT_JSON_STRING_FIELD(r, replica, "state",
                           tablet::TabletStatePB_Name(ref_replica.state));

  // The only thing ksck expects from the status_pb is the data state,
  // so it's all we check (even though the other info is nice to have).
  if (!ref_replica.status_pb) {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, replica, "status_pb");
  } else {
    const rapidjson::Value* json_status_pb;
    ASSERT_OK(r.ExtractObject(replica, "status_pb", &json_status_pb));
    const auto& ref_data_state_name =
        tablet::TabletDataState_Name(ref_replica.status_pb->tablet_data_state());
    EXPECT_JSON_STRING_FIELD(r, json_status_pb, "tablet_data_state", ref_data_state_name);
  }

  if (!ref_replica.consensus_state) {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, replica, "consensus_state");
  } else {
    const rapidjson::Value* json_cstate;
    ASSERT_OK(r.ExtractObject(replica, "consensus_state", &json_cstate));
    CheckJsonVsConsensusState(r, json_cstate, *ref_replica.consensus_state);
  }
}
void CheckJsonVsMasterConsensus(const JsonReader& r,
bool ref_conflict,
const optional<ConsensusStateMap>& ref_cstates) {
if (!ref_cstates || ref_cstates->empty()) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), "master_consensus_states");
return;
}
EXPECT_JSON_BOOL_FIELD(r, r.root(), "master_consensus_conflict", ref_conflict);
vector<const rapidjson::Value*> cstates;
EXTRACT_ARRAY_CHECK_SIZE(r, r.root(), "master_consensus_states",
cstates, ref_cstates->size());
int i = 0;
for (const auto& entry : *ref_cstates) {
CheckJsonVsConsensusState(r, cstates[i++], entry.second);
}
}
void CheckJsonVsTableSummaries(const JsonReader& r,
const string& key,
const optional<vector<TableSummary>>& ref_tables) {
if (!ref_tables || ref_tables->empty()) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), key.c_str());
return;
}
vector<const rapidjson::Value*> tables;
EXTRACT_ARRAY_CHECK_SIZE(r, r.root(), key.c_str(), tables, ref_tables->size());
for (int i = 0; i < ref_tables->size(); i++) {
const auto& ref_table = (*ref_tables)[i];
const auto* table = tables[i];
EXPECT_JSON_STRING_FIELD(r, table, "id", ref_table.id);
EXPECT_JSON_STRING_FIELD(r, table, "name", ref_table.name);
EXPECT_JSON_STRING_FIELD(r, table,
"health", HealthCheckResultToString(ref_table.TableStatus()));
EXPECT_JSON_INT_FIELD(r, table,
"replication_factor", ref_table.replication_factor);
EXPECT_JSON_INT_FIELD(r, table,
"total_tablets", ref_table.TotalTablets());
EXPECT_JSON_INT_FIELD(r, table,
"healthy_tablets", ref_table.healthy_tablets);
EXPECT_JSON_INT_FIELD(r, table,
"recovering_tablets", ref_table.recovering_tablets);
EXPECT_JSON_INT_FIELD(r, table,
"underreplicated_tablets", ref_table.underreplicated_tablets);
EXPECT_JSON_INT_FIELD(r, table,
"unavailable_tablets", ref_table.unavailable_tablets);
EXPECT_JSON_INT_FIELD(r, table,
"consensus_mismatch_tablets", ref_table.consensus_mismatch_tablets);
}
}
void CheckJsonVsTabletSummaries(const JsonReader& r,
const string& key,
const optional<vector<TabletSummary>>& ref_tablets) {
if (!ref_tablets || ref_tablets->empty()) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), key.c_str());
return;
}
vector<const rapidjson::Value*> tablets;
EXTRACT_ARRAY_CHECK_SIZE(r, r.root(), key.c_str(), tablets, ref_tablets->size());
for (int i = 0; i < ref_tablets->size(); i++) {
const auto& ref_tablet = (*ref_tablets)[i];
const auto& tablet = tablets[i];
EXPECT_JSON_STRING_FIELD(r, tablet, "id", ref_tablet.id);
EXPECT_JSON_STRING_FIELD(r, tablet, "table_id", ref_tablet.table_id);
EXPECT_JSON_STRING_FIELD(r, tablet, "table_name", ref_tablet.table_name);
EXPECT_JSON_STRING_FIELD(r, tablet,
"health", HealthCheckResultToString(ref_tablet.result));
EXPECT_JSON_STRING_FIELD(r, tablet, "status", ref_tablet.status);
const rapidjson::Value* master_cstate;
ASSERT_OK(r.ExtractObject(tablet, "master_cstate", &master_cstate));
CheckJsonVsConsensusState(r, master_cstate, ref_tablet.master_cstate);
if (ref_tablet.replicas.empty()) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, tablet, "replicas");
continue;
}
vector<const rapidjson::Value*> replicas;
EXTRACT_ARRAY_CHECK_SIZE(r, tablet,
"replicas", replicas, ref_tablet.replicas.size());
for (int j = 0; j < replicas.size(); j++) {
const auto& ref_replica = ref_tablet.replicas[j];
const auto* replica = replicas[j];
CheckJsonVsReplicaSummary(r, replica, ref_replica);
}
EXPECT_JSON_STRING_FIELD(r, tablet, "range_key_begin", ref_tablet.range_key_begin);
}
}
void CheckJsonVsChecksumResults(const JsonReader& r,
const string& key,
const optional<KsckChecksumResults>& ref_checksum_results) {
if (!ref_checksum_results || ref_checksum_results->tables.empty()) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), key.c_str());
return;
}
const rapidjson::Value* checksum_results;
ASSERT_OK(r.ExtractObject(r.root(), key.c_str(), &checksum_results));
if (ref_checksum_results->snapshot_timestamp) {
EXPECT_JSON_INT_FIELD(r, checksum_results,
"snapshot_timestamp", *ref_checksum_results->snapshot_timestamp);
} else {
EXPECT_JSON_FIELD_NOT_PRESENT(r, checksum_results, "snapshot_timestamp");
}
vector<const rapidjson::Value*> tables;
EXTRACT_ARRAY_CHECK_SIZE(r, checksum_results, "tables",
tables, ref_checksum_results->tables.size());
int i = 0;
for (const auto& table_entry : ref_checksum_results->tables) {
const auto& ref_table = table_entry.second;
const auto* table = tables[i++];
EXPECT_JSON_STRING_FIELD(r, table, "name", table_entry.first);
vector<const rapidjson::Value*> tablets;
EXTRACT_ARRAY_CHECK_SIZE(r, table, "tablets", tablets, ref_table.size());
int j = 0;
for (const auto& tablet_entry : ref_table) {
const auto& ref_tablet = tablet_entry.second;
const auto* tablet = tablets[j++];
EXPECT_JSON_STRING_FIELD(r, tablet, "tablet_id", tablet_entry.first);
EXPECT_JSON_BOOL_FIELD(r, tablet, "mismatch", ref_tablet.mismatch);
vector<const rapidjson::Value*> checksums;
EXTRACT_ARRAY_CHECK_SIZE(r, tablet, "replica_checksums",
checksums, ref_tablet.replica_checksums.size());
int k = 0;
for (const auto& replica_entry : ref_tablet.replica_checksums) {
const auto& ref_replica = replica_entry.second;
const auto* replica = checksums[k++];
EXPECT_JSON_STRING_FIELD(r, replica, "ts_uuid", ref_replica.ts_uuid);
EXPECT_JSON_STRING_FIELD(r, replica, "ts_address", ref_replica.ts_address);
EXPECT_JSON_STRING_FIELD(r, replica, "status", ref_replica.status.ToString());
// Checksum is a uint64_t and might plausibly be larger than int64_t's max,
// so we're handling it special.
int64_t signed_checksum;
ASSERT_OK(r.ExtractInt64(replica, "checksum", &signed_checksum));
ASSERT_EQ(ref_replica.checksum, static_cast<uint64_t>(signed_checksum));
}
}
}
}
void CheckJsonVsVersionSummaries(const JsonReader& r,
const string& key,
const optional<KsckVersionToServersMap>& ref_result) {
if (!ref_result || ref_result->empty()) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), key.c_str());
return;
}
vector<const rapidjson::Value*> version_servers_map;
EXTRACT_ARRAY_CHECK_SIZE(r, r.root(), key.c_str(), version_servers_map, ref_result->size());
auto version_servers = version_servers_map.begin();
for (const auto& ref_version_servers : *ref_result) {
ASSERT_NE(version_servers, version_servers_map.end());
EXPECT_JSON_STRING_FIELD(r, *version_servers, "version", ref_version_servers.first);
vector<const rapidjson::Value *> servers;
EXTRACT_ARRAY_CHECK_SIZE(r, *version_servers, "servers", servers,
ref_version_servers.second.size());
auto server = servers.begin();
for (const auto& ref_server : ref_version_servers.second) {
ASSERT_NE(server, servers.end());
EXPECT_EQ((*server)->GetString(), ref_server);
++server;
}
++version_servers;
}
}
void CheckJsonVsCountSummaries(const JsonReader& r,
const string& key,
const optional<KsckResults>& ref_result) {
if (!ref_result) {
EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), key.c_str());
return;
}
vector<const rapidjson::Value*> count_results;
EXTRACT_ARRAY_CHECK_SIZE(r, r.root(), key.c_str(), count_results, 1);
EXPECT_JSON_INT_FIELD(r, count_results[0], "masters",
ref_result->cluster_status.master_summaries.size());
EXPECT_JSON_INT_FIELD(r, count_results[0], "tservers",
ref_result->cluster_status.tserver_summaries.size());
EXPECT_JSON_INT_FIELD(r, count_results[0], "tables",
ref_result->cluster_status.table_summaries.size());
EXPECT_JSON_INT_FIELD(r, count_results[0], "tablets",
ref_result->cluster_status.tablet_summaries.size());
int replica_count = std::accumulate(ref_result->cluster_status.tablet_summaries.begin(),
ref_result->cluster_status.tablet_summaries.end(),
0,
[](int acc, const TabletSummary& ts) {
return acc + ts.replicas.size();
});
EXPECT_JSON_INT_FIELD(r, count_results[0], "replicas", replica_count);
}
// Compares the JSON array stored under 'key' at the document root against
// the string form of each status in 'ref_errors', by position. When
// 'ref_errors' is empty, the key is instead expected to be absent.
void CheckJsonVsErrors(const JsonReader& r,
                       const string& key,
                       const vector<Status>& ref_errors) {
  if (ref_errors.empty()) {
    EXPECT_JSON_FIELD_NOT_PRESENT(r, r.root(), key.c_str());
    return;
  }
  vector<const rapidjson::Value*> errors;
  // Look the array up under 'key' (rather than a hard-coded "errors") so the
  // "present" case reads the same field whose absence is asserted above.
  EXTRACT_ARRAY_CHECK_SIZE(r, r.root(), key.c_str(), errors, ref_errors.size());
  for (size_t i = 0; i < ref_errors.size(); i++) {
    EXPECT_JSON_STRING_FIELD(r, errors[i], nullptr, ref_errors[i].ToString());
  }
}
// Asserts that 'plain' contains 'header' when 'present' is true and that it
// does not contain it when 'present' is false.
void CheckPlainStringSection(const string& plain, const string& header, bool present) {
  if (!present) {
    ASSERT_STR_NOT_CONTAINS(plain, header);
    return;
  }
  ASSERT_STR_CONTAINS(plain, header);
}
// For every printable section, asserts that its header appears in 'plain'
// exactly when the section's bit is set in 'sections'.
void CheckPlainStringSections(const string& plain, int sections) {
  const struct {
    const char* header;
    bool present;
  } kSectionChecks[] = {
    { "Master Summary\n",
      static_cast<bool>(sections & PrintSections::Values::MASTER_SUMMARIES) },
    { "Tablet Server Summary\n",
      static_cast<bool>(sections & PrintSections::Values::TSERVER_SUMMARIES) },
    { "Version Summary\n",
      static_cast<bool>(sections & PrintSections::Values::VERSION_SUMMARIES) },
    { "Tablet Summary\n",
      static_cast<bool>(sections & PrintSections::Values::TABLET_SUMMARIES) },
    { "Summary by table\n",
      static_cast<bool>(sections & PrintSections::Values::TABLE_SUMMARIES) },
    { "Summary by system table\n",
      static_cast<bool>(sections & PrintSections::Values::SYSTEM_TABLE_SUMMARIES) },
    { "Checksum Summary\n",
      static_cast<bool>(sections & PrintSections::Values::CHECKSUM_RESULTS) },
    { "Total Count Summary\n",
      static_cast<bool>(sections & PrintSections::Values::TOTAL_COUNT) },
  };
  // Sections are checked in the order listed; a fatal failure stops the walk.
  for (const auto& check : kSectionChecks) {
    NO_FATALS(CheckPlainStringSection(plain, check.header, check.present));
  }
}
void CheckJsonStringVsKsckResults(const string& json,
const KsckResults& results,
int sections = PrintSections::ALL_SECTIONS) {
JsonReader r(json);
ASSERT_OK(r.Init());
NO_FATALS(CheckJsonVsServerHealthSummaries(
r,
"master_summaries",
sections & PrintSections::Values::MASTER_SUMMARIES ?
optional<vector<ServerHealthSummary>>
(results.cluster_status.master_summaries) : nullopt));
NO_FATALS(CheckJsonVsMasterConsensus(
r,
results.cluster_status.master_consensus_conflict,
sections & PrintSections::Values::MASTER_SUMMARIES ?
optional<ConsensusStateMap>
(results.cluster_status.master_consensus_state_map) : nullopt));
NO_FATALS(CheckJsonVsServerHealthSummaries(
r,
"tserver_summaries",
sections & PrintSections::Values::TSERVER_SUMMARIES ?
optional<vector<ServerHealthSummary>>
(results.cluster_status.tserver_summaries) : nullopt));
NO_FATALS(CheckJsonVsVersionSummaries(
r,
"version_summaries",
sections & PrintSections::Values::VERSION_SUMMARIES ?
optional<KsckVersionToServersMap>
(results.version_summaries) : nullopt));
NO_FATALS(CheckJsonVsTabletSummaries(
r,
"tablet_summaries",
sections & PrintSections::Values::TABLET_SUMMARIES ?
optional<vector<TabletSummary>>
(results.cluster_status.tablet_summaries) : nullopt));
NO_FATALS(CheckJsonVsTableSummaries(
r,
"table_summaries",
sections & PrintSections::Values::TABLE_SUMMARIES ?
optional<vector<TableSummary>>
(results.cluster_status.table_summaries) : nullopt));
NO_FATALS(CheckJsonVsTableSummaries(
r,
"system_table_summaries",
sections & PrintSections::Values::SYSTEM_TABLE_SUMMARIES ?
optional<vector<TableSummary>>
(results.cluster_status.system_table_summaries) : nullopt));
NO_FATALS(CheckJsonVsChecksumResults(
r,
"checksum_results",
sections & PrintSections::Values::CHECKSUM_RESULTS ?
optional<KsckChecksumResults>(results.checksum_results) : nullopt));
NO_FATALS(CheckJsonVsCountSummaries(
r,
"count_summaries",
sections & PrintSections::Values::TOTAL_COUNT ?
optional<KsckResults>(results) : nullopt));
NO_FATALS(CheckJsonVsErrors(r, "errors", results.error_messages));
}
void CheckMessageNotPresent(const vector<Status>& messages, const string& msg) {
for (const auto& status : messages) {
ASSERT_STR_NOT_CONTAINS(status.ToString(), msg);
}
}
// Runs ksck on the fixture's cluster and expects every master and tablet
// server to be listed as HEALTHY, then cross-checks the JSON output.
TEST_F(KsckTest, TestServersOk) {
  ASSERT_OK(RunKsck());
  const string output = err_stream_.str();

  // Master health.
  ASSERT_STR_CONTAINS(output,
                      "Master Summary\n"
                      " UUID | Address | Status\n"
                      "-------------+----------+---------\n"
                      " master-id-0 | master-0 | HEALTHY\n"
                      " master-id-1 | master-1 | HEALTHY\n"
                      " master-id-2 | master-2 | HEALTHY\n");

  // Tablet server health.
  ASSERT_STR_CONTAINS(output,
                      "Tablet Server Summary\n"
                      " UUID | Address | Status | Location\n"
                      "---------+---------+---------+----------\n"
                      " ts-id-0 | <mock> | HEALTHY | <none>\n"
                      " ts-id-1 | <mock> | HEALTHY | <none>\n"
                      " ts-id-2 | <mock> | HEALTHY | <none>\n");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Makes the master at index 1 fail its info fetch with a network error and
// resets its consensus state, then checks the reported master summary and
// consensus matrix.
TEST_F(KsckTest, TestMasterUnavailable) {
  auto bad_master = std::static_pointer_cast<MockKsckMaster>(cluster_->masters_.at(1));
  bad_master->fetch_info_status_ = Status::NetworkError("gremlins");
  bad_master->cstate_.reset();

  ASSERT_TRUE(ksck_->CheckMasterHealth().IsNetworkError());
  ASSERT_TRUE(ksck_->CheckMasterConsensus().IsCorruption());
  ASSERT_OK(ksck_->PrintResults());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Master Summary\n"
                      " UUID | Address | Status\n"
                      "-------------+----------+-------------\n"
                      " master-id-0 | master-0 | HEALTHY\n"
                      " master-id-2 | master-2 | HEALTHY\n"
                      " master-id-1 | master-1 | UNAVAILABLE\n");
  ASSERT_STR_CONTAINS(output,
                      "All reported replicas are:\n"
                      " A = master-id-0\n"
                      " B = master-id-1\n"
                      " C = master-id-2\n"
                      "The consensus matrix is:\n"
                      " Config source | Replicas | Current term | Config index | Committed?\n"
                      "---------------+------------------------+--------------+--------------+------------\n"
                      " A | A* B C | 0 | | Yes\n"
                      " B | [config not available] | | | \n"
                      " C | A* B C | 0 | | Yes\n");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Makes one master and one tablet server answer the info fetch with a
// "Not authorized" remote error and expects ksck to return NotAuthorized and to
// report how many servers of each kind could not be queried.
TEST_F(KsckTest, TestUnauthorized) {
  const Status noauth = Status::RemoteError("Not authorized: unauthorized access to method");

  auto locked_master = std::static_pointer_cast<MockKsckMaster>(cluster_->masters_.at(1));
  locked_master->fetch_info_status_ = noauth;

  auto locked_tserver = std::static_pointer_cast<MockKsckTabletServer>(
      cluster_->tablet_servers().begin()->second);
  locked_tserver->fetch_info_status_ = noauth;

  Status s = RunKsck();
  ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
  ASSERT_STR_CONTAINS(s.ToString(), "re-run ksck with administrator privileges");

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "failed to gather info from 1 of 3 "
                      "masters due to lack of admin privileges");
  ASSERT_STR_CONTAINS(output,
                      "failed to gather info from 1 of 3 "
                      "tablet servers due to lack of admin privileges");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// A wrong-master-uuid situation can happen if a master that is part of, e.g.,
// a 3-peer config fails permanently and is wiped and reborn on the same address
// with a new uuid.
//
// The test rewrites the master at index 2 so that it reports a new uuid and a
// committed config containing only itself as leader.
TEST_F(KsckTest, TestWrongMasterUuid) {
  const string imposter_uuid = "master-id-imposter";

  auto imposter = std::static_pointer_cast<MockKsckMaster>(cluster_->masters_.at(2));
  imposter->uuid_ = imposter_uuid;
  imposter->cstate_->set_leader_uuid(imposter_uuid);
  auto* committed = imposter->cstate_->mutable_committed_config();
  committed->clear_peers();
  committed->add_peers()->set_permanent_uuid(imposter_uuid);

  ASSERT_OK(ksck_->CheckMasterHealth());
  ASSERT_TRUE(ksck_->CheckMasterConsensus().IsCorruption());
  ASSERT_OK(ksck_->PrintResults());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Master Summary\n"
                      " UUID | Address | Status\n"
                      "--------------------+----------+---------\n"
                      " master-id-0 | master-0 | HEALTHY\n"
                      " master-id-1 | master-1 | HEALTHY\n"
                      " master-id-imposter | master-2 | HEALTHY\n");
  ASSERT_STR_CONTAINS(output,
                      "All reported replicas are:\n"
                      " A = master-id-0\n"
                      " B = master-id-1\n"
                      " C = master-id-imposter\n"
                      " D = master-id-2\n"
                      "The consensus matrix is:\n"
                      " Config source | Replicas | Current term | Config index | Committed?\n"
                      "---------------+------------------+--------------+--------------+------------\n"
                      " A | A* B D | 0 | | Yes\n"
                      " B | A* B D | 0 | | Yes\n"
                      " C | C* | 0 | | Yes\n");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Makes the master at index 1 report itself as leader and expects the master
// consensus check to return Corruption with the disagreement shown in the
// consensus matrix.
TEST_F(KsckTest, TestTwoLeaderMasters) {
  auto second_leader = std::static_pointer_cast<MockKsckMaster>(cluster_->masters_.at(1));
  second_leader->cstate_->set_leader_uuid(second_leader->uuid_);

  ASSERT_OK(ksck_->CheckMasterHealth());
  ASSERT_TRUE(ksck_->CheckMasterConsensus().IsCorruption());
  ASSERT_OK(ksck_->PrintResults());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "All reported replicas are:\n"
                      " A = master-id-0\n"
                      " B = master-id-1\n"
                      " C = master-id-2\n"
                      "The consensus matrix is:\n"
                      " Config source | Replicas | Current term | Config index | Committed?\n"
                      "---------------+--------------+--------------+--------------+------------\n"
                      " A | A* B C | 0 | | Yes\n"
                      " B | A B* C | 0 | | Yes\n"
                      " C | A* B C | 0 | | Yes\n");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Sets the cluster-level info fetch status to a network error and expects
// CheckClusterRunning() to return a network error as well.
TEST_F(KsckTest, TestLeaderMasterUnavailable) {
  cluster_->fetch_info_status_ = Status::NetworkError("Network failure");
  ASSERT_TRUE(ksck_->CheckClusterRunning().IsNetworkError());

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Gives every mock master three 'unusual' flags (one identical everywhere, one
// different on each master, one that differs on a single master) and checks
// both the unusual-flags table and the diverged-flags table that ksck prints.
TEST_F(KsckTest, TestMasterFlagCheck) {
  // Check for the differences in the 'unusual' flags category.
  FLAGS_flags_categories_to_check = "unusual";

  // Setup flags for each mock master. The index is unsigned so that it is
  // compared against the container size without a signed/unsigned mismatch.
  for (size_t i = 0; i < cluster_->masters().size(); ++i) {
    server::GetFlagsResponsePB flags;
    {
      // Add an experimental flag with the same value for each master.
      auto* flag = flags.add_flags();
      flag->set_name("experimental_flag");
      flag->set_value("x");
      flag->mutable_tags()->Add("experimental");
    }
    {
      // Add a hidden flag with a different value for each master.
      auto* flag = flags.add_flags();
      flag->set_name("hidden_flag");
      flag->set_value(std::to_string(i));
      flag->mutable_tags()->Add("hidden");
    }
    {
      // Add a hidden and unsafe flag with one master having a different value
      // than the other two.
      auto* flag = flags.add_flags();
      flag->set_name("hidden_unsafe_flag");
      flag->set_value(std::to_string(i % 2));
      flag->mutable_tags()->Add("hidden");
      flag->mutable_tags()->Add("unsafe");
    }
    shared_ptr<MockKsckMaster> master =
        std::static_pointer_cast<MockKsckMaster>(cluster_->masters_.at(i));
    master->flags_by_category_[FlagsCategory::UNUSUAL].flags = std::move(flags);
  }

  ASSERT_OK(ksck_->CheckMasterHealth());
  ASSERT_OK(ksck_->CheckMasterUnusualFlags());
  ASSERT_OK(ksck_->CheckMasterDivergedFlags());
  ASSERT_OK(ksck_->PrintResults());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Unusual flags for Master:\n"
                      " Flag | Value | Tags | Master\n"
                      "--------------------+-------+---------------+-------------------------\n"
                      " experimental_flag | x | experimental | all 3 server(s) checked\n"
                      " hidden_flag | 0 | hidden | master-0\n"
                      " hidden_flag | 1 | hidden | master-1\n"
                      " hidden_flag | 2 | hidden | master-2\n"
                      " hidden_unsafe_flag | 0 | hidden,unsafe | master-0, master-2\n"
                      " hidden_unsafe_flag | 1 | hidden,unsafe | master-1");
  ASSERT_STR_CONTAINS(output,
                      "Some masters have unsafe, experimental, or hidden flags set");
  ASSERT_STR_CONTAINS(output,
                      "Flags of checked categories for Master:\n"
                      " Flag | Value | Master\n"
                      "--------------------+-------+-------------------------\n"
                      " experimental_flag | x | all 3 server(s) checked\n"
                      " hidden_flag | 0 | master-0\n"
                      " hidden_flag | 1 | master-1\n"
                      " hidden_flag | 2 | master-2\n"
                      " hidden_unsafe_flag | 0 | master-0, master-2\n"
                      " hidden_unsafe_flag | 1 | master-1");
  ASSERT_STR_CONTAINS(output,
                      "Different masters have different settings for same flags "
                      "of checked category 'unusual'");
}
// With the GetFlagsUnavailableKsckTest fixture, the unusual-flags check for
// masters is expected to return Incomplete, and no warning message may mention
// the failure to get master flag information.
TEST_F(GetFlagsUnavailableKsckTest, TestMasterFlagsUnavailable) {
  ASSERT_OK(ksck_->CheckMasterHealth());
  ASSERT_TRUE(ksck_->CheckMasterUnusualFlags().IsIncomplete());

  const auto& warnings = ksck_->results().warning_messages;
  CheckMessageNotPresent(warnings, "unable to get flag information for master");
}
// Makes tablet server "ts-id-1" report a uuid mismatch and checks that it is
// listed with the WRONG_SERVER_UUID status in the tablet server summary.
TEST_F(KsckTest, TestWrongUUIDTabletServer) {
  CreateOneTableOneTablet();

  // Look the server up once with FindOrDie(): unlike operator[], it does not
  // insert a null entry into the map when the key is missing.
  const auto wrong_uuid_ts = static_pointer_cast<MockKsckTabletServer>(
      FindOrDie(cluster_->tablet_servers_, "ts-id-1"));
  wrong_uuid_ts->fetch_info_status_ = Status::RemoteError("ID reported by tablet server "
                                                          "doesn't match the expected ID");
  wrong_uuid_ts->fetch_info_health_ = ServerHealth::WRONG_SERVER_UUID;

  ASSERT_OK(ksck_->CheckClusterRunning());
  ASSERT_OK(ksck_->FetchTableAndTabletInfo());
  ASSERT_TRUE(ksck_->FetchInfoFromTabletServers().IsNetworkError());
  ASSERT_OK(ksck_->PrintResults());

  ASSERT_STR_CONTAINS(err_stream_.str(),
                      "Tablet Server Summary\n"
                      " UUID | Address | Status | Location\n"
                      "---------+---------+-------------------+----------\n"
                      " ts-id-0 | <mock> | HEALTHY | <none>\n"
                      " ts-id-2 | <mock> | HEALTHY | <none>\n"
                      " ts-id-1 | <mock> | WRONG_SERVER_UUID | <none>\n");

  CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
}
// Makes tablet server "ts-id-1" unreachable and checks that it is reported as
// UNAVAILABLE and that every tablet of table 'test' is reported as
// under-replicated.
TEST_F(KsckTest, TestBadTabletServer) {
  CreateOneSmallReplicatedTable();

  // Mock a failure to connect to one of the tablet servers. The server is
  // looked up once with FindOrDie(): unlike operator[], it does not insert a
  // null entry into the map when the key is missing.
  const auto unreachable_ts = static_pointer_cast<MockKsckTabletServer>(
      FindOrDie(cluster_->tablet_servers_, "ts-id-1"));
  unreachable_ts->fetch_info_status_ = Status::NetworkError("Network failure");
  unreachable_ts->fetch_info_health_ = ServerHealth::UNAVAILABLE;

  ASSERT_OK(ksck_->CheckClusterRunning());
  ASSERT_OK(ksck_->FetchTableAndTabletInfo());
  Status s = ksck_->FetchInfoFromTabletServers();
  ASSERT_TRUE(s.IsNetworkError()) << "Status returned: " << s.ToString();
  s = ksck_->CheckTablesConsistency();
  EXPECT_EQ("Corruption: 1 out of 1 table(s) are not healthy", s.ToString());
  ASSERT_OK(ksck_->PrintResults());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(
      output,
      "Tablet Server Summary\n"
      " UUID | Address | Status | Location\n"
      "---------+---------+-------------+----------\n"
      " ts-id-0 | <mock> | HEALTHY | <none>\n"
      " ts-id-2 | <mock> | HEALTHY | <none>\n"
      " ts-id-1 | <mock> | UNAVAILABLE | <none>\n");
  ASSERT_STR_CONTAINS(
      output,
      "Error from <mock>: Network error: Network failure (UNAVAILABLE)\n");
  ASSERT_STR_CONTAINS(
      output,
      "Tablet tablet-id-0 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
      " ts-id-0 (<mock>): RUNNING [LEADER]\n"
      " ts-id-1 (<mock>): TS unavailable\n"
      " ts-id-2 (<mock>): RUNNING\n");
  ASSERT_STR_CONTAINS(
      output,
      "Tablet tablet-id-1 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
      " ts-id-0 (<mock>): RUNNING [LEADER]\n"
      " ts-id-1 (<mock>): TS unavailable\n"
      " ts-id-2 (<mock>): RUNNING\n");
  ASSERT_STR_CONTAINS(
      output,
      "Tablet tablet-id-2 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
      " ts-id-0 (<mock>): RUNNING [LEADER]\n"
      " ts-id-1 (<mock>): TS unavailable\n"
      " ts-id-2 (<mock>): RUNNING\n");

  CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
}
// Gives every mock tablet server three 'unusual' flags (one identical
// everywhere, one different on each server, one that differs on a single
// server) and checks the unusual-flags and diverged-flags tables, with the
// server list truncation threshold lowered to 1.
TEST_F(KsckTest, TestTserverFlagCheck) {
  // Lower the truncation threshold to test truncation.
  FLAGS_truncate_server_csv_length = 1;
  // Check for the differences in the 'unusual' flags category.
  FLAGS_flags_categories_to_check = "unusual";

  // Setup flags for each mock tablet server.
  int ts_idx = 0;
  for (const auto& kv : cluster_->tablet_servers()) {
    server::GetFlagsResponsePB resp;
    {
      // Add an experimental flag with the same value for each tablet server.
      auto* experimental = resp.add_flags();
      experimental->set_name("experimental_flag");
      experimental->set_value("x");
      experimental->mutable_tags()->Add("experimental");
    }
    {
      // Add a hidden flag with a different value for each tablet server.
      auto* hidden = resp.add_flags();
      hidden->set_name("hidden_flag");
      hidden->set_value(std::to_string(ts_idx));
      hidden->mutable_tags()->Add("hidden");
    }
    {
      // Add a hidden and unsafe flag with one tablet server having a different value
      // than the other two.
      auto* hidden_unsafe = resp.add_flags();
      hidden_unsafe->set_name("hidden_unsafe_flag");
      hidden_unsafe->set_value(std::to_string(ts_idx % 2));
      hidden_unsafe->mutable_tags()->Add("hidden");
      hidden_unsafe->mutable_tags()->Add("unsafe");
    }
    auto mock_ts = std::static_pointer_cast<MockKsckTabletServer>(kv.second);
    mock_ts->flags_by_category_[FlagsCategory::UNUSUAL].flags = std::move(resp);
    ++ts_idx;
  }

  ASSERT_OK(ksck_->FetchInfoFromTabletServers());
  ASSERT_OK(ksck_->CheckTabletServerUnusualFlags());
  ASSERT_OK(ksck_->CheckTabletServerDivergedFlags());
  ASSERT_OK(ksck_->PrintResults());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Unusual flags for Tablet Server:\n"
                      " Flag | Value | Tags | Tablet Server\n"
                      "--------------------+-------+---------------+-------------------------------\n"
                      " experimental_flag | x | experimental | all 3 server(s) checked\n"
                      " hidden_flag | 0 | hidden | <mock>\n"
                      " hidden_flag | 1 | hidden | <mock>\n"
                      " hidden_flag | 2 | hidden | <mock>\n"
                      " hidden_unsafe_flag | 0 | hidden,unsafe | <mock>, and 1 other server(s)\n"
                      " hidden_unsafe_flag | 1 | hidden,unsafe | <mock>");
  ASSERT_STR_CONTAINS(output,
                      "Some tablet servers have unsafe, experimental, or hidden flags set");
  ASSERT_STR_CONTAINS(output,
                      "Flags of checked categories for Tablet Server:\n"
                      " Flag | Value | Tablet Server\n"
                      "--------------------+-------+-------------------------------\n"
                      " experimental_flag | x | all 3 server(s) checked\n"
                      " hidden_flag | 0 | <mock>\n"
                      " hidden_flag | 1 | <mock>\n"
                      " hidden_flag | 2 | <mock>\n"
                      " hidden_unsafe_flag | 0 | <mock>, and 1 other server(s)\n"
                      " hidden_unsafe_flag | 1 | <mock>");
  ASSERT_STR_CONTAINS(output,
                      "Different tablet servers have different settings for same flags "
                      "of checked category 'unusual'");
}
// Sets 'time_source' category flags that are identical among the masters and
// identical among the tablet servers, but differ between the two groups, and
// checks that ksck prints the per-group tables and the master-vs-tablet-server
// divergence while not reporting divergence inside either group.
TEST_F(KsckTest, FlagsCategoriesDifferenceBetweenMastersAndTabletServers) {
// Check for the differences in the 'time_source' flags category.
FLAGS_flags_categories_to_check = "time_source";
// Setup flags for mock masters.
for (const auto& master : cluster_->masters()) {
shared_ptr<MockKsckMaster> m =
std::static_pointer_cast<MockKsckMaster>(master);
// Set two flags in the 'time_source' category.
{
server::GetFlagsResponsePB flags;
{
auto* flag = flags.add_flags();
flag->set_name("time_source");
flag->set_value("builtin");
}
{
auto* flag = flags.add_flags();
flag->set_name("builtin_ntp_servers");
flag->set_value("mega.turbo.ntp");
}
m->flags_by_category_[FlagsCategory::TIME_SOURCE].flags = std::move(flags);
}
// Set a flag unrelated to the checked category.
{
server::GetFlagsResponsePB flags;
{
auto* flag = flags.add_flags();
flag->set_name("giga");
flag->set_value("hertz");
}
m->flags_by_category_[FlagsCategory::UNUSUAL].flags = std::move(flags);
}
}
// Setup flags for mock tablet servers.
for (const auto& entry : cluster_->tablet_servers()) {
shared_ptr<MockKsckTabletServer> ts =
std::static_pointer_cast<MockKsckTabletServer>(entry.second);
// Set one flag in the 'time_source' category.
{
server::GetFlagsResponsePB flags;
{
auto* flag = flags.add_flags();
flag->set_name("time_source");
flag->set_value("system");
}
ts->flags_by_category_[FlagsCategory::TIME_SOURCE].flags = std::move(flags);
}
// Set a flag unrelated to the 'time_source' category.
{
server::GetFlagsResponsePB flags;
{
auto* flag = flags.add_flags();
flag->set_name("foo");
flag->set_value("bar");
}
ts->flags_by_category_[FlagsCategory::UNUSUAL].flags = std::move(flags);
}
}
// Calling CheckMasterHealth() is a prerequisite for calling the master flag
// checks; the one exercised here is CheckMasterDivergedFlags().
ASSERT_OK(ksck_->CheckMasterHealth());
ASSERT_OK(ksck_->CheckMasterDivergedFlags());
// Calling FetchInfoFromTabletServers() is a prerequisite for calling
// CheckTabletServerDivergedFlags().
ASSERT_OK(ksck_->FetchInfoFromTabletServers());
ASSERT_OK(ksck_->CheckTabletServerDivergedFlags());
// Compare the flags between the masters and the tablet servers.
ASSERT_OK(ksck_->CheckDivergedFlags());
ASSERT_OK(ksck_->PrintResults());
// Masters: both 'time_source' category flags are listed, with no divergence
// among the masters themselves.
ASSERT_STR_CONTAINS(err_stream_.str(),
"Flags of checked categories for Master:\n"
" Flag | Value | Master\n"
"---------------------+----------------+-------------------------\n"
" builtin_ntp_servers | mega.turbo.ntp | all 3 server(s) checked\n"
" time_source | builtin | all 3 server(s) checked\n");
ASSERT_STR_NOT_CONTAINS(err_stream_.str(),
"Different masters have different settings for same flags "
"of checked category 'time_source'");
ASSERT_STR_CONTAINS(err_stream_.str(),
"Flags of checked categories for Master diverging from Tablet Server flags:\n"
" Flag | Value | Master\n"
"---------------------+----------------+-------------------------\n"
" builtin_ntp_servers | mega.turbo.ntp | all 3 server(s) checked\n"
" time_source | builtin | all 3 server(s) checked");
// Tablet servers: the single 'time_source' flag is listed, with no divergence
// among the tablet servers themselves.
ASSERT_STR_CONTAINS(err_stream_.str(),
"Flags of checked categories for Tablet Server:\n"
" Flag | Value | Tablet Server\n"
"-------------+--------+-------------------------\n"
" time_source | system | all 3 server(s) checked\n");
ASSERT_STR_CONTAINS(err_stream_.str(),
"Flags of checked categories for Tablet Server diverging from Master flags:\n"
" Flag | Value | Tablet Server\n"
"-------------+--------+-------------------------\n"
" time_source | system | all 3 server(s) checked");
ASSERT_STR_NOT_CONTAINS(err_stream_.str(),
"Different tablet servers have different settings for same flags "
"of checked category 'time_source'");
// The divergence between the two groups must be reported.
ASSERT_STR_CONTAINS(err_stream_.str(),
"Same flags have different values between masters and tablet servers "
"for at least one checked flag category");
}
// With the GetFlagsUnavailableKsckTest fixture, the unusual-flags check for
// tablet servers is expected to return Incomplete, and no warning message may
// mention the failure to get tablet server flag information.
TEST_F(GetFlagsUnavailableKsckTest, TestTserverFlagsUnavailable) {
  ASSERT_OK(ksck_->FetchInfoFromTabletServers());
  ASSERT_TRUE(ksck_->CheckTabletServerUnusualFlags().IsIncomplete());

  const auto& warnings = ksck_->results().warning_messages;
  CheckMessageNotPresent(warnings, "unable to get flag information for tablet server");
}
// Runs ksck with the checksum scan enabled on a single-tablet table and checks
// the final checksum progress line.
TEST_F(KsckTest, TestOneTableCheck) {
  CreateOneTableOneTablet();
  FLAGS_checksum_scan = true;
  ASSERT_OK(RunKsck());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output, "0/1 replicas remaining (20B from disk, 10 rows summed)");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Runs ksck with the checksum scan enabled on the small replicated table and
// checks the final checksum progress line.
TEST_F(KsckTest, TestOneSmallReplicatedTable) {
  CreateOneSmallReplicatedTable();
  FLAGS_checksum_scan = true;
  ASSERT_OK(RunKsck());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output, "0/9 replicas remaining (180B from disk, 90 rows summed)");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Runs ksck on the small replicated table and expects a table summary with all
// three tablets healthy.
TEST_F(KsckTest, TestOneSmallReplicatedTableWithConsensusState) {
  CreateOneSmallReplicatedTable();
  ASSERT_OK(RunKsck());

  const string output = err_stream_.str();
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 3,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 0,
                                                     /*consensus_mismatch_tablets=*/ 0,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Adds a fake peer to the committed config that "ts-id-0" reports for
// "tablet-id-0" and expects ksck to report that tablet as conflicted.
TEST_F(KsckTest, TestConsensusConflictExtraPeer) {
  CreateOneSmallReplicatedTable();

  auto ts = FindOrDie(cluster_->tablet_servers_, "ts-id-0");
  auto& reported_cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map_,
                                           std::make_pair("ts-id-0", "tablet-id-0"));
  reported_cstate.mutable_committed_config()->add_peers()->set_permanent_uuid("ts-id-fake");

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output, "Tablet tablet-id-0 of table 'test' is conflicted: "
                              "1 replicas' active configs disagree with the leader master's");
  ASSERT_STR_CONTAINS(output,
                      "The consensus matrix is:\n"
                      " Config source | Replicas | Current term | Config index | Committed?\n"
                      "---------------+------------------+--------------+--------------+------------\n"
                      " master | A* B C | | | Yes\n"
                      " A | A* B C D | 0 | | Yes\n"
                      " B | A* B C | 0 | | Yes\n"
                      " C | A* B C | 0 | | Yes");
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 2,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 0,
                                                     /*consensus_mismatch_tablets=*/ 1,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Removes the last peer from the committed config that "ts-id-0" reports for
// "tablet-id-0" and expects ksck to count that tablet as a consensus mismatch.
TEST_F(KsckTest, TestConsensusConflictMissingPeer) {
  CreateOneSmallReplicatedTable();

  auto ts = FindOrDie(cluster_->tablet_servers_, "ts-id-0");
  auto& reported_cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map_,
                                           std::make_pair("ts-id-0", "tablet-id-0"));
  reported_cstate.mutable_committed_config()->mutable_peers()->RemoveLast();

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "The consensus matrix is:\n"
                      " Config source | Replicas | Current term | Config index | Committed?\n"
                      "---------------+--------------+--------------+--------------+------------\n"
                      " master | A* B C | | | Yes\n"
                      " A | A* B | 0 | | Yes\n"
                      " B | A* B C | 0 | | Yes\n"
                      " C | A* B C | 0 | | Yes");
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 2,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 0,
                                                     /*consensus_mismatch_tablets=*/ 1,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Changes the leader that "ts-id-0" reports for "tablet-id-0" to "ts-id-1" and
// expects ksck to count that tablet as a consensus mismatch.
TEST_F(KsckTest, TestConsensusConflictDifferentLeader) {
  CreateOneSmallReplicatedTable();

  const auto& ts = FindOrDie(cluster_->tablet_servers_, "ts-id-0");
  auto& reported_cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map_,
                                           std::make_pair("ts-id-0", "tablet-id-0"));
  reported_cstate.set_leader_uuid("ts-id-1");

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "The consensus matrix is:\n"
                      " Config source | Replicas | Current term | Config index | Committed?\n"
                      "---------------+--------------+--------------+--------------+------------\n"
                      " master | A* B C | | | Yes\n"
                      " A | A B* C | 0 | | Yes\n"
                      " B | A* B C | 0 | | Yes\n"
                      " C | A* B C | 0 | | Yes");
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 2,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 0,
                                                     /*consensus_mismatch_tablets=*/ 1,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Runs ksck on the single-tablet "broken" replicated table and expects the
// tablet to be reported as under-replicated (2 replicas vs desired 3).
TEST_F(KsckTest, TestOneOneTabletBrokenTable) {
  CreateOneOneTabletReplicatedBrokenTable();

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Tablet tablet-id-1 of table 'test' is under-replicated: "
                      "configuration has 2 replicas vs desired 3");
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 0,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 1,
                                                     /*consensus_mismatch_tablets=*/ 0,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Erases "tablet-id-2" from the tablet status map of "ts-id-0" and expects ksck
// to report that replica as missing and the tablet as under-replicated.
TEST_F(KsckTest, TestMismatchedAssignments) {
  CreateOneSmallReplicatedTable();

  auto first_ts = static_pointer_cast<MockKsckTabletServer>(
      cluster_->tablet_servers_.at(Substitute("ts-id-$0", 0)));
  ASSERT_EQ(1, first_ts->tablet_status_map_.erase("tablet-id-2"));

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Tablet tablet-id-2 of table 'test' is under-replicated: "
                      "1 replica(s) not RUNNING\n"
                      " ts-id-0 (<mock>): missing [LEADER]\n"
                      " ts-id-1 (<mock>): RUNNING\n"
                      " ts-id-2 (<mock>): RUNNING\n");
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 2,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 1,
                                                     /*consensus_mismatch_tablets=*/ 0,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Runs ksck on a replicated table that has a tablet whose replicas are not
// running and expects that tablet to be reported as unavailable.
TEST_F(KsckTest, TestTabletNotRunning) {
  CreateOneSmallReplicatedTableWithTabletNotRunning();

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(
      output,
      "Tablet tablet-id-0 of table 'test' is unavailable: 3 replica(s) not RUNNING\n"
      " ts-id-0 (<mock>): not running [LEADER]\n"
      " State: FAILED\n"
      " Data state: TABLET_DATA_UNKNOWN\n"
      " Last status: \n"
      " ts-id-1 (<mock>): not running\n"
      " State: FAILED\n"
      " Data state: TABLET_DATA_UNKNOWN\n"
      " Last status: \n"
      " ts-id-2 (<mock>): not running\n"
      " State: FAILED\n"
      " Data state: TABLET_DATA_UNKNOWN\n"
      " Last status: \n");
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 2,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 0,
                                                     /*consensus_mismatch_tablets=*/ 0,
                                                     /*unavailable_tablets=*/ 1);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Marks one replica of "tablet-id-0" as TABLET_DATA_COPYING and expects the
// table summary to count one recovering tablet.
TEST_F(KsckTest, TestTabletCopying) {
  CreateOneSmallReplicatedTableWithTabletNotRunning();
  CreateDefaultAssignmentPlan(1);

  // Mark one of the tablet replicas as copying.
  auto copying_ts = static_pointer_cast<MockKsckTabletServer>(
      cluster_->tablet_servers_.at(assignment_plan_.back()));
  auto& tablet_status = FindOrDie(copying_ts->tablet_status_map_, "tablet-id-0");
  tablet_status.set_tablet_data_state(TabletDataState::TABLET_DATA_COPYING);

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 2,
                                                     /*recovering_tablets=*/ 1,
                                                     /*underreplicated_tablets=*/ 0,
                                                     /*consensus_mismatch_tablets=*/ 0,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Test for a bug where we weren't properly handling a tserver not reported by the master.
TEST_F(KsckTest, TestMasterNotReportingTabletServer) {
  CreateOneSmallReplicatedTable();

  // Delete a tablet server from the master's list. This simulates a situation
  // where the master is starting and doesn't list all tablet servers yet, but
  // tablets from other tablet servers are listing a missing tablet server as a peer.
  EraseKeyReturnValuePtr(&cluster_->tablet_servers_, "ts-id-0");

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 0,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 3,
                                                     /*consensus_mismatch_tablets=*/ 0,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// KUDU-2113: Test for a bug where we weren't properly handling a tserver not
// reported by the master when there was also a consensus conflict.
TEST_F(KsckTest, TestMasterNotReportingTabletServerWithConsensusConflict) {
  CreateOneSmallReplicatedTable();

  // Delete a tablet server from the cluster's list as in TestMasterNotReportingTabletServer.
  EraseKeyReturnValuePtr(&cluster_->tablet_servers_, "ts-id-0");

  // Now engineer a consensus conflict.
  const auto& ts = FindOrDie(cluster_->tablet_servers_, "ts-id-1");
  auto& reported_cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map_,
                                           std::make_pair("ts-id-1", "tablet-id-1"));
  reported_cstate.set_leader_uuid("ts-id-1");

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const vector<Status>& error_messages = ksck_->results().error_messages;
  ASSERT_EQ(1, error_messages.size());
  ASSERT_EQ("Corruption: table consistency check error: 1 out of 1 table(s) are not healthy",
            error_messages[0].ToString());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "The consensus matrix is:\n"
                      " Config source | Replicas | Current term | Config index | Committed?\n"
                      "---------------+------------------------+--------------+--------------+------------\n"
                      " master | A* B C | | | Yes\n"
                      " A | [config not available] | | | \n"
                      " B | A B* C | 0 | | Yes\n"
                      " C | A* B C | 0 | | Yes");
  const auto expected_summary = ExpectedTableSummary("test",
                                                     /*replication_factor=*/ 3,
                                                     /*healthy_tablets=*/ 0,
                                                     /*recovering_tablets=*/ 0,
                                                     /*underreplicated_tablets=*/ 3,
                                                     /*consensus_mismatch_tablets=*/ 0,
                                                     /*unavailable_tablets=*/ 0);
  ASSERT_STR_CONTAINS(output, expected_summary);

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Gives the masters at index 1 and 2 the versions "v1" and "v2" and checks the
// version summary table and the version mismatch error.
TEST_F(KsckTest, TestVersionCheck) {
  for (int idx : {1, 2}) {
    auto mock_master = static_pointer_cast<MockKsckMaster>(cluster_->masters_[idx]);
    mock_master->version_ = Substitute("v$0", idx);
  }
  ASSERT_OK(RunKsck());

  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Version Summary\n"
                      " Version | Servers\n"
                      "--------------+------------------------------------------------------------------------\n"
                      " mock-version | master@master-0, tserver@<mock>, tserver@<mock>, and 1 other server(s)\n"
                      " v1 | master@master-1\n"
                      " v2 | master@master-2");
  ASSERT_STR_CONTAINS(output, "version check error: not all servers "
                              "are running the same version: "
                              "3 different versions were seen");

  const auto json = KsckResultsToJsonString();
  CheckJsonStringVsKsckResults(json, ksck_->results());
}
// Runs ksck with the checksum scan enabled and the "json_compact" output format
// and checks that a JsonReader over the captured output initializes
// successfully.
TEST_F(KsckTest, TestChecksumScanJson) {
  CreateOneTableOneTablet();
  FLAGS_checksum_scan = true;
  FLAGS_ksck_format = "json_compact";
  ASSERT_OK(RunKsck());

  JsonReader reader(err_stream_.str());
  ASSERT_OK(reader.Init());
}
// Expects the checksum scan to report mismatches when one tablet server returns
// a checksum that differs from the others.
TEST_F(KsckTest, TestChecksumScanMismatch) {
  CreateOneSmallReplicatedTable();
  FLAGS_checksum_scan = true;

  // Set one tablet server to return a non-zero checksum for its replicas.
  // This will not match the checksums of replicas from other servers because
  // they are zero by default.
  auto odd_ts = static_pointer_cast<MockKsckTabletServer>(
      cluster_->tablet_servers_.begin()->second);
  odd_ts->checksum_ = 1;

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Corruption: checksum scan error: 3 tablet(s) had "
                      "checksum mismatches");
}
// Expects the checksum scan to time out when a tablet server never reports
// progress and the idle timeout is zero.
TEST_F(KsckTest, TestChecksumScanIdleTimeout) {
  CreateOneTableOneTablet();
  FLAGS_checksum_scan = true;

  // Set an impossibly low idle timeout and tweak one of the servers to always
  // report no progress on the checksum.
  FLAGS_checksum_idle_timeout_sec = 0;
  auto stalled_ts = static_pointer_cast<MockKsckTabletServer>(
      cluster_->tablet_servers_.begin()->second);
  stalled_ts->checksum_progress_ = 0;

  // Make the progress report happen frequently so this test is fast.
  FLAGS_max_progress_report_wait_ms = 10;

  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output,
                      "Timed out: checksum scan error: Checksum scan did not "
                      "make progress within the idle timeout of 0.000s");
}
// Expects the checksum scan to fail when every tablet server is marked
// UNAVAILABLE.
TEST_F(KsckTest, TestChecksumWithAllUnhealthyTabletServers) {
  CreateOneTableOneTablet();
  FLAGS_checksum_scan = true;

  // Make all tablet servers unhealthy.
  for (const auto& kv : cluster_->tablet_servers_) {
    auto mock_ts = static_pointer_cast<MockKsckTabletServer>(kv.second);
    mock_ts->fetch_info_status_ = Status::NetworkError("gremlins");
    mock_ts->fetch_info_health_ = ServerHealth::UNAVAILABLE;
  }

  // The checksum should short-circuit and fail because no tablet servers are
  // available.
  ASSERT_TRUE(RunKsck().IsRuntimeError());
  const string output = err_stream_.str();
  ASSERT_STR_CONTAINS(output, "no tablet servers are available");
}
// Checks that the checksum of a tablet fails when all of the tablet's peers
// are unavailable, even though another tablet server in the cluster is still
// available.
TEST_F(KsckTest, TestChecksumWithAllPeersUnhealthy) {
  CreateOneTableOneTablet();
  FLAGS_checksum_scan = true;

  // Mark every tablet server that currently exists as unreachable.
  for (const auto& uuid_and_server : cluster_->tablet_servers_) {
    const auto mock_server =
        static_pointer_cast<MockKsckTabletServer>(uuid_and_server.second);
    mock_server->fetch_info_health_ = ServerHealth::UNAVAILABLE;
    mock_server->fetch_info_status_ = Status::NetworkError("gremlins");
  }

  // Add one extra tablet server which hosts no replica of the tablet.
  const char* const spare_uuid = "new";
  auto spare_server =
      make_shared<MockKsckTabletServer>(spare_uuid, IsGetFlagsAvailable());
  EmplaceOrDie(&cluster_->tablet_servers_, spare_uuid, std::move(spare_server));

  // None of the tablet's replicas can provide a timestamp, so the checksum
  // for the tablet should fail.
  ASSERT_TRUE(RunKsck().IsRuntimeError());
  ASSERT_STR_CONTAINS(
      err_stream_.str(),
      "T tablet-id-1 P ts-id-0 (<mock>): Error: Aborted: "
      "no healthy peer was available to provide a timestamp");
}
// Checks that a location assigned to a tablet server shows up in the
// "Tablet Server Summary" table, that servers without a location are shown
// as "<none>", and that the JSON output agrees with the ksck results.
TEST_F(KsckTest, TestTabletServerLocation) {
  CreateOneTableOneTablet();

  // Look the server up with FindOrDie() instead of operator[]: if the key
  // were ever missing, operator[] would silently insert a null entry into the
  // cluster's server map and the assignment below would dereference it,
  // whereas FindOrDie() fails with an explicit message and leaves the map
  // untouched.
  const shared_ptr<MockKsckTabletServer> ts =
      static_pointer_cast<MockKsckTabletServer>(
          FindOrDie(cluster_->tablet_servers_, "ts-id-1"));
  ts->location_ = "/foo";

  // Run the individual ksck steps needed to populate and print the tablet
  // server summary.
  ASSERT_OK(ksck_->CheckClusterRunning());
  ASSERT_OK(ksck_->FetchTableAndTabletInfo());
  ASSERT_OK(ksck_->FetchInfoFromTabletServers());
  ASSERT_OK(ksck_->PrintResults());

  ASSERT_STR_CONTAINS(err_stream_.str(),
      "Tablet Server Summary\n"
      " UUID | Address | Status | Location\n"
      "---------+---------+---------+----------\n"
      " ts-id-0 | <mock> | HEALTHY | <none>\n"
      " ts-id-1 | <mock> | HEALTHY | /foo\n"
      " ts-id-2 | <mock> | HEALTHY | <none>\n");

  NO_FATALS(CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results()));
}
// Runs ksck once per print section, restricting the output to that single
// section, and validates both the plain-text and the JSON output against the
// selected section.
TEST_F(KsckTest, TestSectionFilter) {
  // Maps each section flag to the name passed to set_print_sections().
  const std::map<int, std::string> section_names = {
      {PrintSections::Values::MASTER_SUMMARIES, "MASTER_SUMMARIES"},
      {PrintSections::Values::TSERVER_SUMMARIES, "TSERVER_SUMMARIES"},
      {PrintSections::Values::VERSION_SUMMARIES, "VERSION_SUMMARIES"},
      {PrintSections::Values::TABLET_SUMMARIES, "TABLET_SUMMARIES"},
      {PrintSections::Values::TABLE_SUMMARIES, "TABLE_SUMMARIES"},
      {PrintSections::Values::SYSTEM_TABLE_SUMMARIES, "SYSTEM_TABLE_SUMMARIES"},
      {PrintSections::Values::CHECKSUM_RESULTS, "CHECKSUM_RESULTS"},
      {PrintSections::Values::TOTAL_COUNT, "TOTAL_COUNT"}};

  NO_FATALS(CreateOneTableOneTablet(/*create_txn_status_table=*/true));

  for (const auto& flag_and_name : section_names) {
    const int section_flag = flag_and_name.first;
    const std::string& section_name = flag_and_name.second;

    // Turn on the checksum scan when the checksum section is requested. The
    // flag is not reset afterwards, so it stays set for any remaining
    // iterations.
    if (section_flag == PrintSections::Values::CHECKSUM_RESULTS) {
      FLAGS_checksum_scan = true;
    }
    ksck_->set_print_sections({section_name});

    // Discard output left over from the previous iteration.
    err_stream_.clear();
    err_stream_.str("");
    ASSERT_OK(RunKsck());

    // Validate the plain-text output.
    NO_FATALS(CheckPlainStringSections(err_stream_.str(), section_flag));

    // Validate the JSON output.
    const string json_text = KsckResultsToJsonString(section_flag);
    NO_FATALS(CheckJsonStringVsKsckResults(json_text, ksck_->results(), section_flag));
  }
}
} // namespace tools
} // namespace kudu