| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <iosfwd> |
| #include <memory> |
| #include <unordered_map> |
| |
| #include <gflags/gflags.h> |
| #include <gtest/gtest.h> |
| |
| #include "kudu/gutil/map-util.h" |
| #include "kudu/gutil/strings/substitute.h" |
| #include "kudu/tools/ksck.h" |
| #include "kudu/util/scoped_cleanup.h" |
| #include "kudu/util/test_util.h" |
| |
| DECLARE_string(color); |
| |
| namespace kudu { |
| namespace tools { |
| |
| using std::shared_ptr; |
| using std::static_pointer_cast; |
| using std::string; |
| using std::unordered_map; |
| using strings::Substitute; |
| |
| // Import this symbol from ksck.cc so we can introspect the |
| // errors being written to stderr. |
| extern std::ostream* g_err_stream; |
| |
| class MockKsckTabletServer : public KsckTabletServer { |
| public: |
| explicit MockKsckTabletServer(const string& uuid) |
| : KsckTabletServer(uuid), |
| fetch_info_status_(Status::OK()), |
| address_("<mock>") { |
| } |
| |
| Status FetchInfo() override { |
| timestamp_ = 12345; |
| if (fetch_info_status_.ok()) { |
| state_ = kFetched; |
| } else { |
| state_ = kFetchFailed; |
| } |
| return fetch_info_status_; |
| } |
| |
| virtual void RunTabletChecksumScanAsync( |
| const std::string& tablet_id, |
| const Schema& schema, |
| const ChecksumOptions& options, |
| ChecksumProgressCallbacks* callbacks) OVERRIDE { |
| callbacks->Progress(10, 20); |
| callbacks->Finished(Status::OK(), 0); |
| } |
| |
| virtual std::string address() const OVERRIDE { |
| return address_; |
| } |
| |
| // Public because the unit tests mutate this variable directly. |
| Status fetch_info_status_; |
| |
| private: |
| const string address_; |
| }; |
| |
| class MockKsckMaster : public KsckMaster { |
| public: |
| MockKsckMaster() |
| : fetch_info_status_(Status::OK()) { |
| } |
| |
| virtual Status Connect() OVERRIDE { |
| return fetch_info_status_; |
| } |
| |
| virtual Status RetrieveTabletServers(TSMap* tablet_servers) OVERRIDE { |
| *tablet_servers = tablet_servers_; |
| return Status::OK(); |
| } |
| |
| virtual Status RetrieveTablesList(vector<shared_ptr<KsckTable>>* tables) OVERRIDE { |
| tables->assign(tables_.begin(), tables_.end()); |
| return Status::OK(); |
| } |
| |
| virtual Status RetrieveTabletsList(const shared_ptr<KsckTable>& table) OVERRIDE { |
| return Status::OK(); |
| } |
| |
| // Public because the unit tests mutate these variables directly. |
| Status fetch_info_status_; |
| TSMap tablet_servers_; |
| vector<shared_ptr<KsckTable>> tables_; |
| }; |
| |
| class KsckTest : public KuduTest { |
| public: |
| KsckTest() |
| : master_(new MockKsckMaster()), |
| cluster_(new KsckCluster(static_pointer_cast<KsckMaster>(master_))), |
| ksck_(new Ksck(cluster_)) { |
| FLAGS_color = "never"; |
| unordered_map<string, shared_ptr<KsckTabletServer>> tablet_servers; |
| for (int i = 0; i < 3; i++) { |
| string name = Substitute("ts-id-$0", i); |
| shared_ptr<MockKsckTabletServer> ts(new MockKsckTabletServer(name)); |
| InsertOrDie(&tablet_servers, ts->uuid(), ts); |
| } |
| master_->tablet_servers_.swap(tablet_servers); |
| |
| g_err_stream = &err_stream_; |
| } |
| |
| ~KsckTest() { |
| g_err_stream = NULL; |
| } |
| |
| protected: |
| void CreateDefaultAssignmentPlan(int tablets_count) { |
| while (tablets_count > 0) { |
| for (const KsckMaster::TSMap::value_type& entry : master_->tablet_servers_) { |
| if (tablets_count-- == 0) return; |
| assignment_plan_.push_back(entry.second->uuid()); |
| } |
| } |
| } |
| |
| void CreateOneTableOneTablet() { |
| CreateDefaultAssignmentPlan(1); |
| |
| auto table = CreateAndAddTable("test", 1); |
| shared_ptr<KsckTablet> tablet(new KsckTablet(table.get(), "tablet-id-1")); |
| CreateAndFillTablet(tablet, 1, true, true); |
| table->set_tablets({ tablet }); |
| } |
| |
| void CreateOneSmallReplicatedTable() { |
| int num_replicas = 3; |
| int num_tablets = 3; |
| CreateDefaultAssignmentPlan(num_replicas * num_tablets); |
| auto table = CreateAndAddTable("test", num_replicas); |
| |
| vector<shared_ptr<KsckTablet>> tablets; |
| for (int i = 0; i < num_tablets; i++) { |
| shared_ptr<KsckTablet> tablet(new KsckTablet( |
| table.get(), Substitute("tablet-id-$0", i))); |
| CreateAndFillTablet(tablet, num_replicas, true, true); |
| tablets.push_back(tablet); |
| } |
| table->set_tablets(tablets); |
| } |
| |
| void CreateOneSmallReplicatedTableWithTabletNotRunning() { |
| int num_replicas = 3; |
| int num_tablets = 3; |
| CreateDefaultAssignmentPlan(num_replicas * num_tablets); |
| auto table = CreateAndAddTable("test", num_replicas); |
| |
| vector<shared_ptr<KsckTablet>> tablets; |
| for (int i = 0; i < num_tablets; i++) { |
| shared_ptr<KsckTablet> tablet(new KsckTablet( |
| table.get(), Substitute("tablet-id-$0", i))); |
| CreateAndFillTablet(tablet, num_replicas, true, i != 0); |
| tablets.push_back(tablet); |
| } |
| table->set_tablets(tablets); |
| } |
| |
| void CreateOneOneTabletReplicatedBrokenTable() { |
| // We're placing only two tablets, the 3rd goes nowhere. |
| CreateDefaultAssignmentPlan(2); |
| |
| auto table = CreateAndAddTable("test", 3); |
| |
| shared_ptr<KsckTablet> tablet(new KsckTablet(table.get(), "tablet-id-1")); |
| CreateAndFillTablet(tablet, 2, false, true); |
| table->set_tablets({ tablet }); |
| } |
| |
| shared_ptr<KsckTable> CreateAndAddTable(const string& name, int num_replicas) { |
| shared_ptr<KsckTable> table(new KsckTable(name, Schema(), num_replicas)); |
| vector<shared_ptr<KsckTable>> tables = { table }; |
| master_->tables_.assign(tables.begin(), tables.end()); |
| return table; |
| } |
| |
| void CreateAndFillTablet(shared_ptr<KsckTablet>& tablet, int num_replicas, |
| bool has_leader, bool is_running) { |
| vector<shared_ptr<KsckTabletReplica>> replicas; |
| if (has_leader) { |
| CreateReplicaAndAdd(replicas, tablet->id(), true, is_running); |
| num_replicas--; |
| } |
| for (int i = 0; i < num_replicas; i++) { |
| CreateReplicaAndAdd(replicas, tablet->id(), false, is_running); |
| } |
| tablet->set_replicas(replicas); |
| } |
| |
| void CreateReplicaAndAdd(vector<shared_ptr<KsckTabletReplica>>& replicas, |
| string tablet_id, |
| bool is_leader, |
| bool is_running) { |
| shared_ptr<KsckTabletReplica> replica(new KsckTabletReplica(assignment_plan_.back(), |
| is_leader)); |
| shared_ptr<MockKsckTabletServer> ts = static_pointer_cast<MockKsckTabletServer>( |
| master_->tablet_servers_.at(assignment_plan_.back())); |
| |
| assignment_plan_.pop_back(); |
| replicas.push_back(replica); |
| |
| // Add the equivalent replica on the tablet server. |
| tablet::TabletStatusPB pb; |
| pb.set_tablet_id(tablet_id); |
| pb.set_table_name("fake-table"); |
| pb.set_state(is_running ? tablet::RUNNING : tablet::FAILED); |
| InsertOrDie(&ts->tablet_status_map_, tablet_id, std::move(pb)); |
| } |
| |
| Status RunKsck() { |
| auto c = MakeScopedCleanup([this]() { |
| LOG(INFO) << "Ksck output:\n" << err_stream_.str(); |
| }); |
| RETURN_NOT_OK(ksck_->CheckMasterRunning()); |
| RETURN_NOT_OK(ksck_->FetchTableAndTabletInfo()); |
| RETURN_NOT_OK(ksck_->FetchInfoFromTabletServers()); |
| RETURN_NOT_OK(ksck_->CheckTablesConsistency()); |
| return Status::OK(); |
| } |
| |
| |
| shared_ptr<MockKsckMaster> master_; |
| shared_ptr<KsckCluster> cluster_; |
| shared_ptr<Ksck> ksck_; |
| // This is used as a stack. First the unit test is responsible to create a plan to follow, that |
| // is the order in which each replica of each tablet will be assigned, starting from the end. |
| // So if you have 2 tablets with num_replicas=3 and 3 tablet servers, then to distribute evenly |
| // you should have a list that looks like ts1,ts2,ts3,ts3,ts2,ts1 so that the two LEADERS, which |
| // are assigned first, end up on ts1 and ts3. |
| vector<string> assignment_plan_; |
| |
| std::ostringstream err_stream_; |
| }; |
| |
| TEST_F(KsckTest, TestMasterOk) { |
| ASSERT_OK(ksck_->CheckMasterRunning()); |
| } |
| |
| TEST_F(KsckTest, TestMasterUnavailable) { |
| Status error = Status::NetworkError("Network failure"); |
| master_->fetch_info_status_ = error; |
| ASSERT_TRUE(ksck_->CheckMasterRunning().IsNetworkError()); |
| } |
| |
| TEST_F(KsckTest, TestTabletServersOk) { |
| ASSERT_OK(RunKsck()); |
| } |
| |
| TEST_F(KsckTest, TestBadTabletServer) { |
| CreateOneSmallReplicatedTable(); |
| |
| // Mock a failure to connect to one of the tablet servers. |
| Status error = Status::NetworkError("Network failure"); |
| static_pointer_cast<MockKsckTabletServer>(master_->tablet_servers_["ts-id-1"]) |
| ->fetch_info_status_ = error; |
| |
| ASSERT_OK(ksck_->CheckMasterRunning()); |
| ASSERT_OK(ksck_->FetchTableAndTabletInfo()); |
| Status s = ksck_->FetchInfoFromTabletServers(); |
| ASSERT_TRUE(s.IsNetworkError()) << "Status returned: " << s.ToString(); |
| |
| s = ksck_->CheckTablesConsistency(); |
| EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString()); |
| ASSERT_STR_CONTAINS( |
| err_stream_.str(), |
| "WARNING: Unable to connect to Tablet Server " |
| "ts-id-1 (<mock>): Network error: Network failure"); |
| ASSERT_STR_CONTAINS( |
| err_stream_.str(), |
| "Tablet tablet-id-0 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n" |
| " ts-id-0 (<mock>): RUNNING [LEADER]\n" |
| " ts-id-1 (<mock>): TS unavailable\n" |
| " ts-id-2 (<mock>): RUNNING\n"); |
| ASSERT_STR_CONTAINS( |
| err_stream_.str(), |
| "Tablet tablet-id-1 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n" |
| " ts-id-0 (<mock>): RUNNING [LEADER]\n" |
| " ts-id-1 (<mock>): TS unavailable\n" |
| " ts-id-2 (<mock>): RUNNING\n"); |
| ASSERT_STR_CONTAINS( |
| err_stream_.str(), |
| "Tablet tablet-id-2 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n" |
| " ts-id-0 (<mock>): RUNNING [LEADER]\n" |
| " ts-id-1 (<mock>): TS unavailable\n" |
| " ts-id-2 (<mock>): RUNNING\n"); |
| } |
| |
| TEST_F(KsckTest, TestZeroTabletReplicasCheck) { |
| ASSERT_OK(RunKsck()); |
| } |
| |
| TEST_F(KsckTest, TestZeroTableCheck) { |
| ASSERT_OK(RunKsck()); |
| } |
| |
| TEST_F(KsckTest, TestOneTableCheck) { |
| CreateOneTableOneTablet(); |
| ASSERT_OK(RunKsck()); |
| ASSERT_OK(ksck_->ChecksumData(ChecksumOptions())); |
| ASSERT_STR_CONTAINS(err_stream_.str(), |
| "0/1 replicas remaining (20B from disk, 10 rows summed)"); |
| } |
| |
| TEST_F(KsckTest, TestOneSmallReplicatedTable) { |
| CreateOneSmallReplicatedTable(); |
| ASSERT_OK(RunKsck()); |
| ASSERT_OK(ksck_->ChecksumData(ChecksumOptions())); |
| ASSERT_STR_CONTAINS(err_stream_.str(), |
| "0/9 replicas remaining (180B from disk, 90 rows summed)"); |
| |
| // Test filtering (a non-matching pattern) |
| err_stream_.str(""); |
| ksck_->set_table_filters({"xyz"}); |
| ASSERT_OK(RunKsck()); |
| Status s = ksck_->ChecksumData(ChecksumOptions()); |
| EXPECT_EQ("Not found: No table found. Filter: table_filters=xyz", s.ToString()); |
| ASSERT_STR_CONTAINS(err_stream_.str(), |
| "The cluster doesn't have any matching tables"); |
| |
| // Test filtering with a matching table pattern. |
| err_stream_.str(""); |
| ksck_->set_table_filters({"te*"}); |
| ASSERT_OK(RunKsck()); |
| ASSERT_OK(ksck_->ChecksumData(ChecksumOptions())); |
| ASSERT_STR_CONTAINS(err_stream_.str(), |
| "0/9 replicas remaining (180B from disk, 90 rows summed)"); |
| |
| // Test filtering with a matching tablet ID pattern. |
| err_stream_.str(""); |
| ksck_->set_table_filters({}); |
| ksck_->set_tablet_id_filters({"*-id-2"}); |
| ASSERT_OK(RunKsck()); |
| ASSERT_OK(ksck_->ChecksumData(ChecksumOptions())); |
| ASSERT_STR_CONTAINS(err_stream_.str(), |
| "0/3 replicas remaining (60B from disk, 30 rows summed)"); |
| } |
| |
| TEST_F(KsckTest, TestOneOneTabletBrokenTable) { |
| CreateOneOneTabletReplicatedBrokenTable(); |
| Status s = RunKsck(); |
| EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString()); |
| ASSERT_STR_CONTAINS(err_stream_.str(), |
| "Tablet tablet-id-1 of table 'test' is under-replicated: " |
| "configuration has 2 replicas vs desired 3"); |
| } |
| |
| TEST_F(KsckTest, TestMismatchedAssignments) { |
| CreateOneSmallReplicatedTable(); |
| shared_ptr<MockKsckTabletServer> ts = static_pointer_cast<MockKsckTabletServer>( |
| master_->tablet_servers_.at(Substitute("ts-id-$0", 0))); |
| ASSERT_EQ(1, ts->tablet_status_map_.erase("tablet-id-2")); |
| |
| Status s = RunKsck(); |
| EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString()); |
| ASSERT_STR_CONTAINS(err_stream_.str(), |
| "Tablet tablet-id-2 of table 'test' is under-replicated: " |
| "1 replica(s) not RUNNING\n" |
| " ts-id-0 (<mock>): missing [LEADER]\n" |
| " ts-id-1 (<mock>): RUNNING\n" |
| " ts-id-2 (<mock>): RUNNING\n"); |
| } |
| |
| TEST_F(KsckTest, TestTabletNotRunning) { |
| CreateOneSmallReplicatedTableWithTabletNotRunning(); |
| |
| Status s = RunKsck(); |
| EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString()); |
| ASSERT_STR_CONTAINS( |
| err_stream_.str(), |
| "Tablet tablet-id-0 of table 'test' is unavailable: 3 replica(s) not RUNNING\n" |
| " ts-id-0 (<mock>): bad state [LEADER]\n" |
| " State: FAILED\n" |
| " Data state: TABLET_DATA_UNKNOWN\n" |
| " Last status: \n" |
| " ts-id-1 (<mock>): bad state\n" |
| " State: FAILED\n" |
| " Data state: TABLET_DATA_UNKNOWN\n" |
| " Last status: \n" |
| " ts-id-2 (<mock>): bad state\n" |
| " State: FAILED\n" |
| " Data state: TABLET_DATA_UNKNOWN\n" |
| " Last status: \n"); |
| } |
| |
| } // namespace tools |
| } // namespace kudu |