// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <boost/optional.hpp>
#include <glog/stl_logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <string>
#include <unordered_map>
#include "kudu/client/client-test-util.h"
#include "kudu/common/wire_protocol-test-util.h"
#include "kudu/gutil/stl_util.h"
#include "kudu/gutil/strings/split.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/integration-tests/cluster_verifier.h"
#include "kudu/integration-tests/external_mini_cluster-itest-base.h"
#include "kudu/integration-tests/test_workload.h"
#include "kudu/tablet/tablet.pb.h"
#include "kudu/tserver/tserver.pb.h"
#include "kudu/util/curl_util.h"
#include "kudu/util/metrics.h"
#include "kudu/util/subprocess.h"
using kudu::client::KuduClient;
using kudu::client::KuduClientBuilder;
using kudu::client::KuduSchema;
using kudu::client::KuduSchemaFromSchema;
using kudu::client::KuduTableCreator;
using kudu::consensus::CONSENSUS_CONFIG_COMMITTED;
using kudu::consensus::ConsensusMetadataPB;
using kudu::consensus::ConsensusStatePB;
using kudu::consensus::RaftPeerPB;
using kudu::itest::TServerDetails;
using kudu::tablet::TABLET_DATA_COPYING;
using kudu::tablet::TABLET_DATA_DELETED;
using kudu::tablet::TABLET_DATA_READY;
using kudu::tablet::TABLET_DATA_TOMBSTONED;
using kudu::tablet::TabletDataState;
using kudu::tablet::TabletSuperBlockPB;
using kudu::tserver::ListTabletsResponsePB;
using kudu::tserver::TabletServerErrorPB;
using std::numeric_limits;
using std::string;
using std::unique_ptr;
using std::unordered_map;
using std::vector;
using strings::Substitute;
METRIC_DECLARE_entity(server);
METRIC_DECLARE_histogram(handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet);
namespace kudu {
class DeleteTableTest : public ExternalMiniClusterITestBase {
protected:
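// Enums passed to the tombstone/delete check helpers below, indicating whether
// consensus metadata and a superblock are expected to still exist on disk.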
enum IsCMetaExpected {
CMETA_NOT_EXPECTED = 0,
CMETA_EXPECTED = 1
};
enum IsSuperBlockExpected {
SUPERBLOCK_NOT_EXPECTED = 0,
SUPERBLOCK_EXPECTED = 1
};
// Get the UUID of the leader of the specified tablet, as seen by the TS with
// the given 'ts_uuid'.
string GetLeaderUUID(const string& ts_uuid, const string& tablet_id);
Status CheckTabletTombstonedOrDeletedOnTS(
int index,
const string& tablet_id,
TabletDataState data_state,
IsCMetaExpected is_cmeta_expected,
IsSuperBlockExpected is_superblock_expected);
Status CheckTabletTombstonedOnTS(int index,
const string& tablet_id,
IsCMetaExpected is_cmeta_expected);
Status CheckTabletDeletedOnTS(int index,
const string& tablet_id,
IsSuperBlockExpected is_superblock_expected);
void WaitForTabletTombstonedOnTS(int index,
const string& tablet_id,
IsCMetaExpected is_cmeta_expected);
void WaitForTabletDeletedOnTS(int index,
const string& tablet_id,
IsSuperBlockExpected is_superblock_expected);
void WaitForTSToCrash(int index);
void WaitForAllTSToCrash();
void WaitUntilTabletRunning(int index, const std::string& tablet_id);
// Delete the given table. If the operation times out, dumps the master stacks
// to help debug master-side deadlocks.
void DeleteTable(const string& table_name);
// Repeatedly try to delete the tablet, retrying on failure up to the
// specified timeout. Deletion can fail when other operations, such as
// bootstrap, are running.
void DeleteTabletWithRetries(const TServerDetails* ts, const string& tablet_id,
TabletDataState delete_type, const MonoDelta& timeout);
};
string DeleteTableTest::GetLeaderUUID(const string& ts_uuid, const string& tablet_id) {
ConsensusStatePB cstate;
CHECK_OK(itest::GetConsensusState(ts_map_[ts_uuid], tablet_id, CONSENSUS_CONFIG_COMMITTED,
MonoDelta::FromSeconds(10), &cstate));
return cstate.leader_uuid();
}
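// Verify that the tablet on the TS with the given index is in the given
// tombstoned/deleted data state: no WAL segments remain, cmeta exists only if
// expected, and the superblock either records 'data_state' or is absent.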
Status DeleteTableTest::CheckTabletTombstonedOrDeletedOnTS(
int index,
const string& tablet_id,
TabletDataState data_state,
IsCMetaExpected is_cmeta_expected,
IsSuperBlockExpected is_superblock_expected) {
CHECK(data_state == TABLET_DATA_TOMBSTONED || data_state == TABLET_DATA_DELETED) << data_state;
// There should be no WALs and no cmeta.
if (inspect_->CountWALSegmentsForTabletOnTS(index, tablet_id) > 0) {
return Status::IllegalState("WAL segments exist for tablet", tablet_id);
}
if (is_cmeta_expected == CMETA_EXPECTED &&
!inspect_->DoesConsensusMetaExistForTabletOnTS(index, tablet_id)) {
return Status::IllegalState("Expected cmeta for tablet " + tablet_id + " but it doesn't exist");
}
if (is_superblock_expected == SUPERBLOCK_EXPECTED) {
RETURN_NOT_OK(inspect_->CheckTabletDataStateOnTS(index, tablet_id, { data_state }));
} else {
TabletSuperBlockPB superblock_pb;
Status s = inspect_->ReadTabletSuperBlockOnTS(index, tablet_id, &superblock_pb);
if (!s.IsNotFound()) {
return Status::IllegalState("Found unexpected superblock for tablet " + tablet_id);
}
}
return Status::OK();
}
Status DeleteTableTest::CheckTabletTombstonedOnTS(int index,
const string& tablet_id,
IsCMetaExpected is_cmeta_expected) {
return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, TABLET_DATA_TOMBSTONED,
is_cmeta_expected, SUPERBLOCK_EXPECTED);
}
Status DeleteTableTest::CheckTabletDeletedOnTS(int index,
const string& tablet_id,
IsSuperBlockExpected is_superblock_expected) {
return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, TABLET_DATA_DELETED,
CMETA_NOT_EXPECTED, is_superblock_expected);
}
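// Poll for up to ~60 seconds (6000 x 10 ms) for the tablet to become tombstoned.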
void DeleteTableTest::WaitForTabletTombstonedOnTS(int index,
const string& tablet_id,
IsCMetaExpected is_cmeta_expected) {
Status s;
for (int i = 0; i < 6000; i++) {
s = CheckTabletTombstonedOnTS(index, tablet_id, is_cmeta_expected);
if (s.ok()) return;
SleepFor(MonoDelta::FromMilliseconds(10));
}
ASSERT_OK(s);
}
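// Poll for up to ~60 seconds for the tablet to be fully deleted.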
void DeleteTableTest::WaitForTabletDeletedOnTS(int index,
const string& tablet_id,
IsSuperBlockExpected is_superblock_expected) {
Status s;
for (int i = 0; i < 6000; i++) {
s = CheckTabletDeletedOnTS(index, tablet_id, is_superblock_expected);
if (s.ok()) return;
SleepFor(MonoDelta::FromMilliseconds(10));
}
ASSERT_OK(s);
}
void DeleteTableTest::WaitForTSToCrash(int index) {
auto ts = cluster_->tablet_server(index);
SCOPED_TRACE(ts->instance_id().permanent_uuid());
ASSERT_OK(ts->WaitForCrash(MonoDelta::FromSeconds(60)));
}
void DeleteTableTest::WaitForAllTSToCrash() {
for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
NO_FATALS(WaitForTSToCrash(i));
}
}
void DeleteTableTest::WaitUntilTabletRunning(int index, const std::string& tablet_id) {
ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(index)->uuid()],
tablet_id, MonoDelta::FromSeconds(60)));
}
void DeleteTableTest::DeleteTable(const string& table_name) {
Status s = client_->DeleteTable(table_name);
if (s.IsTimedOut()) {
WARN_NOT_OK(PstackWatcher::DumpPidStacks(cluster_->master()->pid()),
"Couldn't dump stacks");
}
ASSERT_OK(s);
}
void DeleteTableTest::DeleteTabletWithRetries(const TServerDetails* ts,
const string& tablet_id,
TabletDataState delete_type,
const MonoDelta& timeout) {
MonoTime start(MonoTime::Now(MonoTime::FINE));
MonoTime deadline = start;
deadline.AddDelta(timeout);
Status s;
while (true) {
s = itest::DeleteTablet(ts, tablet_id, delete_type, boost::none, timeout);
if (s.ok()) return;
if (deadline.ComesBefore(MonoTime::Now(MonoTime::FINE))) {
break;
}
SleepFor(MonoDelta::FromMilliseconds(10));
}
ASSERT_OK(s);
}
// Test deleting an empty table, and ensure that the tablets get removed,
// and the master no longer shows the table as existing.
TEST_F(DeleteTableTest, TestDeleteEmptyTable) {
NO_FATALS(StartCluster());
// Create a table on the cluster. We're just using TestWorkload
// as a convenient way to create it.
TestWorkload(cluster_.get()).Setup();
// The table should have replicas on all three tservers.
ASSERT_OK(inspect_->WaitForReplicaCount(3));
// Grab the tablet ID (used later).
vector<string> tablets = inspect_->ListTabletsOnTS(1);
ASSERT_EQ(1, tablets.size());
const string& tablet_id = tablets[0];
// Delete it and wait for the replicas to get deleted.
// We should have no tablets at the filesystem layer after deleting the table.
NO_FATALS(DeleteTable(TestWorkload::kDefaultTableName));
ASSERT_OK(inspect_->WaitForNoData());
// Check that the master no longer exposes the table in any way:
// 1) Should not list it in ListTables.
vector<string> table_names;
ASSERT_OK(client_->ListTables(&table_names));
ASSERT_TRUE(table_names.empty()) << "table still exposed in ListTables";
// 2) Should respond to GetTableSchema with a NotFound error.
KuduSchema schema;
Status s = client_->GetTableSchema(TestWorkload::kDefaultTableName, &schema);
ASSERT_TRUE(s.IsNotFound()) << s.ToString();
// 3) Should return an error for GetTabletLocations RPCs.
{
rpc::RpcController rpc;
master::GetTabletLocationsRequestPB req;
master::GetTabletLocationsResponsePB resp;
rpc.set_timeout(MonoDelta::FromSeconds(10));
req.add_tablet_ids()->assign(tablet_id);
ASSERT_OK(cluster_->master_proxy()->GetTabletLocations(req, &resp, &rpc));
SCOPED_TRACE(resp.DebugString());
ASSERT_EQ(1, resp.errors_size());
ASSERT_STR_CONTAINS(resp.errors(0).ShortDebugString(),
"code: NOT_FOUND message: \"Tablet deleted: Table deleted");
}
// 4) The master 'dump-entities' page should not list the deleted table or tablets.
EasyCurl c;
faststring entities_buf;
ASSERT_OK(c.FetchURL(Substitute("http://$0/dump-entities",
cluster_->master()->bound_http_hostport().ToString()),
&entities_buf));
ASSERT_EQ("{\"tables\":[],\"tablets\":[]}", entities_buf.ToString());
}
// Test that a DeleteTable RPC is rejected without a matching destination UUID.
TEST_F(DeleteTableTest, TestDeleteTableDestUuidValidation) {
NO_FATALS(StartCluster());
// Create a table on the cluster. We're just using TestWorkload
// as a convenient way to create it.
TestWorkload(cluster_.get()).Setup();
ASSERT_OK(inspect_->WaitForReplicaCount(3));
vector<string> tablets = inspect_->ListTabletsOnTS(1);
ASSERT_EQ(1, tablets.size());
const string& tablet_id = tablets[0];
TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()];
tserver::DeleteTabletRequestPB req;
tserver::DeleteTabletResponsePB resp;
rpc::RpcController rpc;
rpc.set_timeout(MonoDelta::FromSeconds(20));
req.set_dest_uuid("fake-uuid");
req.set_tablet_id(tablet_id);
req.set_delete_type(TABLET_DATA_TOMBSTONED);
ASSERT_OK(ts->tserver_admin_proxy->DeleteTablet(req, &resp, &rpc));
ASSERT_TRUE(resp.has_error());
ASSERT_EQ(tserver::TabletServerErrorPB::WRONG_SERVER_UUID, resp.error().code())
<< resp.ShortDebugString();
ASSERT_STR_CONTAINS(StatusFromPB(resp.error().status()).ToString(),
"Wrong destination UUID");
}
// Test the atomic CAS argument to DeleteTablet().
TEST_F(DeleteTableTest, TestAtomicDeleteTablet) {
MonoDelta timeout = MonoDelta::FromSeconds(30);
NO_FATALS(StartCluster());
// Create a table on the cluster. We're just using TestWorkload
// as a convenient way to create it.
TestWorkload(cluster_.get()).Setup();
// The table should have replicas on all three tservers.
ASSERT_OK(inspect_->WaitForReplicaCount(3));
// Grab the tablet ID (used later).
vector<string> tablets = inspect_->ListTabletsOnTS(1);
ASSERT_EQ(1, tablets.size());
const string& tablet_id = tablets[0];
const int kTsIndex = 0;
TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];
// The committed config starts off with an opid_index of -1, so choose something lower.
boost::optional<int64_t> opid_index(-2);
tserver::TabletServerErrorPB::Code error_code;
ASSERT_OK(itest::WaitUntilTabletRunning(ts, tablet_id, timeout));
Status s;
for (int i = 0; i < 100; i++) {
s = itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout,
&error_code);
if (error_code == TabletServerErrorPB::CAS_FAILED) break;
// If we didn't get the expected CAS_FAILED error, it's OK to get 'TABLET_NOT_RUNNING',
// because the tablet can remain in its 'creating' state for a short time after it
// starts to report the 'RUNNING' state in ListTablets().
ASSERT_EQ(TabletServerErrorPB::TABLET_NOT_RUNNING, error_code)
<< "unexpected error: " << s.ToString();
SleepFor(MonoDelta::FromMilliseconds(100));
}
ASSERT_EQ(TabletServerErrorPB::CAS_FAILED, error_code) << "unexpected error: " << s.ToString();
ASSERT_STR_CONTAINS(s.ToString(), "of -2 but the committed config has opid_index of -1");
// Now use the "latest", which is -1.
opid_index = -1;
ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout,
&error_code));
ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_TOMBSTONED }));
// Now that the tablet is already tombstoned, our opid_index should be
// ignored (because it's impossible to check it).
ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, -9999, timeout,
&error_code));
ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_TOMBSTONED }));
// Same with TOMBSTONED -> DELETED.
ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_DELETED, -9999, timeout,
&error_code));
ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_DELETED }));
}
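// Test that a table can be deleted while clients are concurrently writing to it.
// The writers are expected to see NotFound errors once the table is gone, and no
// server should crash.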
TEST_F(DeleteTableTest, TestDeleteTableWithConcurrentWrites) {
NO_FATALS(StartCluster());
int n_iters = AllowSlowTests() ? 20 : 1;
for (int i = 0; i < n_iters; i++) {
TestWorkload workload(cluster_.get());
workload.set_table_name(Substitute("table-$0", i));
// We'll delete the table underneath the writers, so we expect
// NotFound errors during the writes.
workload.set_not_found_allowed(true);
workload.Setup();
// Start the workload, and wait to see some rows actually inserted
workload.Start();
while (workload.rows_inserted() < 100) {
SleepFor(MonoDelta::FromMilliseconds(10));
}
// Delete it and wait for the replicas to get deleted.
NO_FATALS(DeleteTable(workload.table_name()));
ASSERT_OK(inspect_->WaitForNoData());
// Sleep just a little longer to make sure client threads send
// requests to the missing tablets.
SleepFor(MonoDelta::FromMilliseconds(50));
workload.StopAndJoin();
NO_FATALS(cluster_->AssertNoCrashes());
}
}
// Test that a tablet replica is automatically tombstoned on startup if a local
// crash occurs in the middle of tablet copy.
TEST_F(DeleteTableTest, TestAutoTombstoneAfterCrashDuringTabletCopy) {
NO_FATALS(StartCluster());
const MonoDelta timeout = MonoDelta::FromSeconds(10);
const int kTsIndex = 0; // We'll test with the first TS.
// We'll do a config change to tablet copy a replica here later. For
// now, shut it down.
LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid();
cluster_->tablet_server(kTsIndex)->Shutdown();
// Bounce the Master so it gets new tablet reports and doesn't try to assign
// a replica to the dead TS.
cluster_->master()->Shutdown();
ASSERT_OK(cluster_->master()->Restart());
ASSERT_OK(cluster_->WaitForTabletServerCount(2, timeout));
// Start a workload on the cluster, and run it for a little while.
TestWorkload workload(cluster_.get());
workload.set_num_replicas(2);
workload.Setup();
ASSERT_OK(inspect_->WaitForReplicaCount(2));
workload.Start();
while (workload.rows_inserted() < 100) {
SleepFor(MonoDelta::FromMilliseconds(10));
}
workload.StopAndJoin();
// Enable a fault crash when tablet copy occurs on TS 0.
ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
const string& kFaultFlag = "fault_crash_after_rb_files_fetched";
ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), kFaultFlag, "1.0"));
// Figure out the tablet id to tablet copy.
vector<string> tablets = inspect_->ListTabletsOnTS(1);
ASSERT_EQ(1, tablets.size());
const string& tablet_id = tablets[0];
// Add our TS 0 to the config and wait for it to crash.
string leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id);
TServerDetails* leader = DCHECK_NOTNULL(ts_map_[leader_uuid]);
TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];
ASSERT_OK(itest::AddServer(leader, tablet_id, ts, RaftPeerPB::VOTER, boost::none, timeout));
NO_FATALS(WaitForTSToCrash(kTsIndex));
// The superblock should be in TABLET_DATA_COPYING state on disk.
ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_COPYING }));
// Kill the other tablet servers so the leader doesn't try to remote
// bootstrap it again during our verification here.
cluster_->tablet_server(1)->Shutdown();
cluster_->tablet_server(2)->Shutdown();
// Now we restart the TS. It will clean up the failed tablet copy and
// convert it to TABLET_DATA_TOMBSTONED. It crashed, so we have to call
// Shutdown() then Restart() to bring it back up.
cluster_->tablet_server(kTsIndex)->Shutdown();
ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
}
// Test that a tablet replica automatically tombstones itself if the remote
// bootstrap source server fails in the middle of the tablet copy process.
// Also test that we can remotely bootstrap a tombstoned tablet.
TEST_F(DeleteTableTest, TestAutoTombstoneAfterTabletCopyRemoteFails) {
vector<string> ts_flags = {
"--enable_leader_failure_detection=false", // Make test deterministic.
"--log_segment_size_mb=1" // Faster log rolls.
};
vector<string> master_flags = {
"--catalog_manager_wait_for_new_tablets_to_elect_leader=false"
};
NO_FATALS(StartCluster(ts_flags, master_flags));
const MonoDelta kTimeout = MonoDelta::FromSeconds(20);
const int kTsIndex = 0; // We'll test with the first TS.
// We'll do a config change to tablet copy a replica here later. For
// now, shut down TS-0.
LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid();
cluster_->tablet_server(kTsIndex)->Shutdown();
// Bounce the Master so it gets new tablet reports and doesn't try to assign
// a replica to the dead TS.
cluster_->master()->Shutdown();
ASSERT_OK(cluster_->master()->Restart());
ASSERT_OK(cluster_->WaitForTabletServerCount(2, kTimeout));
// Start a workload on the cluster, and run it for a little while.
TestWorkload workload(cluster_.get());
workload.set_num_replicas(2);
workload.Setup();
ASSERT_OK(inspect_->WaitForReplicaCount(2));
// Figure out the tablet id.
vector<string> tablets = inspect_->ListTabletsOnTS(1);
ASSERT_EQ(1, tablets.size());
const string& tablet_id = tablets[0];
for (int i = 1; i <= 2; i++) {
NO_FATALS(WaitUntilTabletRunning(i, tablet_id));
}
// Elect a leader and run some data through the cluster.
const int kLeaderIndex = 1;
string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
ASSERT_OK(itest::StartElection(ts_map_[kLeaderUuid], tablet_id, kTimeout));
workload.Start();
while (workload.rows_inserted() < 100) {
SleepFor(MonoDelta::FromMilliseconds(10));
}
// Tablet Copy doesn't see the active WAL segment, and we need to
// download a file to trigger the fault in this test. Due to the log index
// chunks, that means 3 files minimum: One in-flight WAL segment, one index
// chunk file (these files grow much more slowly than the WAL segments), and
// one completed WAL segment.
ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kLeaderIndex, tablet_id, 3));
workload.StopAndJoin();
// Cause the leader to crash when a follower tries to remotely bootstrap from it.
const string& fault_flag = "fault_crash_on_handle_rb_fetch_data";
ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kLeaderIndex), fault_flag, "1.0"));
// Add TS-0 as a new member to the config and wait for the leader to crash.
ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
TServerDetails* leader = ts_map_[kLeaderUuid];
TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()];
ASSERT_OK(itest::AddServer(leader, tablet_id, ts, RaftPeerPB::VOTER, boost::none, kTimeout));
NO_FATALS(WaitForTSToCrash(kLeaderIndex));
// The tablet server will detect that the leader failed, and automatically
// tombstone its replica. Shut down the other non-leader replica to avoid
// interference while we wait for this to happen.
cluster_->tablet_server(1)->Shutdown();
cluster_->tablet_server(2)->Shutdown();
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
// Now bring the other replicas back, re-elect the previous leader (TS-1),
// and wait for the leader to tablet copy the tombstoned replica. This
// will have replaced a tablet with no consensus metadata.
ASSERT_OK(cluster_->tablet_server(1)->Restart());
ASSERT_OK(cluster_->tablet_server(2)->Restart());
for (int i = 1; i <= 2; i++) {
NO_FATALS(WaitUntilTabletRunning(i, tablet_id));
}
ASSERT_OK(itest::StartElection(ts_map_[kLeaderUuid], tablet_id, kTimeout));
ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));
ClusterVerifier v(cluster_.get());
NO_FATALS(v.CheckCluster());
NO_FATALS(v.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
workload.rows_inserted()));
// Now pause the other replicas and tombstone our replica again.
ASSERT_OK(cluster_->tablet_server(1)->Pause());
ASSERT_OK(cluster_->tablet_server(2)->Pause());
ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, kTimeout));
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
// Bring them back again, let them yet again bootstrap our tombstoned replica.
// This time, the leader will have replaced a tablet with consensus metadata.
ASSERT_OK(cluster_->tablet_server(1)->Resume());
ASSERT_OK(cluster_->tablet_server(2)->Resume());
ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));
NO_FATALS(v.CheckCluster());
NO_FATALS(v.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
workload.rows_inserted()));
}
// Test for correct tablet copy merge of consensus metadata.
TEST_F(DeleteTableTest, TestMergeConsensusMetadata) {
// Enable manual leader selection.
vector<string> ts_flags, master_flags;
ts_flags.push_back("--enable_leader_failure_detection=false");
master_flags.push_back("--catalog_manager_wait_for_new_tablets_to_elect_leader=false");
NO_FATALS(StartCluster(ts_flags, master_flags));
const MonoDelta timeout = MonoDelta::FromSeconds(10);
const int kTsIndex = 0;
TestWorkload workload(cluster_.get());
workload.Setup();
ASSERT_OK(inspect_->WaitForReplicaCount(3));
// Figure out the tablet id to tablet copy.
vector<string> tablets = inspect_->ListTabletsOnTS(1);
ASSERT_EQ(1, tablets.size());
const string& tablet_id = tablets[0];
for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
NO_FATALS(WaitUntilTabletRunning(i, tablet_id));
}
// Elect a leader and run some data through the cluster.
int leader_index = 1;
string leader_uuid = cluster_->tablet_server(leader_index)->uuid();
ASSERT_OK(itest::StartElection(ts_map_[leader_uuid], tablet_id, timeout));
workload.Start();
while (workload.rows_inserted() < 100) {
SleepFor(MonoDelta::FromMilliseconds(10));
}
workload.StopAndJoin();
ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, workload.batches_completed()));
// Verify that TS 0 voted for the chosen leader.
ConsensusMetadataPB cmeta_pb;
ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
ASSERT_EQ(1, cmeta_pb.current_term());
ASSERT_EQ(leader_uuid, cmeta_pb.voted_for());
// Shut down all but TS 0 and try to elect TS 0. The election will fail but
// the TS will record a vote for itself as well as a new term (term 2).
cluster_->tablet_server(1)->Shutdown();
cluster_->tablet_server(2)->Shutdown();
NO_FATALS(WaitUntilTabletRunning(kTsIndex, tablet_id));
TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];
ASSERT_OK(itest::StartElection(ts, tablet_id, timeout));
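// Poll until the on-disk cmeta reflects the new term and the self-vote; the election
// itself cannot succeed because the other replicas are shut down.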
for (int i = 0; i < 6000; i++) {
Status s = inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb);
if (s.ok() &&
cmeta_pb.current_term() == 2 &&
cmeta_pb.voted_for() == ts->uuid()) {
break;
}
SleepFor(MonoDelta::FromMilliseconds(10));
}
ASSERT_EQ(2, cmeta_pb.current_term());
ASSERT_EQ(ts->uuid(), cmeta_pb.voted_for());
// Tombstone our special little guy, then shut him down.
ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
cluster_->tablet_server(kTsIndex)->Shutdown();
// Restart the other dudes and re-elect the same leader.
ASSERT_OK(cluster_->tablet_server(1)->Restart());
ASSERT_OK(cluster_->tablet_server(2)->Restart());
TServerDetails* leader = ts_map_[leader_uuid];
NO_FATALS(WaitUntilTabletRunning(1, tablet_id));
NO_FATALS(WaitUntilTabletRunning(2, tablet_id));
ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
ASSERT_OK(itest::WaitUntilLeader(leader, tablet_id, timeout));
// Bring our special little guy back up.
// Wait until he gets tablet copied.
LOG(INFO) << "Bringing TS " << cluster_->tablet_server(kTsIndex)->uuid()
<< " back up...";
ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));
// Assert that the election history is retained (voted for self).
ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
ASSERT_EQ(2, cmeta_pb.current_term());
ASSERT_EQ(ts->uuid(), cmeta_pb.voted_for());
// Now do the same thing as above, where we tombstone TS 0 then trigger a new
// term (term 3) on the other machines. TS 0 will get copied
// again, but this time the vote record on TS 0 for term 2 should not be
// retained after tablet copy occurs.
cluster_->tablet_server(1)->Shutdown();
cluster_->tablet_server(2)->Shutdown();
// Delete with retries because the tablet might still be bootstrapping.
NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout));
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
ASSERT_OK(cluster_->tablet_server(1)->Restart());
ASSERT_OK(cluster_->tablet_server(2)->Restart());
NO_FATALS(WaitUntilTabletRunning(1, tablet_id));
NO_FATALS(WaitUntilTabletRunning(2, tablet_id));
ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));
// The election history should have been wiped out.
ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
ASSERT_EQ(3, cmeta_pb.current_term());
ASSERT_TRUE(!cmeta_pb.has_voted_for()) << cmeta_pb.ShortDebugString();
}
// Regression test for KUDU-987, a bug in which followers with transactions in the
// REPLICATING state (i.e. not yet committed to a majority) could not be shut down
// during a DeleteTablet() call.
TEST_F(DeleteTableTest, TestDeleteFollowerWithReplicatingTransaction) {
if (!AllowSlowTests()) {
// We will typically wait at least 5 seconds for timeouts to occur.
LOG(INFO) << "Skipping test in fast-test mode.";
return;
}
const MonoDelta timeout = MonoDelta::FromSeconds(10);
const int kNumTabletServers = 5;
vector<string> ts_flags, master_flags;
ts_flags.push_back("--enable_leader_failure_detection=false");
ts_flags.push_back("--flush_threshold_mb=0"); // Always be flushing.
ts_flags.push_back("--maintenance_manager_polling_interval_ms=100");
master_flags.push_back("--catalog_manager_wait_for_new_tablets_to_elect_leader=false");
NO_FATALS(StartCluster(ts_flags, master_flags, kNumTabletServers));
const int kTsIndex = 0; // We'll test with the first TS.
TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];
// Create the table.
TestWorkload workload(cluster_.get());
workload.set_num_replicas(kNumTabletServers);
workload.Setup();
// Figure out the tablet ids of the created tablets.
vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
ASSERT_OK(WaitForNumTabletsOnTS(ts, 1, timeout, &tablets));
const string& tablet_id = tablets[0].tablet_status().tablet_id();
// Wait until all replicas are up and running.
for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()],
tablet_id, timeout));
}
// Elect TS 1 as leader.
const int kLeaderIndex = 1;
const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
TServerDetails* leader = ts_map_[kLeaderUuid];
ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));
// Kill a majority, but leave the leader and a single follower.
LOG(INFO) << "Killing majority";
for (int i = 2; i < kNumTabletServers; i++) {
cluster_->tablet_server(i)->Shutdown();
}
// Now write a single row to the leader.
// We give 5 seconds for the timeout to pretty much guarantee that a flush
// will occur due to the low flush threshold we set.
LOG(INFO) << "Writing a row";
Status s = WriteSimpleTestRow(leader, tablet_id, RowOperationsPB::INSERT,
1, 1, "hola, world", MonoDelta::FromSeconds(5));
ASSERT_TRUE(s.IsTimedOut());
ASSERT_STR_CONTAINS(s.ToString(), "timed out");
LOG(INFO) << "Killing the leader...";
cluster_->tablet_server(kLeaderIndex)->Shutdown();
// Now tombstone the follower tablet. This should succeed even though there
// are uncommitted operations on the replica.
LOG(INFO) << "Tombstoning tablet " << tablet_id << " on TS " << ts->uuid();
ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
}
// Test that orphaned blocks are cleared from the superblock when a tablet is
// tombstoned.
TEST_F(DeleteTableTest, TestOrphanedBlocksClearedOnDelete) {
const MonoDelta timeout = MonoDelta::FromSeconds(30);
vector<string> ts_flags, master_flags;
ts_flags.push_back("--enable_leader_failure_detection=false");
ts_flags.push_back("--flush_threshold_mb=0"); // Flush quickly since we wait for a flush to occur.
ts_flags.push_back("--maintenance_manager_polling_interval_ms=100");
master_flags.push_back("--catalog_manager_wait_for_new_tablets_to_elect_leader=false");
NO_FATALS(StartCluster(ts_flags, master_flags));
const int kFollowerIndex = 0;
TServerDetails* follower_ts = ts_map_[cluster_->tablet_server(kFollowerIndex)->uuid()];
// Create the table.
TestWorkload workload(cluster_.get());
workload.Setup();
// Figure out the tablet id of the created tablet.
vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
ASSERT_OK(WaitForNumTabletsOnTS(follower_ts, 1, timeout, &tablets));
const string& tablet_id = tablets[0].tablet_status().tablet_id();
// Wait until all replicas are up and running.
for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()],
tablet_id, timeout));
}
// Elect TS 1 as leader.
const int kLeaderIndex = 1;
const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
TServerDetails* leader_ts = ts_map_[kLeaderUuid];
ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));
// Run a write workload and wait until we see some rowsets flush on the follower.
workload.Start();
TabletSuperBlockPB superblock_pb;
for (int i = 0; i < 3000; i++) {
ASSERT_OK(inspect_->ReadTabletSuperBlockOnTS(kFollowerIndex, tablet_id, &superblock_pb));
if (!superblock_pb.rowsets().empty()) break;
SleepFor(MonoDelta::FromMilliseconds(10));
}
ASSERT_GT(superblock_pb.rowsets_size(), 0)
<< "Timed out waiting for rowset flush on TS " << follower_ts->uuid() << ": "
<< "Superblock:\n" << superblock_pb.DebugString();
// Shut down the leader so it doesn't try to bootstrap our follower later.
workload.StopAndJoin();
cluster_->tablet_server(kLeaderIndex)->Shutdown();
// Tombstone the follower and check that there are no rowsets or orphaned
// blocks retained in the superblock.
ASSERT_OK(itest::DeleteTablet(follower_ts, tablet_id, TABLET_DATA_TOMBSTONED,
boost::none, timeout));
NO_FATALS(WaitForTabletTombstonedOnTS(kFollowerIndex, tablet_id, CMETA_EXPECTED));
ASSERT_OK(inspect_->ReadTabletSuperBlockOnTS(kFollowerIndex, tablet_id, &superblock_pb));
ASSERT_EQ(0, superblock_pb.rowsets_size()) << superblock_pb.DebugString();
ASSERT_EQ(0, superblock_pb.orphaned_blocks_size()) << superblock_pb.DebugString();
}
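// Return pointers to the entries in 'haystack' that contain 'needle' as a substring.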
vector<const string*> Grep(const string& needle, const vector<string>& haystack) {
vector<const string*> results;
for (const string& s : haystack) {
if (s.find(needle) != string::npos) {
results.push_back(&s);
}
}
return results;
}
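// Return the output lines of 'lsof' for the files currently held open by process 'pid'.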
vector<string> ListOpenFiles(pid_t pid) {
string cmd = strings::Substitute("export PATH=$$PATH:/usr/bin:/usr/sbin; lsof -n -p $0", pid);
vector<string> argv = { "bash", "-c", cmd };
string out;
CHECK_OK(Subprocess::Call(argv, &out));
vector<string> lines = strings::Split(out, "\n");
return lines;
}
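// Log the open files of 'pid' whose paths mention 'tablet_id' and return their count.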
int PrintOpenTabletFiles(pid_t pid, const string& tablet_id) {
vector<string> lines = ListOpenFiles(pid);
vector<const string*> wal_lines = Grep(tablet_id, lines);
LOG(INFO) << "There are " << wal_lines.size() << " open WAL files for pid " << pid << ":";
for (const string* l : wal_lines) {
LOG(INFO) << *l;
}
return wal_lines.size();
}
// Regression test for tablet deletion FD leak. See KUDU-1288.
TEST_F(DeleteTableTest, TestFDsNotLeakedOnTabletTombstone) {
const MonoDelta timeout = MonoDelta::FromSeconds(30);
NO_FATALS(StartCluster({}, {}, 1));
// Create the table.
TestWorkload workload(cluster_.get());
workload.set_num_replicas(1);
workload.Setup();
workload.Start();
while (workload.rows_inserted() < 1000) {
SleepFor(MonoDelta::FromMilliseconds(10));
}
workload.StopAndJoin();
// Figure out the tablet id of the created tablet.
vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
ASSERT_OK(WaitForNumTabletsOnTS(ts_map_.begin()->second, 1, timeout, &tablets));
const string& tablet_id = tablets[0].tablet_status().tablet_id();
// Tombstone the tablet and then ensure that lsof does not list any
// tablet-related paths.
ExternalTabletServer* ets = cluster_->tablet_server(0);
ASSERT_OK(itest::DeleteTablet(ts_map_[ets->uuid()],
tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
// Restart the TS after deletion and then do the same lsof check again.
ets->Shutdown();
ASSERT_OK(ets->Restart());
ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
}
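// Test that a tablet server does not delete replicas that the master does not know
// about (e.g. after the master's metadata has been wiped), unless orphan deletion is
// explicitly enabled via --catalog_manager_delete_orphaned_tablets.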
TEST_F(DeleteTableTest, TestUnknownTabletsAreNotDeleted) {
// Speed up heartbeating so that the unknown tablet is detected faster.
vector<string> extra_ts_flags = { "--heartbeat_interval_ms=10" };
NO_FATALS(StartCluster(extra_ts_flags, {}, 1));
Schema schema(GetSimpleTestSchema());
client::KuduSchema client_schema(client::KuduSchemaFromSchema(schema));
unique_ptr<KuduTableCreator> creator(client_->NewTableCreator());
ASSERT_OK(creator->table_name("test")
.schema(&client_schema)
.set_range_partition_columns({"key"})
.num_replicas(1)
.Create());
// Delete the master's metadata and start it back up. The tablet created
// above is now unknown, but should not be deleted!
cluster_->master()->Shutdown();
ASSERT_OK(env_->DeleteRecursively(cluster_->master()->data_dir()));
ASSERT_OK(cluster_->master()->Restart());
SleepFor(MonoDelta::FromSeconds(2));
int64_t num_delete_attempts;
ASSERT_OK(cluster_->tablet_server(0)->GetInt64Metric(
&METRIC_ENTITY_server, "kudu.tabletserver",
&METRIC_handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet,
"total_count", &num_delete_attempts));
ASSERT_EQ(0, num_delete_attempts);
// Now restart the master with orphan deletion enabled. The tablet should get
// deleted.
cluster_->master()->Shutdown();
cluster_->master()->mutable_flags()->push_back(
"--catalog_manager_delete_orphaned_tablets");
ASSERT_OK(cluster_->master()->Restart());
SleepFor(MonoDelta::FromSeconds(2));
ASSERT_OK(cluster_->tablet_server(0)->GetInt64Metric(
&METRIC_ENTITY_server, "kudu.tabletserver",
&METRIC_handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet,
"total_count", &num_delete_attempts));
ASSERT_EQ(1, num_delete_attempts);
}
// Parameterized test case for TABLET_DATA_DELETED deletions.
class DeleteTableDeletedParamTest : public DeleteTableTest,
public ::testing::WithParamInterface<const char*> {
};
// Test that if a server crashes mid-delete that the delete will be rolled
// forward on startup. Parameterized by different fault flags that cause a
// crash at various points.
TEST_P(DeleteTableDeletedParamTest, TestRollForwardDelete) {
NO_FATALS(StartCluster());
const string fault_flag = GetParam();
LOG(INFO) << "Running with fault flag: " << fault_flag;
// Dynamically set the fault flag so they crash when DeleteTablet() is called
// by the Master.
for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(i), fault_flag, "1.0"));
}
// Create a table on the cluster. We're just using TestWorkload
// as a convenient way to create it.
TestWorkload(cluster_.get()).Setup();
// The table should have replicas on all three tservers.
ASSERT_OK(inspect_->WaitForReplicaCount(3));
// Delete it and wait for the tablet servers to crash.
NO_FATALS(DeleteTable(TestWorkload::kDefaultTableName));
NO_FATALS(WaitForAllTSToCrash());
// There should still be data left on disk.
Status s = inspect_->CheckNoData();
ASSERT_TRUE(s.IsIllegalState()) << s.ToString();
// Now restart the tablet servers. They should roll forward their deletes.
// We don't have to reset the fault flag here because it was set dynamically.
for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
cluster_->tablet_server(i)->Shutdown();
ASSERT_OK(cluster_->tablet_server(i)->Restart());
}
ASSERT_OK(inspect_->WaitForNoData());
}
// Faults appropriate for the TABLET_DATA_DELETED case.
const char* deleted_faults[] = {"fault_crash_after_blocks_deleted",
"fault_crash_after_wal_deleted",
"fault_crash_after_cmeta_deleted"};
INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableDeletedParamTest,
::testing::ValuesIn(deleted_faults));
// Parameterized test case for TABLET_DATA_TOMBSTONED deletions.
class DeleteTableTombstonedParamTest : public DeleteTableTest,
public ::testing::WithParamInterface<const char*> {
};
// Regression test for tablet tombstoning. Tests:
// 1. basic creation & tombstoning of a tablet.
// 2. roll-forward (crash recovery) of a partially-completed tombstoning of a tablet.
// 3. permanent deletion of a TOMBSTONED tablet
// (transition from TABLET_DATA_TOMBSTONED to TABLET_DATA_DELETED).
TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
vector<string> flags;
flags.push_back("--log_segment_size_mb=1"); // Faster log rolls.
NO_FATALS(StartCluster(flags));
const string fault_flag = GetParam();
LOG(INFO) << "Running with fault flag: " << fault_flag;
MonoDelta timeout = MonoDelta::FromSeconds(30);
// Create a table with 2 tablets. We delete the first tablet without
// injecting any faults, then we delete the second tablet while exercising
// several fault injection points.
const int kNumTablets = 2;
vector<const KuduPartialRow*> split_rows;
Schema schema(GetSimpleTestSchema());
client::KuduSchema client_schema(client::KuduSchemaFromSchema(schema));
KuduPartialRow* split_row = client_schema.NewRow();
ASSERT_OK(split_row->SetInt32(0, numeric_limits<int32_t>::max() / kNumTablets));
split_rows.push_back(split_row);
gscoped_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
ASSERT_OK(table_creator->table_name(TestWorkload::kDefaultTableName)
.split_rows(split_rows)
.schema(&client_schema)
.set_range_partition_columns({ "key" })
.num_replicas(3)
.Create());
// Start a workload on the cluster, and run it until we find WALs on disk.
TestWorkload workload(cluster_.get());
workload.Setup();
// The table should have 2 tablets (1 split) on all 3 tservers (for a total of 6).
ASSERT_OK(inspect_->WaitForReplicaCount(6));
// Set up the proxies so we can easily send DeleteTablet() RPCs.
TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()];
// Ensure the tablet server is reporting 2 tablets.
vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
// Run the workload against whoever the leader is until WALs appear on TS 0
// for the tablets we created.
const int kTsIndex = 0; // Index of the tablet server we'll use for the test.
workload.Start();
while (workload.rows_inserted() < 100) {
SleepFor(MonoDelta::FromMilliseconds(10));
}
ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex,
tablets[0].tablet_status().tablet_id(), 3));
ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex,
tablets[1].tablet_status().tablet_id(), 3));
workload.StopAndJoin();
// Shut down the master and the other tablet servers so they don't interfere
// by attempting to create tablets or tablet copy while we delete tablets.
cluster_->master()->Shutdown();
cluster_->tablet_server(1)->Shutdown();
cluster_->tablet_server(2)->Shutdown();
// Tombstone the first tablet.
string tablet_id = tablets[0].tablet_status().tablet_id();
LOG(INFO) << "Tombstoning first tablet " << tablet_id << "...";
ASSERT_TRUE(inspect_->DoesConsensusMetaExistForTabletOnTS(kTsIndex, tablet_id)) << tablet_id;
ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
LOG(INFO) << "Waiting for first tablet to be tombstoned...";
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
if (t.tablet_status().tablet_id() == tablet_id) {
ASSERT_EQ(tablet::SHUTDOWN, t.tablet_status().state());
ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state())
<< t.tablet_status().tablet_id() << " not tombstoned";
}
}
// Now tombstone the 2nd tablet, causing a fault.
ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), fault_flag, "1.0"));
tablet_id = tablets[1].tablet_status().tablet_id();
LOG(INFO) << "Tombstoning second tablet " << tablet_id << "...";
ignore_result(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
NO_FATALS(WaitForTSToCrash(kTsIndex));
// Restart the tablet server and wait for the WALs to be deleted and for the
// superblock to show that it is tombstoned.
cluster_->tablet_server(kTsIndex)->Shutdown();
ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
LOG(INFO) << "Waiting for second tablet to be tombstoned...";
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
// The tombstoned tablets will still show up in ListTablets(),
// just with their data state set as TOMBSTONED. They should also be listed
// as NOT_STARTED because we restarted the server.
ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
ASSERT_EQ(tablet::NOT_STARTED, t.tablet_status().state());
ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state())
<< t.tablet_status().tablet_id() << " not tombstoned";
}
// Finally, delete all tablets on the TS, and wait for all data to be gone.
LOG(INFO) << "Deleting all tablets...";
for (const ListTabletsResponsePB::StatusAndSchemaPB& tablet : tablets) {
string tablet_id = tablet.tablet_status().tablet_id();
// We need retries here, since some of the tablets may still be
// bootstrapping after being restarted above.
NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_DELETED, timeout));
}
ASSERT_OK(inspect_->WaitForNoDataOnTS(kTsIndex));
}
// Faults appropriate for the TABLET_DATA_TOMBSTONED case.
// Tombstoning a tablet does not delete the consensus metadata.
const char* tombstoned_faults[] = {"fault_crash_after_blocks_deleted",
"fault_crash_after_wal_deleted"};
INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableTombstonedParamTest,
::testing::ValuesIn(tombstoned_faults));
} // namespace kudu