// src/kudu/integration-tests/delete_table-test.cc - kudu - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include <boost/optional.hpp>
 #include <glog/stl_logging.h>
 #include <gtest/gtest.h>
 #include <memory>
 #include <string>
 #include <unordered_map>

 #include "kudu/client/client-test-util.h"
 #include "kudu/common/wire_protocol-test-util.h"
 #include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/integration-tests/cluster_verifier.h"
 #include "kudu/integration-tests/external_mini_cluster-itest-base.h"
 #include "kudu/integration-tests/test_workload.h"
 #include "kudu/tablet/tablet.pb.h"
 #include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/curl_util.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/subprocess.h"

 using kudu::client::KuduClient;
 using kudu::client::KuduClientBuilder;
 using kudu::client::KuduSchema;
 using kudu::client::KuduSchemaFromSchema;
 using kudu::client::KuduTableCreator;
 using kudu::consensus::CONSENSUS_CONFIG_COMMITTED;
 using kudu::consensus::ConsensusMetadataPB;
 using kudu::consensus::ConsensusStatePB;
 using kudu::consensus::RaftPeerPB;
 using kudu::itest::TServerDetails;
 using kudu::tablet::TABLET_DATA_COPYING;
 using kudu::tablet::TABLET_DATA_DELETED;
 using kudu::tablet::TABLET_DATA_READY;
 using kudu::tablet::TABLET_DATA_TOMBSTONED;
 using kudu::tablet::TabletDataState;
 using kudu::tablet::TabletSuperBlockPB;
 using kudu::tserver::ListTabletsResponsePB;
 using kudu::tserver::TabletServerErrorPB;
 using std::numeric_limits;
 using std::string;
 using std::unique_ptr;
 using std::unordered_map;
 using std::vector;
 using strings::Substitute;

 // Declarations of a metric entity and an RPC handler latency histogram.
 // NOTE(review): neither is referenced in the portion of the file reviewed
 // here; presumably they are defined in the tablet server code and consumed
 // by tests further down -- confirm.
 METRIC_DECLARE_entity(server);
 METRIC_DECLARE_histogram(handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet);

 namespace kudu {

 // Test fixture for table/tablet deletion tests. Adds helpers on top of
 // ExternalMiniClusterITestBase for deleting tables and tablet replicas and for
 // checking (or waiting for) the on-disk state that a deletion leaves behind.
 class DeleteTableTest : public ExternalMiniClusterITestBase {
  protected:
   // Controls the consensus metadata (cmeta) check. CMETA_EXPECTED requires the
   // cmeta file to exist; with CMETA_NOT_EXPECTED the cmeta is not examined.
   enum IsCMetaExpected {
     CMETA_NOT_EXPECTED = 0,
     CMETA_EXPECTED = 1
   };

   // Controls the superblock check. SUPERBLOCK_EXPECTED requires a superblock in
   // the requested data state; SUPERBLOCK_NOT_EXPECTED requires that no
   // superblock can be found.
   enum IsSuperBlockExpected {
     SUPERBLOCK_NOT_EXPECTED = 0,
     SUPERBLOCK_EXPECTED = 1
   };

   // Returns the leader UUID of 'tablet_id' according to the committed
   // consensus state reported by the tablet server with UUID 'ts_uuid'.
   // CHECK-fails if that state cannot be fetched.
   string GetLeaderUUID(const string& ts_uuid, const string& tablet_id);

   // Checks that the replica of 'tablet_id' on the tablet server at 'index' has
   // no WAL segments and that its cmeta and superblock match the expectations
   // given. 'data_state' must be TABLET_DATA_TOMBSTONED or TABLET_DATA_DELETED.
   Status CheckTabletTombstonedOrDeletedOnTS(
       int index,
       const string& tablet_id,
       TabletDataState data_state,
       IsCMetaExpected is_cmeta_expected,
       IsSuperBlockExpected is_superblock_expected);

   // Shorthand for the check above with TABLET_DATA_TOMBSTONED and an expected
   // superblock.
   Status CheckTabletTombstonedOnTS(
       int index,
       const string& tablet_id,
       IsCMetaExpected is_cmeta_expected);

   // Shorthand for the check above with TABLET_DATA_DELETED and no cmeta
   // requirement.
   Status CheckTabletDeletedOnTS(
       int index,
       const string& tablet_id,
       IsSuperBlockExpected is_superblock_expected);

   // Polls CheckTabletTombstonedOnTS() until it succeeds; records a fatal test
   // failure if it never does.
   void WaitForTabletTombstonedOnTS(
       int index,
       const string& tablet_id,
       IsCMetaExpected is_cmeta_expected);

   // Polls CheckTabletDeletedOnTS() until it succeeds; records a fatal test
   // failure if it never does.
   void WaitForTabletDeletedOnTS(
       int index,
       const string& tablet_id,
       IsSuperBlockExpected is_superblock_expected);

   // Wait for one tablet server (by index) or for all of them to crash.
   void WaitForTSToCrash(int index);
   void WaitForAllTSToCrash();

   // Waits until 'tablet_id' is running on the tablet server at 'index'.
   void WaitUntilTabletRunning(int index, const string& tablet_id);

   // Deletes the table named 'table_name'. When the operation times out, the
   // master's stacks are dumped to help debug master-side deadlocks.
   void DeleteTable(const string& table_name);

   // Keeps trying to delete the tablet until it succeeds or 'timeout' has
   // elapsed. Deletion can fail while other operations, such as bootstrap, are
   // still running.
   void DeleteTabletWithRetries(
       const TServerDetails* ts,
       const string& tablet_id,
       TabletDataState delete_type,
       const MonoDelta& timeout);
 };

 // Fetches the committed consensus state of 'tablet_id' from the tablet server
 // identified by 'ts_uuid' (10 second timeout) and returns the leader UUID it
 // contains. CHECK-fails if the consensus state cannot be retrieved.
 string DeleteTableTest::GetLeaderUUID(const string& ts_uuid, const string& tablet_id) {
   const MonoDelta kRpcTimeout = MonoDelta::FromSeconds(10);
   ConsensusStatePB committed_cstate;
   CHECK_OK(itest::GetConsensusState(ts_map_[ts_uuid], tablet_id, CONSENSUS_CONFIG_COMMITTED,
                                     kRpcTimeout, &committed_cstate));
   return committed_cstate.leader_uuid();
 }

 // Verifies the on-disk state of the replica of 'tablet_id' on the tablet
 // server at 'index' after it has been tombstoned or deleted.
 //
 // 'data_state' must be TABLET_DATA_TOMBSTONED or TABLET_DATA_DELETED; anything
 // else is a CHECK failure. The replica passes when all of the following hold:
 //   - it has no WAL segments;
 //   - its consensus metadata exists, if 'is_cmeta_expected' is CMETA_EXPECTED
 //     (with CMETA_NOT_EXPECTED the cmeta is not examined at all);
 //   - its superblock is in 'data_state', if 'is_superblock_expected' is
 //     SUPERBLOCK_EXPECTED, or no superblock can be found otherwise.
 //
 // Returns OK when the replica passes and a non-OK status describing the first
 // violated condition otherwise.
 Status DeleteTableTest::CheckTabletTombstonedOrDeletedOnTS(
       int index,
       const string& tablet_id,
       TabletDataState data_state,
       IsCMetaExpected is_cmeta_expected,
       IsSuperBlockExpected is_superblock_expected) {
   CHECK(data_state == TABLET_DATA_TOMBSTONED || data_state == TABLET_DATA_DELETED) << data_state;

   // There should be no WALs in either state.
   if (inspect_->CountWALSegmentsForTabletOnTS(index, tablet_id) > 0) {
     return Status::IllegalState("WAL segments exist for tablet", tablet_id);
   }

   // The cmeta only has to exist when the caller asks for it; its absence is
   // never enforced.
   if (is_cmeta_expected == CMETA_EXPECTED &&
       !inspect_->DoesConsensusMetaExistForTabletOnTS(index, tablet_id)) {
     return Status::IllegalState("Expected cmeta for tablet " + tablet_id + " but it doesn't exist");
   }

   if (is_superblock_expected == SUPERBLOCK_EXPECTED) {
     RETURN_NOT_OK(inspect_->CheckTabletDataStateOnTS(index, tablet_id, { data_state }));
     return Status::OK();
   }

   // No superblock is expected: only a NotFound result proves that it is gone.
   TabletSuperBlockPB superblock_pb;
   Status s = inspect_->ReadTabletSuperBlockOnTS(index, tablet_id, &superblock_pb);
   if (s.ok()) {
     return Status::IllegalState("Found unexpected superblock for tablet " + tablet_id);
   }
   if (!s.IsNotFound()) {
     // A failed read says nothing about whether the superblock exists, so
     // surface the underlying error instead of claiming that it was found.
     return Status::IllegalState(
         "Unable to verify that the superblock is absent for tablet " + tablet_id,
         s.ToString());
   }
   return Status::OK();
 }

 // Checks that the replica of 'tablet_id' on the tablet server at 'index' is
 // tombstoned: no WAL segments and a superblock in TABLET_DATA_TOMBSTONED state.
 // 'is_cmeta_expected' is forwarded unchanged to the shared check.
 Status DeleteTableTest::CheckTabletTombstonedOnTS(int index,
                                                   const string& tablet_id,
                                                   IsCMetaExpected is_cmeta_expected) {
   const TabletDataState kExpectedState = TABLET_DATA_TOMBSTONED;
   // A tombstoned replica always keeps its superblock.
   const IsSuperBlockExpected kSuperBlock = SUPERBLOCK_EXPECTED;
   return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, kExpectedState,
                                             is_cmeta_expected, kSuperBlock);
 }

 // Checks that the replica of 'tablet_id' on the tablet server at 'index' is
 // deleted: no WAL segments, and a superblock that is either in
 // TABLET_DATA_DELETED state or missing, as selected by
 // 'is_superblock_expected'. The cmeta is not required to exist.
 Status DeleteTableTest::CheckTabletDeletedOnTS(int index,
                                                const string& tablet_id,
                                                IsSuperBlockExpected is_superblock_expected) {
   const TabletDataState kExpectedState = TABLET_DATA_DELETED;
   const IsCMetaExpected kCMeta = CMETA_NOT_EXPECTED;
   return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, kExpectedState,
                                             kCMeta, is_superblock_expected);
 }

 void DeleteTableTest::WaitForTabletTombstonedOnTS(int index,
                                                   const string& tablet_id,
                                                   IsCMetaExpected is_cmeta_expected) {
   Status s;
   for (int i = 0; i < 6000; i++) {
     s = CheckTabletTombstonedOnTS(index, tablet_id, is_cmeta_expected);
     if (s.ok()) return;
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   ASSERT_OK(s);
 }

 void DeleteTableTest::WaitForTabletDeletedOnTS(int index,
                                                const string& tablet_id,
                                                IsSuperBlockExpected is_superblock_expected) {
   Status s;
   for (int i = 0; i < 6000; i++) {
     s = CheckTabletDeletedOnTS(index, tablet_id, is_superblock_expected);
     if (s.ok()) return;
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   ASSERT_OK(s);
 }

 void DeleteTableTest::WaitForTSToCrash(int index) {
   auto ts = cluster_->tablet_server(index);
   SCOPED_TRACE(ts->instance_id().permanent_uuid());
   ASSERT_OK(ts->WaitForCrash(MonoDelta::FromSeconds(60)));
 }

 // Waits for every tablet server in the cluster to crash, in index order, and
 // stops at the first server whose wait records a fatal failure.
 void DeleteTableTest::WaitForAllTSToCrash() {
   int ts_idx = 0;
   while (ts_idx < cluster_->num_tablet_servers()) {
     NO_FATALS(WaitForTSToCrash(ts_idx));
     ++ts_idx;
   }
 }

 void DeleteTableTest::WaitUntilTabletRunning(int index, const std::string& tablet_id) {
   ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(index)->uuid()],
                                           tablet_id, MonoDelta::FromSeconds(60)));
 }

 void DeleteTableTest::DeleteTable(const string& table_name) {
   Status s = client_->DeleteTable(table_name);
   if (s.IsTimedOut()) {
     WARN_NOT_OK(PstackWatcher::DumpPidStacks(cluster_->master()->pid()),
                         "Couldn't dump stacks");
   }
   ASSERT_OK(s);
 }

 void DeleteTableTest::DeleteTabletWithRetries(const TServerDetails* ts,
                                               const string& tablet_id,
                                               TabletDataState delete_type,
                                               const MonoDelta& timeout) {
   MonoTime start(MonoTime::Now(MonoTime::FINE));
   MonoTime deadline = start;
   deadline.AddDelta(timeout);
   Status s;
   while (true) {
     s = itest::DeleteTablet(ts, tablet_id, delete_type, boost::none, timeout);
     if (s.ok()) return;
     if (deadline.ComesBefore(MonoTime::Now(MonoTime::FINE))) {
       break;
     }
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   ASSERT_OK(s);
 }

 // Deletes a table that never received any rows, then verifies that all tablet
 // data disappears from disk and that the master stops exposing the table via
 // ListTables, GetTableSchema, GetTabletLocations and the dump-entities page.
 TEST_F(DeleteTableTest, TestDeleteEmptyTable) {
   NO_FATALS(StartCluster());
   // TestWorkload is only used here as a shortcut for creating the table.
   TestWorkload(cluster_.get()).Setup();

   // Expect a replica on each of the three tablet servers.
   ASSERT_OK(inspect_->WaitForReplicaCount(3));

   // Remember the tablet ID for the GetTabletLocations check further down.
   vector<string> tablet_ids = inspect_->ListTabletsOnTS(1);
   ASSERT_EQ(1, tablet_ids.size());
   const string& tablet_id = tablet_ids[0];

   // Drop the table; afterwards nothing may remain at the filesystem layer.
   NO_FATALS(DeleteTable(TestWorkload::kDefaultTableName));
   ASSERT_OK(inspect_->WaitForNoData());

   // The master must no longer expose the table in any way.

   // 1) ListTables returns nothing.
   vector<string> listed_tables;
   ASSERT_OK(client_->ListTables(&listed_tables));
   ASSERT_TRUE(listed_tables.empty()) << "table still exposed in ListTables";

   // 2) GetTableSchema answers with NotFound.
   KuduSchema unused_schema;
   Status schema_status = client_->GetTableSchema(TestWorkload::kDefaultTableName,
                                                  &unused_schema);
   ASSERT_TRUE(schema_status.IsNotFound()) << schema_status.ToString();

   // 3) GetTabletLocations reports an error for the old tablet ID.
   {
     master::GetTabletLocationsRequestPB locations_req;
     master::GetTabletLocationsResponsePB locations_resp;
     rpc::RpcController controller;
     controller.set_timeout(MonoDelta::FromSeconds(10));
     locations_req.add_tablet_ids()->assign(tablet_id);
     ASSERT_OK(cluster_->master_proxy()->GetTabletLocations(locations_req, &locations_resp,
                                                            &controller));
     SCOPED_TRACE(locations_resp.DebugString());
     ASSERT_EQ(1, locations_resp.errors_size());
     ASSERT_STR_CONTAINS(locations_resp.errors(0).ShortDebugString(),
                         "code: NOT_FOUND message: \"Tablet deleted: Table deleted");
   }

   // 4) The master's 'dump-entities' page lists neither tables nor tablets.
   const string dump_url = Substitute("http://$0/dump-entities",
                                      cluster_->master()->bound_http_hostport().ToString());
   EasyCurl curl;
   faststring dump_output;
   ASSERT_OK(curl.FetchURL(dump_url, &dump_output));
   ASSERT_EQ("{\"tables\":[],\"tablets\":[]}", dump_output.ToString());
 }

 // Sends a DeleteTablet RPC whose destination UUID does not match the tablet
 // server that receives it and verifies that the request is rejected with a
 // WRONG_SERVER_UUID error.
 TEST_F(DeleteTableTest, TestDeleteTableDestUuidValidation) {
   NO_FATALS(StartCluster());
   // TestWorkload is only used here as a shortcut for creating the table.
   TestWorkload(cluster_.get()).Setup();
   ASSERT_OK(inspect_->WaitForReplicaCount(3));

   vector<string> tablet_ids = inspect_->ListTabletsOnTS(1);
   ASSERT_EQ(1, tablet_ids.size());
   const string& tablet_id = tablet_ids[0];

   TServerDetails* target_ts = ts_map_[cluster_->tablet_server(0)->uuid()];

   // Build a tombstone request addressed to a UUID that no server owns.
   tserver::DeleteTabletRequestPB delete_req;
   delete_req.set_dest_uuid("fake-uuid");
   delete_req.set_tablet_id(tablet_id);
   delete_req.set_delete_type(TABLET_DATA_TOMBSTONED);

   tserver::DeleteTabletResponsePB delete_resp;
   rpc::RpcController controller;
   controller.set_timeout(MonoDelta::FromSeconds(20));

   // The RPC itself goes through; the rejection is carried in the response.
   ASSERT_OK(target_ts->tserver_admin_proxy->DeleteTablet(delete_req, &delete_resp, &controller));
   ASSERT_TRUE(delete_resp.has_error());
   ASSERT_EQ(tserver::TabletServerErrorPB::WRONG_SERVER_UUID, delete_resp.error().code())
       << delete_resp.ShortDebugString();
   ASSERT_STR_CONTAINS(StatusFromPB(delete_resp.error().status()).ToString(),
                       "Wrong destination UUID");
 }

 // Test the atomic CAS argument to DeleteTablet(): a delete carrying a stale
 // committed-config opid_index must fail with CAS_FAILED, a delete carrying the
 // current one must succeed, and the index must be ignored once the replica is
 // already tombstoned.
 TEST_F(DeleteTableTest, TestAtomicDeleteTablet) {
   MonoDelta timeout = MonoDelta::FromSeconds(30);
   NO_FATALS(StartCluster());
   // Create a table on the cluster. We're just using TestWorkload
   // as a convenient way to create it.
   TestWorkload(cluster_.get()).Setup();

   // The table should have replicas on all three tservers.
   ASSERT_OK(inspect_->WaitForReplicaCount(3));

   // Grab the tablet ID (used later).
   vector<string> tablets = inspect_->ListTabletsOnTS(1);
   ASSERT_EQ(1, tablets.size());
   const string& tablet_id = tablets[0];

   const int kTsIndex = 0;
   TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];

   // The committed config starts off with an opid_index of -1, so choose something lower.
   boost::optional<int64_t> opid_index(-2);
   // Start from a defined value: DeleteTablet() is not guaranteed to write to
   // 'error_code' on every path (e.g. when the call succeeds), and comparing an
   // uninitialized enum below would be undefined behavior.
   tserver::TabletServerErrorPB::Code error_code = TabletServerErrorPB::UNKNOWN_ERROR;
   ASSERT_OK(itest::WaitUntilTabletRunning(ts, tablet_id, timeout));

   Status s;
   for (int i = 0; i < 100; i++) {
     // Reset on every attempt so that a stale code from the previous iteration
     // can never be mistaken for the outcome of this one.
     error_code = TabletServerErrorPB::UNKNOWN_ERROR;
     s = itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout,
                             &error_code);
     if (error_code == TabletServerErrorPB::CAS_FAILED) break;
     // If we didn't get the expected CAS_FAILED error, it's OK to get 'TABLET_NOT_RUNNING'
     // because the "creating" maintenance state persists just slightly after it starts to
     // expose 'RUNNING' state in ListTablets()
     ASSERT_EQ(TabletServerErrorPB::TABLET_NOT_RUNNING, error_code)
         << "unexpected error: " << s.ToString();
     SleepFor(MonoDelta::FromMilliseconds(100));
   }

   ASSERT_EQ(TabletServerErrorPB::CAS_FAILED, error_code) << "unexpected error: " << s.ToString();
   ASSERT_STR_CONTAINS(s.ToString(), "of -2 but the committed config has opid_index of -1");

   // Now use the "latest", which is -1.
   opid_index = -1;
   ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout,
                                 &error_code));
   // The returned Status must be asserted on; a bare call verifies nothing.
   ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_TOMBSTONED }));

   // Now that the tablet is already tombstoned, our opid_index should be
   // ignored (because it's impossible to check it).
   ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, -9999, timeout,
                                 &error_code));
   ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_TOMBSTONED }));

   // Same with TOMBSTONED -> DELETED.
   ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_DELETED, -9999, timeout,
                                 &error_code));
   // NOTE(review): this check's Status used to be silently discarded. It is only
   // logged here rather than asserted, because it is not evident from this file
   // whether a superblock still exists after a TABLET_DATA_DELETED delete.
   // Confirm, then either turn this into ASSERT_OK or check for the absence of
   // the superblock via CheckTabletDeletedOnTS().
   WARN_NOT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_DELETED }),
               "Superblock is not in TABLET_DATA_DELETED state");
 }

 // Repeatedly deletes a table while a workload is still writing to it and
 // checks that the tablet data goes away and that no server crashes. Runs 20
 // rounds when slow tests are allowed and a single round otherwise.
 TEST_F(DeleteTableTest, TestDeleteTableWithConcurrentWrites) {
   NO_FATALS(StartCluster());
   const int kNumIterations = AllowSlowTests() ? 20 : 1;
   for (int iter = 0; iter < kNumIterations; ++iter) {
     TestWorkload writer(cluster_.get());
     writer.set_table_name(Substitute("table-$0", iter));

     // The table is deleted underneath the writers, so NotFound errors are
     // expected during the writes.
     writer.set_not_found_allowed(true);
     writer.Setup();

     // Kick off the writers and wait until some rows have actually landed.
     writer.Start();
     while (writer.rows_inserted() < 100) {
       SleepFor(MonoDelta::FromMilliseconds(10));
     }

     // Drop the table and wait for its replicas to be removed.
     NO_FATALS(DeleteTable(writer.table_name()));
     ASSERT_OK(inspect_->WaitForNoData());

     // Linger briefly so that the client threads get to send requests to the
     // now-missing tablets.
     SleepFor(MonoDelta::FromMilliseconds(50));

     writer.StopAndJoin();
     NO_FATALS(cluster_->AssertNoCrashes());
   }
 }

 // Test that a tablet replica is automatically tombstoned on startup if a local
 // crash occurs in the middle of remote bootstrap.
 TEST_F(DeleteTableTest, TestAutoTombstoneAfterCrashDuringRemoteBootstrap) {
   NO_FATALS(StartCluster());
   const MonoDelta timeout = MonoDelta::FromSeconds(10);
   const int kTsIndex = 0; // We'll test with the first TS.

   // We'll do a config change to remote bootstrap a replica here later. For
   // now, shut it down.
   LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid();
   cluster_->tablet_server(kTsIndex)->Shutdown();

   // Bounce the Master so it gets new tablet reports and doesn't try to assign
   // a replica to the dead TS.
   cluster_->master()->Shutdown();
   ASSERT_OK(cluster_->master()->Restart());
   // Fail fast if the two remaining servers do not register in time, rather
   // than silently dropping the result and failing later for a less obvious
   // reason.
   ASSERT_OK(cluster_->WaitForTabletServerCount(2, timeout));

   // Start a workload on the cluster, and run it for a little while.
   TestWorkload workload(cluster_.get());
   workload.set_num_replicas(2);
   workload.Setup();
   ASSERT_OK(inspect_->WaitForReplicaCount(2));

   workload.Start();
   while (workload.rows_inserted() < 100) {
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   workload.StopAndJoin();

   // Enable a fault crash when remote bootstrap occurs on TS 0.
   ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
   const string kFaultFlag = "fault_crash_after_rb_files_fetched";
   ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), kFaultFlag, "1.0"));

   // Figure out the tablet id to remote bootstrap.
   vector<string> tablets = inspect_->ListTabletsOnTS(1);
   ASSERT_EQ(1, tablets.size());
   const string& tablet_id = tablets[0];

   // Add our TS 0 to the config and wait for it to crash.
   string leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id);
   TServerDetails* leader = DCHECK_NOTNULL(ts_map_[leader_uuid]);
   TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];
   ASSERT_OK(itest::AddServer(leader, tablet_id, ts, RaftPeerPB::VOTER, boost::none, timeout));
   NO_FATALS(WaitForTSToCrash(kTsIndex));

   // The superblock should be in TABLET_DATA_COPYING state on disk.
   // CheckTabletDataStateOnTS() reports its verdict through the returned
   // Status, so it has to be checked with ASSERT_OK; NO_FATALS would throw the
   // result away and turn this verification into a no-op.
   ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_COPYING }));

   // Kill the other tablet servers so the leader doesn't try to remote
   // bootstrap it again during our verification here.
   cluster_->tablet_server(1)->Shutdown();
   cluster_->tablet_server(2)->Shutdown();

   // Now we restart the TS. It will clean up the failed remote bootstrap and
   // convert it to TABLET_DATA_TOMBSTONED. It crashed, so we have to call
   // Shutdown() then Restart() to bring it back up.
   cluster_->tablet_server(kTsIndex)->Shutdown();
   ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
 }

 // Test that a tablet replica automatically tombstones itself if the remote
 // bootstrap source server fails in the middle of the remote bootstrap process.
 // Also test that we can remotely bootstrap a tombstoned tablet.
 TEST_F(DeleteTableTest, TestAutoTombstoneAfterRemoteBootstrapRemoteFails) {
   vector<string> ts_flags = {
       "--enable_leader_failure_detection=false",  // Make test deterministic.
       "--log_segment_size_mb=1"                   // Faster log rolls.
   };
   vector<string> master_flags = {
       "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"
   };
   NO_FATALS(StartCluster(ts_flags, master_flags));
   const MonoDelta kTimeout = MonoDelta::FromSeconds(20);
   const int kTsIndex = 0; // We'll test with the first TS.

   // We'll do a config change to remote bootstrap a replica here later. For
   // now, shut down TS-0.
   LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid();
   cluster_->tablet_server(kTsIndex)->Shutdown();

   // Bounce the Master so it gets new tablet reports and doesn't try to assign
   // a replica to the dead TS.
   cluster_->master()->Shutdown();
   ASSERT_OK(cluster_->master()->Restart());
   // Fail fast if the two remaining servers do not register in time, rather
   // than silently dropping the result and failing later for a less obvious
   // reason.
   ASSERT_OK(cluster_->WaitForTabletServerCount(2, kTimeout));

   // Start a workload on the cluster, and run it for a little while.
   TestWorkload workload(cluster_.get());
   workload.set_num_replicas(2);
   workload.Setup();
   ASSERT_OK(inspect_->WaitForReplicaCount(2));

   // Figure out the tablet id.
   vector<string> tablets = inspect_->ListTabletsOnTS(1);
   ASSERT_EQ(1, tablets.size());
   const string& tablet_id = tablets[0];

   for (int i = 1; i <= 2; i++) {
     NO_FATALS(WaitUntilTabletRunning(i, tablet_id));
   }

   // Elect a leader and run some data through the cluster.
   const int kLeaderIndex = 1;
   const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
   ASSERT_OK(itest::StartElection(ts_map_[kLeaderUuid], tablet_id, kTimeout));
   workload.Start();
   while (workload.rows_inserted() < 100) {
     SleepFor(MonoDelta::FromMilliseconds(10));
   }

   // Remote bootstrap doesn't see the active WAL segment, and we need to
   // download a file to trigger the fault in this test. Due to the log index
   // chunks, that means 3 files minimum: One in-flight WAL segment, one index
   // chunk file (these files grow much more slowly than the WAL segments), and
   // one completed WAL segment.
   ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kLeaderIndex, tablet_id, 3));
   workload.StopAndJoin();

   // Cause the leader to crash when a follower tries to remotely bootstrap from it.
   const string fault_flag = "fault_crash_on_handle_rb_fetch_data";
   ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kLeaderIndex), fault_flag, "1.0"));

   // Add TS-0 as a new member to the config and wait for the leader to crash.
   ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
   TServerDetails* leader = ts_map_[kLeaderUuid];
   // Use kTsIndex like the rest of the test instead of a hard-coded 0.
   TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];
   ASSERT_OK(itest::AddServer(leader, tablet_id, ts, RaftPeerPB::VOTER, boost::none, kTimeout));
   NO_FATALS(WaitForTSToCrash(kLeaderIndex));

   // The tablet server will detect that the leader failed, and automatically
   // tombstone its replica. Shut down the other non-leader replica to avoid
   // interference while we wait for this to happen.
   cluster_->tablet_server(1)->Shutdown();
   cluster_->tablet_server(2)->Shutdown();
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));

   // Now bring the other replicas back, re-elect the previous leader (TS-1),
   // and wait for the leader to remote bootstrap the tombstoned replica. This
   // will have replaced a tablet with no consensus metadata.
   ASSERT_OK(cluster_->tablet_server(1)->Restart());
   ASSERT_OK(cluster_->tablet_server(2)->Restart());
   for (int i = 1; i <= 2; i++) {
     NO_FATALS(WaitUntilTabletRunning(i, tablet_id));
   }
   ASSERT_OK(itest::StartElection(ts_map_[kLeaderUuid], tablet_id, kTimeout));
   ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));

   ClusterVerifier v(cluster_.get());
   NO_FATALS(v.CheckCluster());
   NO_FATALS(v.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
                             workload.rows_inserted()));

   // Now pause the other replicas and tombstone our replica again.
   ASSERT_OK(cluster_->tablet_server(1)->Pause());
   ASSERT_OK(cluster_->tablet_server(2)->Pause());
   ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, kTimeout));
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));

   // Bring them back again, let them yet again bootstrap our tombstoned replica.
   // This time, the leader will have replaced a tablet with consensus metadata.
   ASSERT_OK(cluster_->tablet_server(1)->Resume());
   ASSERT_OK(cluster_->tablet_server(2)->Resume());
   ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));

   NO_FATALS(v.CheckCluster());
   NO_FATALS(v.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
                             workload.rows_inserted()));
 }

 // Verifies how a tombstoned replica's consensus metadata is merged when the
 // replica is remotely bootstrapped: a locally recorded vote is kept while the
 // term is unchanged and is dropped once the rest of the cluster has moved to a
 // newer term.
 TEST_F(DeleteTableTest, TestMergeConsensusMetadata) {
   // Leaders are chosen manually throughout this test.
   vector<string> ts_flags = { "--enable_leader_failure_detection=false" };
   vector<string> master_flags = {
       "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"
   };
   NO_FATALS(StartCluster(ts_flags, master_flags));
   const MonoDelta kTimeout = MonoDelta::FromSeconds(10);
   const int kTsIndex = 0;

   TestWorkload workload(cluster_.get());
   workload.Setup();
   ASSERT_OK(inspect_->WaitForReplicaCount(3));

   // Look up the ID of the tablet that will be remotely bootstrapped.
   vector<string> tablet_ids = inspect_->ListTabletsOnTS(1);
   ASSERT_EQ(1, tablet_ids.size());
   const string& tablet_id = tablet_ids[0];

   for (int ts_idx = 0; ts_idx < cluster_->num_tablet_servers(); ++ts_idx) {
     NO_FATALS(WaitUntilTabletRunning(ts_idx, tablet_id));
   }

   // Make TS 1 the leader and push some rows through the cluster.
   const int kLeaderIndex = 1;
   const string leader_uuid = cluster_->tablet_server(kLeaderIndex)->uuid();
   ASSERT_OK(itest::StartElection(ts_map_[leader_uuid], tablet_id, kTimeout));
   workload.Start();
   while (workload.rows_inserted() < 100) {
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   workload.StopAndJoin();
   ASSERT_OK(WaitForServersToAgree(kTimeout, ts_map_, tablet_id, workload.batches_completed()));

   // TS 0 must have voted for the chosen leader in term 1.
   ConsensusMetadataPB cmeta;
   ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta));
   ASSERT_EQ(1, cmeta.current_term());
   ASSERT_EQ(leader_uuid, cmeta.voted_for());

   // With every other server down, ask TS 0 to run an election. It cannot win,
   // but it will persist a new term (term 2) and a vote for itself.
   cluster_->tablet_server(1)->Shutdown();
   cluster_->tablet_server(2)->Shutdown();
   NO_FATALS(WaitUntilTabletRunning(kTsIndex, tablet_id));
   TServerDetails* ts0 = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];
   ASSERT_OK(itest::StartElection(ts0, tablet_id, kTimeout));
   for (int attempt = 0; attempt < 6000; ++attempt) {
     Status read_status = inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta);
     const bool self_vote_persisted = read_status.ok() &&
                                      cmeta.current_term() == 2 &&
                                      cmeta.voted_for() == ts0->uuid();
     if (self_vote_persisted) {
       break;
     }
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   ASSERT_EQ(2, cmeta.current_term());
   ASSERT_EQ(ts0->uuid(), cmeta.voted_for());

   // Tombstone the replica on TS 0, then stop that server.
   ASSERT_OK(itest::DeleteTablet(ts0, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, kTimeout));
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
   cluster_->tablet_server(kTsIndex)->Shutdown();

   // Bring the other two servers back and re-elect the same leader.
   ASSERT_OK(cluster_->tablet_server(1)->Restart());
   ASSERT_OK(cluster_->tablet_server(2)->Restart());
   TServerDetails* leader_ts = ts_map_[leader_uuid];
   for (int ts_idx = 1; ts_idx <= 2; ++ts_idx) {
     NO_FATALS(WaitUntilTabletRunning(ts_idx, tablet_id));
   }
   ASSERT_OK(itest::StartElection(leader_ts, tablet_id, kTimeout));
   ASSERT_OK(itest::WaitUntilLeader(leader_ts, tablet_id, kTimeout));

   // Restart TS 0 and wait for its replica to be remotely bootstrapped.
   LOG(INFO) << "Bringing TS " << cluster_->tablet_server(kTsIndex)->uuid()
             << " back up...";
   ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
   ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));

   // The election history (the vote for itself) must have been retained.
   ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta));
   ASSERT_EQ(2, cmeta.current_term());
   ASSERT_EQ(ts0->uuid(), cmeta.voted_for());

   // Second round: tombstone TS 0 again, but this time let the other servers
   // advance to a new term (term 3). After TS 0 is remotely bootstrapped once
   // more, its term-2 vote record should no longer be present.
   cluster_->tablet_server(1)->Shutdown();
   cluster_->tablet_server(2)->Shutdown();

   // Retry the delete, since the tablet might still be bootstrapping.
   NO_FATALS(DeleteTabletWithRetries(ts0, tablet_id, TABLET_DATA_TOMBSTONED, kTimeout));
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));

   ASSERT_OK(cluster_->tablet_server(1)->Restart());
   ASSERT_OK(cluster_->tablet_server(2)->Restart());
   for (int ts_idx = 1; ts_idx <= 2; ++ts_idx) {
     NO_FATALS(WaitUntilTabletRunning(ts_idx, tablet_id));
   }
   ASSERT_OK(itest::StartElection(leader_ts, tablet_id, kTimeout));
   ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, { TABLET_DATA_READY }));

   // The election history should have been wiped out.
   ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta));
   ASSERT_EQ(3, cmeta.current_term());
   ASSERT_FALSE(cmeta.has_voted_for()) << cmeta.ShortDebugString();
 }

 // Regression test for KUDU-987: a follower holding transactions in REPLICATING
 // state (not yet committed to a majority) could not shut down during a
 // DeleteTablet() call. Only runs when slow tests are allowed.
 TEST_F(DeleteTableTest, TestDeleteFollowerWithReplicatingTransaction) {
   if (!AllowSlowTests()) {
     // The test typically has to sit through timeouts of at least 5 seconds.
     LOG(INFO) << "Skipping test in fast-test mode.";
     return;
   }

   const MonoDelta kTimeout = MonoDelta::FromSeconds(10);

   const int kNumTabletServers = 5;
   vector<string> ts_flags = {
       "--enable_leader_failure_detection=false",
       "--flush_threshold_mb=0",  // Always be flushing.
       "--maintenance_manager_polling_interval_ms=100"
   };
   vector<string> master_flags = {
       "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"
   };
   NO_FATALS(StartCluster(ts_flags, master_flags, kNumTabletServers));

   const int kTsIndex = 0; // The follower under test is the first TS.
   TServerDetails* follower = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];

   // Create the table with a replica on every server.
   TestWorkload workload(cluster_.get());
   workload.set_num_replicas(kNumTabletServers);
   workload.Setup();

   // Learn the ID of the single tablet that was created.
   vector<ListTabletsResponsePB::StatusAndSchemaPB> tablet_statuses;
   ASSERT_OK(WaitForNumTabletsOnTS(follower, 1, kTimeout, &tablet_statuses));
   const string& tablet_id = tablet_statuses[0].tablet_status().tablet_id();

   // Every replica has to be up and running before the election.
   for (int ts_idx = 0; ts_idx < cluster_->num_tablet_servers(); ++ts_idx) {
     TServerDetails* replica = ts_map_[cluster_->tablet_server(ts_idx)->uuid()];
     ASSERT_OK(itest::WaitUntilTabletRunning(replica, tablet_id, kTimeout));
   }

   // Make TS 1 the leader.
   const int kLeaderIndex = 1;
   const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
   TServerDetails* leader_ts = ts_map_[kLeaderUuid];
   ASSERT_OK(itest::StartElection(leader_ts, tablet_id, kTimeout));
   ASSERT_OK(WaitForServersToAgree(kTimeout, ts_map_, tablet_id, 1));

   // Take down a majority, leaving only the leader and one follower.
   LOG(INFO) << "Killing majority";
   for (int ts_idx = 2; ts_idx < kNumTabletServers; ++ts_idx) {
     cluster_->tablet_server(ts_idx)->Shutdown();
   }

   // Write one row through the leader. The 5 second timeout pretty much
   // guarantees that a flush happens meanwhile, given the low flush threshold
   // configured above.
   LOG(INFO) << "Writing a row";
   Status write_status = WriteSimpleTestRow(leader_ts, tablet_id, RowOperationsPB::INSERT,
                                            1, 1, "hola, world", MonoDelta::FromSeconds(5));
   ASSERT_TRUE(write_status.IsTimedOut());
   ASSERT_STR_CONTAINS(write_status.ToString(), "timed out");

   LOG(INFO) << "Killing the leader...";
   cluster_->tablet_server(kLeaderIndex)->Shutdown();

   // Tombstoning the follower must succeed even though the replica still has
   // uncommitted operations.
   LOG(INFO) << "Tombstoning tablet " << tablet_id << " on TS " << follower->uuid();
   ASSERT_OK(itest::DeleteTablet(follower, tablet_id, TABLET_DATA_TOMBSTONED, boost::none,
                                 kTimeout));
 }

 // Verifies that once a tablet has been tombstoned, its superblock lists
 // neither rowsets nor orphaned blocks.
 TEST_F(DeleteTableTest, TestOrphanedBlocksClearedOnDelete) {
   const MonoDelta timeout = MonoDelta::FromSeconds(30);
   const int kFollowerIndex = 0;
   const int kLeaderIndex = 1;

   const vector<string> ts_flags = {
     "--enable_leader_failure_detection=false",
     "--flush_threshold_mb=0", // Flush quickly since we wait for a flush to occur.
     "--maintenance_manager_polling_interval_ms=100",
   };
   const vector<string> master_flags = {
     "--catalog_manager_wait_for_new_tablets_to_elect_leader=false",
   };
   NO_FATALS(StartCluster(ts_flags, master_flags));

   TServerDetails* follower_ts = ts_map_[cluster_->tablet_server(kFollowerIndex)->uuid()];

   // Create the table.
   TestWorkload workload(cluster_.get());
   workload.Setup();

   // Look up the id of the tablet that was just created.
   vector<ListTabletsResponsePB::StatusAndSchemaPB> listed;
   ASSERT_OK(WaitForNumTabletsOnTS(follower_ts, 1, timeout, &listed));
   const string& tablet_id = listed[0].tablet_status().tablet_id();

   // Every replica must be running before an election is started.
   for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
     TServerDetails* replica = ts_map_[cluster_->tablet_server(i)->uuid()];
     ASSERT_OK(itest::WaitUntilTabletRunning(replica, tablet_id, timeout));
   }

   // Make TS 1 the leader.
   TServerDetails* leader_ts = ts_map_[cluster_->tablet_server(kLeaderIndex)->uuid()];
   ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
   ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));

   // Drive writes and poll the follower's superblock until at least one rowset
   // shows up: at most 3000 reads, sleeping 10ms after each empty one.
   workload.Start();
   TabletSuperBlockPB superblock_pb;
   const int kMaxAttempts = 3000;
   for (int attempt = 0; attempt < kMaxAttempts; attempt++) {
     ASSERT_OK(inspect_->ReadTabletSuperBlockOnTS(kFollowerIndex, tablet_id, &superblock_pb));
     if (superblock_pb.rowsets_size() > 0) {
       break;
     }
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   ASSERT_GT(superblock_pb.rowsets_size(), 0)
       << "Timed out waiting for rowset flush on TS " << follower_ts->uuid() << ": "
       << "Superblock:\n" << superblock_pb.DebugString();

   // Stop writing and take the leader down so that it doesn't try to bootstrap
   // our follower later.
   workload.StopAndJoin();
   cluster_->tablet_server(kLeaderIndex)->Shutdown();

   // Tombstone the follower, wait for that to take effect, then re-read the
   // superblock: it must retain no rowsets and no orphaned blocks.
   ASSERT_OK(itest::DeleteTablet(follower_ts, tablet_id, TABLET_DATA_TOMBSTONED,
                                 boost::none, timeout));
   NO_FATALS(WaitForTabletTombstonedOnTS(kFollowerIndex, tablet_id, CMETA_EXPECTED));
   ASSERT_OK(inspect_->ReadTabletSuperBlockOnTS(kFollowerIndex, tablet_id, &superblock_pb));
   ASSERT_EQ(0, superblock_pb.rowsets_size()) << superblock_pb.DebugString();
   ASSERT_EQ(0, superblock_pb.orphaned_blocks_size()) << superblock_pb.DebugString();
 }

 // Returns pointers to the entries of 'haystack' that contain 'needle' as a
 // substring, in their original order. The pointers refer to elements of
 // 'haystack' itself, so they are only valid while that vector is alive and
 // unmodified.
 vector<const string*> Grep(const string& needle, const vector<string>& haystack) {
   vector<const string*> matches;
   for (auto it = haystack.begin(); it != haystack.end(); ++it) {
     if (it->find(needle) == string::npos) {
       continue;
     }
     matches.push_back(&*it);
   }
   return matches;
 }

 // Runs 'lsof -n -p <pid>' through 'bash -c' (with /usr/bin and /usr/sbin
 // appended to PATH) and returns its output split on newlines.
 // CHECK-fails if the subprocess call does not return OK.
 vector<string> ListOpenFiles(pid_t pid) {
   const string lsof_cmd = strings::Substitute(
       "export PATH=$$PATH:/usr/bin:/usr/sbin; lsof -n -p $0", pid);
   const vector<string> bash_argv = { "bash", "-c", lsof_cmd };

   string lsof_output;
   CHECK_OK(Subprocess::Call(bash_argv, &lsof_output));

   vector<string> output_lines = strings::Split(lsof_output, "\n");
   return output_lines;
 }

 // Logs every line of ListOpenFiles(pid) that mentions 'tablet_id' and returns
 // how many such lines there were.
 int PrintOpenTabletFiles(pid_t pid, const string& tablet_id) {
   const vector<string> lsof_lines = ListOpenFiles(pid);
   const vector<const string*> hits = Grep(tablet_id, lsof_lines);

   LOG(INFO) << "There are " << hits.size() << " open WAL files for pid " << pid << ":";
   for (size_t i = 0; i < hits.size(); i++) {
     LOG(INFO) << *hits[i];
   }
   return hits.size();
 }

 // Regression test for KUDU-1288, a file descriptor leak on tablet deletion.
 TEST_F(DeleteTableTest, TestFDsNotLeakedOnTabletTombstone) {
   const MonoDelta timeout = MonoDelta::FromSeconds(30);

   NO_FATALS(StartCluster({}, {}, 1));

   // Create a single-replica table and keep writing until at least 1000 rows
   // have gone in.
   TestWorkload workload(cluster_.get());
   workload.set_num_replicas(1);
   workload.Setup();
   workload.Start();
   while (true) {
     if (workload.rows_inserted() >= 1000) {
       break;
     }
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   workload.StopAndJoin();

   // Look up the id of the tablet that was just created.
   vector<ListTabletsResponsePB::StatusAndSchemaPB> listed;
   ASSERT_OK(WaitForNumTabletsOnTS(ts_map_.begin()->second, 1, timeout, &listed));
   const string& tablet_id = listed[0].tablet_status().tablet_id();

   // Tombstone the tablet. Afterwards lsof must not report any path that
   // mentions the tablet id for the tablet server process.
   ExternalTabletServer* tserver = cluster_->tablet_server(0);
   TServerDetails* tserver_details = ts_map_[tserver->uuid()];
   ASSERT_OK(itest::DeleteTablet(tserver_details, tablet_id, TABLET_DATA_TOMBSTONED,
                                 boost::none, timeout));
   ASSERT_EQ(0, PrintOpenTabletFiles(tserver->pid(), tablet_id));

   // Bounce the tablet server and repeat the lsof check against the restarted
   // process.
   tserver->Shutdown();
   ASSERT_OK(tserver->Restart());
   ASSERT_EQ(0, PrintOpenTabletFiles(tserver->pid(), tablet_id));
 }

 // Checks that a tablet the master has no record of is left alone by default,
 // and is only deleted once the master runs with
 // --catalog_manager_delete_orphaned_tablets. Deletion attempts are observed
 // through the tablet server's DeleteTablet handler-latency metric.
 TEST_F(DeleteTableTest, TestUnknownTabletsAreNotDeleted) {
   // Heartbeat quickly so that the unknown tablet is detected faster.
   const vector<string> ts_flags = { "--heartbeat_interval_ms=10" };
   NO_FATALS(StartCluster(ts_flags, {}, 1));

   // Create a single-replica table named "test".
   Schema simple_schema(GetSimpleTestSchema());
   client::KuduSchema client_schema(client::KuduSchemaFromSchema(simple_schema));
   unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
   ASSERT_OK(table_creator->table_name("test")
                           .schema(&client_schema)
                           .set_range_partition_columns({"key"})
                           .num_replicas(1)
                           .Create());

   // Wipe the master's data directory while it is down, then bring it back.
   // The tablet created above is now unknown to the master, but it must not
   // be deleted!
   cluster_->master()->Shutdown();
   ASSERT_OK(env_->DeleteRecursively(cluster_->master()->data_dir()));
   ASSERT_OK(cluster_->master()->Restart());
   SleepFor(MonoDelta::FromSeconds(2));

   int64_t delete_rpc_count;
   ASSERT_OK(cluster_->tablet_server(0)->GetInt64Metric(
       &METRIC_ENTITY_server, "kudu.tabletserver",
       &METRIC_handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet,
       "total_count", &delete_rpc_count));
   ASSERT_EQ(0, delete_rpc_count);

   // Restart the master once more, this time with orphan deletion enabled.
   // The tablet should now get deleted.
   cluster_->master()->Shutdown();
   cluster_->master()->mutable_flags()->push_back(
       "--catalog_manager_delete_orphaned_tablets");
   ASSERT_OK(cluster_->master()->Restart());
   SleepFor(MonoDelta::FromSeconds(2));

   ASSERT_OK(cluster_->tablet_server(0)->GetInt64Metric(
       &METRIC_ENTITY_server, "kudu.tabletserver",
       &METRIC_handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet,
       "total_count", &delete_rpc_count));
   ASSERT_EQ(1, delete_rpc_count);
 }

 // Parameterized test case for TABLET_DATA_DELETED deletions.
 //
 // Adds no members of its own: it is DeleteTableTest plus a 'const char*' test
 // parameter. TestRollForwardDelete reads that parameter with GetParam() and
 // uses it as the name of the fault flag to set on the tablet servers.
 class DeleteTableDeletedParamTest : public DeleteTableTest,
                                     public ::testing::WithParamInterface<const char*> {
 };

 // Checks that a delete which is cut short by a server crash gets rolled
 // forward when the server starts back up. The test parameter names the fault
 // flag to set, so each instantiation crashes at a different point.
 TEST_P(DeleteTableDeletedParamTest, TestRollForwardDelete) {
   NO_FATALS(StartCluster());
   const string fault_flag = GetParam();
   LOG(INFO) << "Running with fault flag: " << fault_flag;

   // Set the fault flag at runtime on every tablet server, so that each one
   // crashes when the Master calls DeleteTablet() on it.
   for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
     ExternalTabletServer* tserver = cluster_->tablet_server(i);
     ASSERT_OK(cluster_->SetFlag(tserver, fault_flag, "1.0"));
   }

   // TestWorkload is used purely as a convenient way to create a table on the
   // cluster.
   TestWorkload(cluster_.get()).Setup();

   // Expect one replica on each of the three tservers.
   ASSERT_OK(inspect_->WaitForReplicaCount(3));

   // Drop the table, then wait for the tablet servers to crash.
   NO_FATALS(DeleteTable(TestWorkload::kDefaultTableName));
   NO_FATALS(WaitForAllTSToCrash());

   // Because the servers crashed mid-delete, data must still be on disk.
   const Status no_data_status = inspect_->CheckNoData();
   ASSERT_TRUE(no_data_status.IsIllegalState()) << no_data_status.ToString();

   // Bring the tablet servers back; they should roll their deletes forward.
   // The fault flag needs no resetting because it was only set dynamically.
   for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
     ExternalTabletServer* tserver = cluster_->tablet_server(i);
     tserver->Shutdown();
     ASSERT_OK(tserver->Restart());
   }
   ASSERT_OK(inspect_->WaitForNoData());
 }

 // Faults appropriate for the TABLET_DATA_DELETED case.
 // Each entry is a flag name; TestRollForwardDelete receives one per
 // instantiation via GetParam() and sets it to "1.0" on every tablet server.
 const char* deleted_faults[] = {"fault_crash_after_blocks_deleted",
                                 "fault_crash_after_wal_deleted",
                                 "fault_crash_after_cmeta_deleted"};

 // Instantiate the DeleteTableDeletedParamTest tests once per fault flag above.
 INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableDeletedParamTest,
                         ::testing::ValuesIn(deleted_faults));

 // Parameterized test case for TABLET_DATA_TOMBSTONED deletions.
 //
 // Adds no members of its own: it is DeleteTableTest plus a 'const char*' test
 // parameter. TestTabletTombstone reads that parameter with GetParam() and
 // uses it as the name of the fault flag to set on the tablet server under
 // test.
 class DeleteTableTombstonedParamTest : public DeleteTableTest,
                                        public ::testing::WithParamInterface<const char*> {
 };

 // Regression test for tablet tombstoning. Tests:
 // 1. basic creation & tombstoning of a tablet.
 // 2. roll-forward (crash recovery) of a partially-completed tombstoning of a tablet.
 // 3. permanent deletion of a TOMBSTONED tablet
 //    (transition from TABLET_DATA_TOMBSTONED to TABLET_DATA_DELETED).
 //
 // The test parameter is the name of the fault flag that is set on the tablet
 // server before the second tablet is tombstoned.
 TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
   vector<string> flags;
   flags.push_back("--log_segment_size_mb=1"); // Faster log rolls.
   NO_FATALS(StartCluster(flags));
   const string fault_flag = GetParam();
   LOG(INFO) << "Running with fault flag: " << fault_flag;

   MonoDelta timeout = MonoDelta::FromSeconds(30);
   const int kTsIndex = 0; // Index of the tablet server we'll use for the test.

   // Create a table with 2 tablets. We delete the first tablet without
   // injecting any faults, then we delete the second tablet while exercising
   // several fault injection points.
   const int kNumTablets = 2;
   vector<const KuduPartialRow*> split_rows;
   Schema schema(GetSimpleTestSchema());
   client::KuduSchema client_schema(client::KuduSchemaFromSchema(schema));
   // NOTE(review): split_row is never freed here; presumably the table creator
   // takes ownership of the rows passed to split_rows() -- confirm.
   KuduPartialRow* split_row = client_schema.NewRow();
   ASSERT_OK(split_row->SetInt32(0, numeric_limits<int32_t>::max() / kNumTablets));
   split_rows.push_back(split_row);
   unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
   ASSERT_OK(table_creator->table_name(TestWorkload::kDefaultTableName)
                           .split_rows(split_rows)
                           .schema(&client_schema)
                           .set_range_partition_columns({ "key" })
                           .num_replicas(3)
                           .Create());

   // Set up a workload against the table; it is started further below and run
   // until we find WALs on disk.
   TestWorkload workload(cluster_.get());
   workload.Setup();

   // The table should have 2 tablets (1 split) on all 3 tservers (for a total of 6).
   ASSERT_OK(inspect_->WaitForReplicaCount(6));

   // Set up the proxies so we can easily send DeleteTablet() RPCs.
   TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()];

   // Ensure the tablet server is reporting 2 tablets.
   vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
   ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));

   // Capture both tablet ids now, by value. 'tablets' is refilled by the later
   // listings below, and nothing in this test guarantees that those come back
   // in the same order, so indexing into a later listing could silently pick
   // the tablet that has already been tombstoned.
   const string first_tablet_id = tablets[0].tablet_status().tablet_id();
   const string second_tablet_id = tablets[1].tablet_status().tablet_id();

   // Run the workload against whoever the leader is until WALs appear on TS 0
   // for the tablets we created.
   workload.Start();
   while (workload.rows_inserted() < 100) {
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex, first_tablet_id, 3));
   ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex, second_tablet_id, 3));
   workload.StopAndJoin();

   // Shut down the master and the other tablet servers so they don't interfere
   // by attempting to create tablets or remote bootstrap while we delete tablets.
   cluster_->master()->Shutdown();
   cluster_->tablet_server(1)->Shutdown();
   cluster_->tablet_server(2)->Shutdown();

   // Tombstone the first tablet.
   LOG(INFO) << "Tombstoning first tablet " << first_tablet_id << "...";
   ASSERT_TRUE(inspect_->DoesConsensusMetaExistForTabletOnTS(kTsIndex, first_tablet_id))
       << first_tablet_id;
   ASSERT_OK(itest::DeleteTablet(ts, first_tablet_id, TABLET_DATA_TOMBSTONED,
                                 boost::none, timeout));
   LOG(INFO) << "Waiting for first tablet to be tombstoned...";
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, first_tablet_id, CMETA_EXPECTED));

   // The tombstoned tablet is still listed; check the state it is reported in.
   ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
   for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
     if (t.tablet_status().tablet_id() == first_tablet_id) {
       ASSERT_EQ(tablet::SHUTDOWN, t.tablet_status().state());
       ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state())
           << t.tablet_status().tablet_id() << " not tombstoned";
     }
   }

   // Now tombstone the 2nd tablet, causing a fault. The RPC result is ignored
   // because the server is expected to crash while handling it.
   ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), fault_flag, "1.0"));
   LOG(INFO) << "Tombstoning second tablet " << second_tablet_id << "...";
   ignore_result(itest::DeleteTablet(ts, second_tablet_id, TABLET_DATA_TOMBSTONED,
                                     boost::none, timeout));
   NO_FATALS(WaitForTSToCrash(kTsIndex));

   // Restart the tablet server and wait for the WALs to be deleted and for the
   // superblock to show that it is tombstoned.
   cluster_->tablet_server(kTsIndex)->Shutdown();
   ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
   LOG(INFO) << "Waiting for second tablet to be tombstoned...";
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, second_tablet_id, CMETA_EXPECTED));

   // The tombstoned tablets will still show up in ListTablets(),
   // just with their data state set as TOMBSTONED. They should also be listed
   // as NOT_STARTED because we restarted the server.
   ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
   for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
     ASSERT_EQ(tablet::NOT_STARTED, t.tablet_status().state());
     ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state())
         << t.tablet_status().tablet_id() << " not tombstoned";
   }

   // Finally, delete all tablets on the TS, and wait for all data to be gone.
   LOG(INFO) << "Deleting all tablets...";
   for (const ListTabletsResponsePB::StatusAndSchemaPB& tablet : tablets) {
     const string& listed_tablet_id = tablet.tablet_status().tablet_id();
     // We need retries here, since some of the tablets may still be
     // bootstrapping after being restarted above.
     NO_FATALS(DeleteTabletWithRetries(ts, listed_tablet_id, TABLET_DATA_DELETED, timeout));
   }
   ASSERT_OK(inspect_->WaitForNoDataOnTS(kTsIndex));
 }

 // Faults appropriate for the TABLET_DATA_TOMBSTONED case.
 // Tombstoning a tablet does not delete the consensus metadata.
 // Each entry is a flag name; TestTabletTombstone receives one per
 // instantiation via GetParam() and sets it to "1.0" on the tablet server
 // under test before tombstoning the second tablet.
 const char* tombstoned_faults[] = {"fault_crash_after_blocks_deleted",
                                    "fault_crash_after_wal_deleted"};

 // Instantiate the DeleteTableTombstonedParamTest tests once per fault flag
 // above.
 INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableTombstonedParamTest,
                         ::testing::ValuesIn(tombstoned_faults));

 } // namespace kudu