| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <array> |
| #include <cmath> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <functional> |
| #include <map> |
| #include <memory> |
| #include <ostream> |
| #include <set> |
| #include <string> |
| #include <thread> |
| #include <type_traits> |
| #include <unordered_map> |
| #include <utility> |
| #include <vector> |
| |
| #include <glog/logging.h> |
| #include <gtest/gtest.h> |
| #include <rapidjson/document.h> |
| |
| #include "kudu/client/client.h" |
| #include "kudu/client/schema.h" |
| #include "kudu/client/shared_ptr.h" // IWYU pragma: keep |
| #include "kudu/common/common.pb.h" |
| #include "kudu/common/partial_row.h" |
| #include "kudu/common/schema.h" |
| #include "kudu/common/wire_protocol-test-util.h" |
| #include "kudu/gutil/mathlimits.h" |
| #include "kudu/gutil/strings/split.h" |
| #include "kudu/gutil/strings/substitute.h" |
| #include "kudu/integration-tests/cluster_itest_util.h" |
| #include "kudu/integration-tests/external_mini_cluster-itest-base.h" |
| #include "kudu/integration-tests/mini_cluster_fs_inspector.h" |
| #include "kudu/master/master.pb.h" |
| #include "kudu/master/master.proxy.h" |
| #include "kudu/mini-cluster/external_mini_cluster.h" |
| #include "kudu/mini-cluster/mini_cluster.h" |
| #include "kudu/rpc/rpc_controller.h" |
| #include "kudu/tools/tool_test_util.h" |
| #include "kudu/util/atomic.h" |
| #include "kudu/util/metrics.h" |
| #include "kudu/util/monotime.h" |
| #include "kudu/util/net/net_util.h" |
| #include "kudu/util/net/sockaddr.h" |
| #include "kudu/util/scoped_cleanup.h" |
| #include "kudu/util/status.h" |
| #include "kudu/util/test_macros.h" |
| #include "kudu/util/test_util.h" |
| |
| using std::multimap; |
| using std::set; |
| using std::string; |
| using std::thread; |
| using std::unique_ptr; |
| using std::vector; |
| using strings::Substitute; |
| |
| METRIC_DECLARE_entity(server); |
| METRIC_DECLARE_histogram(handler_latency_kudu_tserver_TabletServerAdminService_CreateTablet); |
| |
| namespace kudu { |
| |
| using client::KuduClient; |
| using client::KuduSchema; |
| using client::sp::shared_ptr; |
| using cluster::ClusterNodes; |
| |
// Default table name shared by the tests in this file; individual tests may
// declare and use their own table names instead.
constexpr const char* const kTableName = "test-table";
| |
| class CreateTableITest : public ExternalMiniClusterITestBase { |
| }; |
| |
| Status GetNumCreateTabletRPCs(const HostPort& http_hp, int64_t* num_rpcs) { |
| return itest::GetInt64Metric( |
| http_hp, |
| &METRIC_ENTITY_server, |
| "kudu.tabletserver", |
| &METRIC_handler_latency_kudu_tserver_TabletServerAdminService_CreateTablet, |
| "total_count", |
| num_rpcs); |
| } |
| |
| // Regression test for an issue seen when we fail to create a majority of the |
| // replicas in a tablet. Previously, we'd still consider the tablet "RUNNING" |
| // on the master and finish the table creation, even though that tablet would |
| // be stuck forever with its minority never able to elect a leader. |
| TEST_F(CreateTableITest, TestCreateWhenMajorityOfReplicasFailCreation) { |
| constexpr int kNumReplicas = 3; |
| vector<string> ts_flags; |
| vector<string> master_flags; |
| master_flags.emplace_back("--tablet_creation_timeout_ms=1000"); |
| NO_FATALS(StartCluster(ts_flags, master_flags, kNumReplicas)); |
| |
| // Shut down 2/3 of the tablet servers. |
| cluster_->tablet_server(1)->Shutdown(); |
| cluster_->tablet_server(2)->Shutdown(); |
| |
| // Try to create a single-tablet table. |
| // This won't succeed because we can't create enough replicas to get |
| // a quorum. |
| unique_ptr<client::KuduTableCreator> table_creator(client_->NewTableCreator()); |
| auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema()); |
| ASSERT_OK(table_creator->table_name(kTableName) |
| .schema(&client_schema) |
| .set_range_partition_columns({ "key" }) |
| .num_replicas(kNumReplicas) |
| .wait(false) |
| .Create()); |
| |
| // Sleep until we've seen a couple retries on our live server. |
| int64_t num_create_attempts = 0; |
| while (num_create_attempts < 3) { |
| SleepFor(MonoDelta::FromMilliseconds(100)); |
| ASSERT_OK(GetNumCreateTabletRPCs(cluster_->tablet_server(0)->bound_http_hostport(), |
| &num_create_attempts)); |
| LOG(INFO) << "Waiting for the master to retry creating the tablet 3 times... " |
| << num_create_attempts << " RPCs seen so far"; |
| |
| // The CreateTable operation should still be considered in progress, even though |
| // we'll be successful at creating a single replica. |
| bool in_progress = false; |
| ASSERT_OK(client_->IsCreateTableInProgress(kTableName, &in_progress)); |
| ASSERT_TRUE(in_progress); |
| } |
| |
| // Once we restart the servers, we should succeed at creating a healthy |
| // replicated tablet. |
| ASSERT_OK(cluster_->tablet_server(1)->Restart()); |
| ASSERT_OK(cluster_->tablet_server(2)->Restart()); |
| |
| // We should eventually finish the table creation we started earlier. |
| ASSERT_EVENTUALLY([&] { |
| bool in_progress = true; |
| ASSERT_OK(client_->IsCreateTableInProgress(kTableName, &in_progress)); |
| ASSERT_FALSE(in_progress); |
| }); |
| |
| // At this point, table has been successfully created. All the tablet servers should be left |
| // with only one tablet, eventually, since the tablets which failed to get created properly |
| // should get deleted. It's possible there are some delete tablet RPCs still inflight and |
| // not processed yet. |
| |
| // map of tablet id and count of replicas found. |
| std::unordered_map<string, int> tablet_num_replica_map; |
| ASSERT_EVENTUALLY([&] { |
| tablet_num_replica_map.clear(); |
| for (int i = 0; i < kNumReplicas; i++) { |
| for (const auto& tablet_id : inspect_->ListTabletsWithDataOnTS(i)) { |
| tablet_num_replica_map[tablet_id]++; |
| } |
| } |
| LOG(INFO) << Substitute( |
| "Waiting for only one tablet to be left with $0 replicas. Currently have: $1 tablet(s)", |
| kNumReplicas, tablet_num_replica_map.size()); |
| ASSERT_EQ(1, tablet_num_replica_map.size()); |
| // Assertion failure on the line above won't execute lines below under ASSERT_EVENTUALLY. |
| // So only one entry is present in the map at this point. |
| const auto num_replicas_found = tablet_num_replica_map.begin()->second; |
| ASSERT_EQ(kNumReplicas, num_replicas_found); |
| }); |
| |
| // Verify no additional create tablet RPCs are being serviced. |
| std::array<int64_t, kNumReplicas> num_create_attempts_arr{}; |
| for (int i = 0; i < kNumReplicas; i++) { |
| ASSERT_OK(GetNumCreateTabletRPCs(cluster_->tablet_server(i)->bound_http_hostport(), |
| &num_create_attempts_arr[i])); |
| } |
| for (int repeat_count = 0; repeat_count < 10; repeat_count++) { |
| SleepFor(MonoDelta::FromMilliseconds(100)); |
| for (int i = 0; i < kNumReplicas; i++) { |
| int64_t num_rpcs = 0; |
| ASSERT_OK(GetNumCreateTabletRPCs(cluster_->tablet_server(i)->bound_http_hostport(), |
| &num_rpcs)); |
| ASSERT_EQ(num_create_attempts_arr[i], num_rpcs); |
| } |
| } |
| } |
| |
| // Regression test for KUDU-1317. Ensure that, when a table is created, |
| // the tablets are well spread out across the machines in the cluster and |
| // that recovery from failures will be well parallelized. |
| TEST_F(CreateTableITest, TestSpreadReplicasEvenly) { |
| const int kNumServers = 10; |
| const int kNumTablets = 20; |
| NO_FATALS(StartCluster({}, {}, kNumServers)); |
| |
| unique_ptr<client::KuduTableCreator> table_creator(client_->NewTableCreator()); |
| auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema()); |
| ASSERT_OK(table_creator->table_name(kTableName) |
| .schema(&client_schema) |
| .set_range_partition_columns({ "key" }) |
| .num_replicas(3) |
| .add_hash_partitions({ "key" }, kNumTablets) |
| .Create()); |
| |
| // Check that the replicas are fairly well spread by computing the standard |
| // deviation of the number of replicas per server. |
| const double kMeanPerServer = kNumTablets * 3.0 / kNumServers; |
| double sum_squared_deviation = 0; |
| for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
| int num_replicas = inspect_->ListTabletsOnTS(ts_idx).size(); |
| LOG(INFO) << "TS " << ts_idx << " has " << num_replicas << " tablets"; |
| double deviation = static_cast<double>(num_replicas) - kMeanPerServer; |
| sum_squared_deviation += deviation * deviation; |
| } |
| double stddev = sqrt(sum_squared_deviation / (kMeanPerServer - 1)); |
| LOG(INFO) << "stddev = " << stddev; |
| // In 1000 runs of the test, only one run had stddev above 2.0. So, 3.0 should |
| // be a safe non-flaky choice. |
| ASSERT_LE(stddev, 3.0); |
| |
| // Construct a map from tablet ID to the set of servers that each tablet is hosted on. |
| multimap<string, int> tablet_to_servers; |
| for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
| vector<string> tablets = inspect_->ListTabletsOnTS(ts_idx); |
| for (const string& tablet_id : tablets) { |
| tablet_to_servers.insert(std::make_pair(tablet_id, ts_idx)); |
| } |
| } |
| |
| // For each server, count how many other servers it shares tablets with. |
| // This is highly correlated to how well parallelized recovery will be |
| // in the case the server crashes. |
| int sum_num_peers = 0; |
| for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
| vector<string> tablets = inspect_->ListTabletsOnTS(ts_idx); |
| set<int> peer_servers; |
| for (const string& tablet_id : tablets) { |
| auto peer_indexes = tablet_to_servers.equal_range(tablet_id); |
| for (auto it = peer_indexes.first; it != peer_indexes.second; ++it) { |
| peer_servers.insert(it->second); |
| } |
| } |
| |
| peer_servers.erase(ts_idx); |
| LOG(INFO) << "Server " << ts_idx << " has " << peer_servers.size() << " peers"; |
| sum_num_peers += peer_servers.size(); |
| } |
| |
| // On average, servers should have at least half the other servers as peers. |
| double avg_num_peers = static_cast<double>(sum_num_peers) / kNumServers; |
| LOG(INFO) << "avg_num_peers = " << avg_num_peers; |
| ASSERT_GE(avg_num_peers, kNumServers / 2); |
| } |
| |
| // Regression test for KUDU-2823. Ensure that, after we add some new tablet servers |
| // to the cluster, the new tablets are evenly distributed in the cluster based on |
| // dimensions. |
| TEST_F(CreateTableITest, TestSpreadReplicasEvenlyWithDimension) { |
| const int kNumServers = 10; |
| const int kNumTablets = 20; |
| vector<int32_t> num_new_replicas(kNumServers, 0); |
| vector<string> master_flags = { |
| "--tserver_last_replica_creations_halflife_ms=10", |
| }; |
| // We have five tablet servers. |
| NO_FATALS(StartCluster({}, master_flags, kNumServers / 2)); |
| |
| Schema schema = Schema({ ColumnSchema("key1", INT32), |
| ColumnSchema("key2", INT32), |
| ColumnSchema("int_val", INT32), |
| ColumnSchema("string_val", STRING, true) }, 2); |
| auto client_schema = KuduSchema::FromSchema(schema); |
| |
| auto create_table_func = [](KuduClient* client, |
| KuduSchema* client_schema, |
| const string& table_name, |
| int32_t range_lower_bound, |
| int32_t range_upper_bound, |
| const string& dimension_label) { |
| unique_ptr<client::KuduTableCreator> table_creator(client->NewTableCreator()); |
| unique_ptr<KuduPartialRow> lower_bound(client_schema->NewRow()); |
| RETURN_NOT_OK(lower_bound->SetInt32("key2", range_lower_bound)); |
| unique_ptr<KuduPartialRow> upper_bound(client_schema->NewRow()); |
| RETURN_NOT_OK(upper_bound->SetInt32("key2", range_upper_bound)); |
| return table_creator->table_name(table_name) |
| .schema(client_schema) |
| .add_hash_partitions({ "key1" }, kNumTablets) |
| .set_range_partition_columns({ "key2" }) |
| .add_range_partition(lower_bound.release(), upper_bound.release()) |
| .num_replicas(3) |
| .dimension_label(dimension_label) |
| .Create(); |
| }; |
| |
| auto alter_table_func = [](KuduClient* client, |
| KuduSchema* client_schema, |
| const string& table_name, |
| int32_t range_lower_bound, |
| int32_t range_upper_bound, |
| const string& dimension_label) { |
| unique_ptr<client::KuduTableAlterer> table_alterer(client->NewTableAlterer(table_name)); |
| unique_ptr<KuduPartialRow> lower_bound(client_schema->NewRow()); |
| RETURN_NOT_OK(lower_bound->SetInt32("key2", range_lower_bound)); |
| unique_ptr<KuduPartialRow> upper_bound(client_schema->NewRow()); |
| RETURN_NOT_OK(upper_bound->SetInt32("key2", range_upper_bound)); |
| return table_alterer->AddRangePartitionWithDimension(lower_bound.release(), |
| upper_bound.release(), |
| dimension_label) |
| ->Alter(); |
| }; |
| |
| auto calc_stddev_func = [](const vector<int32_t>& num_replicas, |
| double mean_per_ts, |
| int32_t ts_idx_start, |
| int32_t ts_idx_end) { |
| double sum_squared_deviation = 0; |
| for (int ts_idx = ts_idx_start; ts_idx < ts_idx_end; ts_idx++) { |
| int num_ts = num_replicas[ts_idx]; |
| LOG(INFO) << "TS " << ts_idx << " has " << num_ts << " tablets"; |
| double deviation = static_cast<double>(num_ts) - mean_per_ts; |
| sum_squared_deviation += deviation * deviation; |
| } |
| return sqrt(sum_squared_deviation / (mean_per_ts - 1)); |
| }; |
| |
| { |
| for (int ts_idx = 0; ts_idx < kNumServers / 2; ts_idx++) { |
| num_new_replicas[ts_idx] = inspect_->ListTabletsOnTS(ts_idx).size(); |
| } |
| // create the 'test-table1' table with 'label1'. |
| ASSERT_OK(create_table_func(client_.get(), &client_schema, "test-table1", 0, 100, "label1")); |
| for (int ts_idx = 0; ts_idx < kNumServers / 2; ts_idx++) { |
| int num_replicas = inspect_->ListTabletsOnTS(ts_idx).size(); |
| num_new_replicas[ts_idx] = num_replicas - num_new_replicas[ts_idx]; |
| } |
| // Check that the replicas are fairly well spread by computing the standard |
| // deviation of the number of replicas per alive server. |
| const double kMeanPerServer = kNumTablets * 3.0 / kNumServers * 2; |
| double stddev = calc_stddev_func( |
| num_new_replicas, kMeanPerServer, 0, kNumServers / 2); |
| LOG(INFO) << "stddev = " << stddev; |
| ASSERT_LE(stddev, 3.0); |
| } |
| |
| // Waiting for the recent creation replicas to decay to 0. |
| SleepFor(MonoDelta::FromMilliseconds(1000)); |
| |
| // Add five new tablet servers to cluster. |
| for (int ts_idx = kNumServers / 2; ts_idx < kNumServers; ts_idx++) { |
| ASSERT_OK(cluster_->AddTabletServer()); |
| } |
| ASSERT_OK(cluster_->WaitForTabletServerCount(kNumServers, MonoDelta::FromSeconds(60))); |
| |
| { |
| for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
| num_new_replicas[ts_idx] = inspect_->ListTabletsOnTS(ts_idx).size(); |
| } |
| // create the 'test-table2' table with 'label2'. |
| ASSERT_OK(create_table_func(client_.get(), &client_schema, "test-table2", 0, 100, "label2")); |
| for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
| int num_replicas = inspect_->ListTabletsOnTS(ts_idx).size(); |
| num_new_replicas[ts_idx] = num_replicas - num_new_replicas[ts_idx]; |
| } |
| // Check that the replicas are fairly well spread by computing the standard |
| // deviation of the number of replicas per server. |
| const double kMeanPerServer = kNumTablets * 3.0 / kNumServers; |
| double stddev = calc_stddev_func(num_new_replicas, kMeanPerServer, 0, kNumServers); |
| LOG(INFO) << "stddev = " << stddev; |
| ASSERT_LE(stddev, 3.0); |
| } |
| |
| // Waiting for the recent creation replicas to decay to 0. |
| SleepFor(MonoDelta::FromMilliseconds(1000)); |
| |
| { |
| for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
| num_new_replicas[ts_idx] = inspect_->ListTabletsOnTS(ts_idx).size(); |
| } |
| // Add partition with 'label3' to 'test-table1' |
| ASSERT_OK(alter_table_func(client_.get(), &client_schema, "test-table1", 100, 200, "label3")); |
| for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
| int num_replicas = inspect_->ListTabletsOnTS(ts_idx).size(); |
| num_new_replicas[ts_idx] = num_replicas - num_new_replicas[ts_idx]; |
| } |
| // Check that the replicas are fairly well spread by computing the standard |
| // deviation of the number of replicas per server. |
| const double kMeanPerServer = kNumTablets * 3.0 / kNumServers; |
| double stddev = calc_stddev_func(num_new_replicas, kMeanPerServer, 0, kNumServers); |
| LOG(INFO) << "stddev = " << stddev; |
| ASSERT_LE(stddev, 3.0); |
| } |
| } |
| |
| static void LookUpRandomKeysLoop(const std::shared_ptr<master::MasterServiceProxy>& master, |
| const char* table_name, |
| AtomicBool* quit) { |
| Schema schema(GetSimpleTestSchema()); |
| auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema()); |
| unique_ptr<KuduPartialRow> r(client_schema.NewRow()); |
| |
| while (!quit->Load()) { |
| master::GetTableLocationsRequestPB req; |
| master::GetTableLocationsResponsePB resp; |
| req.mutable_table()->set_table_name(table_name); |
| |
| // Look up random start and end keys, allowing start > end to ensure that |
| // the master correctly handles this case too. |
| string start_key; |
| string end_key; |
| CHECK_OK(r->SetInt32("key", rand() % MathLimits<int32_t>::kMax)); |
| CHECK_OK(r->EncodeRowKey(req.mutable_partition_key_start())); |
| CHECK_OK(r->SetInt32("key", rand() % MathLimits<int32_t>::kMax)); |
| CHECK_OK(r->EncodeRowKey(req.mutable_partition_key_end())); |
| |
| rpc::RpcController rpc; |
| |
| // Value doesn't matter; just need something to avoid ugly log messages. |
| rpc.set_timeout(MonoDelta::FromSeconds(10)); |
| |
| Status s = master->GetTableLocations(req, &resp, &rpc); |
| |
| // Either the lookup was successful or the master crashed. |
| CHECK(s.ok() || s.IsNetworkError()); |
| } |
| } |
| |
| // Regression test for a couple of bugs involving tablet lookups |
| // concurrent with tablet replacements during table creation. |
| // |
| // The first bug would crash the master if the table's key range was |
| // not fully populated. This corner case can occur when: |
| // 1. Tablet creation tasks time out because their tservers died, and |
| // 2. The master fails in replica selection when sending tablet creation tasks |
| // for tablets replaced because of #1. |
| // |
| // The second bug involved a race condition where a tablet is looked up |
| // halfway through the process of its being added to the table. |
| // |
| // This test replicates these conditions and hammers the master with key |
| // lookups, attempting to reproduce the master crashes. |
| TEST_F(CreateTableITest, TestCreateTableWithDeadTServers) { |
| SKIP_IF_SLOW_NOT_ALLOWED(); |
| |
| const char* kTableName = "test"; |
| |
| // Start up a cluster and immediately kill the tservers. The master will |
| // consider them alive long enough to respond successfully to the client's |
| // create table request, but won't actually be able to create the tablets. |
| NO_FATALS(StartCluster( |
| {}, |
| { |
| // The master should quickly time out create tablet tasks. The |
| // tservers will all be dead, so there's no point in waiting long. |
| "--tablet_creation_timeout_ms=1000", |
| // This timeout needs to be long enough that we don't immediately |
| // fail the client's create table request, but short enough that the |
| // master considers the tservers unresponsive (and recreates the |
| // outstanding table's tablets) during the test. |
| "--tserver_unresponsive_timeout_ms=5000" })); |
| cluster_->ShutdownNodes(ClusterNodes::TS_ONLY); |
| |
| Schema schema(GetSimpleTestSchema()); |
| auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema()); |
| unique_ptr<client::KuduTableCreator> table_creator(client_->NewTableCreator()); |
| |
| // Don't bother waiting for table creation to finish; it'll never happen |
| // because all of the tservers are dead. |
| CHECK_OK(table_creator->table_name(kTableName) |
| .schema(&client_schema) |
| .set_range_partition_columns({ "key" }) |
| .wait(false) |
| .Create()); |
| |
| // Spin off a bunch of threads that repeatedly look up random key ranges in the table. |
| constexpr int kNumThreads = 16; |
| AtomicBool quit(false); |
| vector<thread> threads; |
| threads.reserve(kNumThreads); |
| for (int i = 0; i < kNumThreads; i++) { |
| auto proxy = cluster_->master_proxy(); |
| threads.emplace_back([proxy, kTableName, &quit]() { |
| LookUpRandomKeysLoop(proxy, kTableName, &quit); |
| }); |
| } |
| SCOPED_CLEANUP({ |
| quit.Store(true); |
| for (auto& t : threads) { |
| t.join(); |
| } |
| }); |
| |
| // Give the lookup threads some time to crash the master. |
| MonoTime deadline = MonoTime::Now() + MonoDelta::FromSeconds(15); |
| while (MonoTime::Now() < deadline) { |
| ASSERT_TRUE(cluster_->master()->IsProcessAlive()) << "Master crashed!"; |
| SleepFor(MonoDelta::FromMilliseconds(100)); |
| } |
| } |
| |
| // Make sure it's possible to create a table when using proxied RPC addresses, |
| // and the result table locations point to the addresses set to be advertised |
| // by a TCP proxy correspondingly. |
| TEST_F(CreateTableITest, ProxyAdvertisedAddresses) { |
| constexpr const char* const kIpAddr = "127.0.0.1"; |
| constexpr int kNumServers = 1; |
| constexpr int kNumTablets = 2; |
| const MonoDelta timeout = MonoDelta::FromSeconds(10); |
| |
| uint16_t m_port = 0; |
| ASSERT_OK(GetRandomPort(kIpAddr, &m_port)); |
| const HostPort m_proxied_addr(kIpAddr, m_port); |
| |
| uint16_t t_port = 0; |
| ASSERT_OK(GetRandomPort(kIpAddr, &t_port)); |
| const HostPort t_proxied_addr(kIpAddr, t_port); |
| |
| const string m_proxy_advertised_address = "kudu.proxy.io:333"; |
| |
| const uint16_t t_proxy_advertised_port = 888; |
| const string t_proxy_advertised_host = "kudu.proxy.io"; |
| const string t_proxy_advertised_addr = Substitute( |
| "$0:$1", t_proxy_advertised_host, t_proxy_advertised_port); |
| |
| uint16_t t_bind_port = 0; |
| ASSERT_OK(GetRandomPort(kIpAddr, &t_bind_port)); |
| const HostPort t_bind_addr(kIpAddr, t_bind_port); |
| |
| const vector<string> master_flags = { |
| Substitute("--rpc_proxy_advertised_addresses=$0", m_proxy_advertised_address), |
| Substitute("--rpc_proxied_addresses=$0", m_proxied_addr.ToString()), |
| }; |
| const vector<string> ts_flags = { |
| Substitute("--rpc_proxy_advertised_addresses=$0", t_proxy_advertised_addr), |
| Substitute("--rpc_proxied_addresses=$0", t_proxied_addr.ToString()), |
| Substitute("--rpc_bind_addresses=$0", t_bind_addr.ToString()), |
| }; |
| NO_FATALS(StartCluster(ts_flags, master_flags, kNumServers)); |
| |
| // Build a client to send requests via the proxied RPC endpoint. |
| client::KuduClientBuilder builder; |
| builder.add_master_server_addr(m_proxied_addr.ToString()); |
| client::sp::shared_ptr<client::KuduClient> c_ext; |
| ASSERT_OK(builder.Build(&c_ext)); |
| |
| // Create table using Kudu client sending requests as if they were proxied |
| // from outside. |
| unique_ptr<client::KuduTableCreator> table_creator(c_ext->NewTableCreator()); |
| auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema()); |
| ASSERT_OK(table_creator->table_name(kTableName) |
| .schema(&client_schema) |
| .set_range_partition_columns({ "key" }) |
| .num_replicas(1) |
| .add_hash_partitions({ "key" }, kNumTablets) |
| .Create()); |
| |
| // Make sure the client receives proxy advertised addresses since the request |
| // came to the proxied RPC address. |
| const auto& master_addresses = c_ext->GetMasterAddresses(); |
| ASSERT_EQ(m_proxy_advertised_address, master_addresses); |
| |
| // Get information on table's locations via standard RPC endpoint. |
| { |
| master::GetTableLocationsRequestPB req; |
| req.set_intern_ts_infos_in_response(true); |
| req.mutable_table()->set_table_name(kTableName); |
| |
| rpc::RpcController rpc; |
| rpc.set_timeout(timeout); |
| master::GetTableLocationsResponsePB resp; |
| auto mp = cluster_->master_proxy(); |
| ASSERT_OK(mp->GetTableLocations(req, &resp, &rpc)); |
| |
| ASSERT_EQ(kNumTablets, resp.tablet_locations().size()); |
| for (const auto& loc : resp.tablet_locations()) { |
| ASSERT_EQ(kNumServers, loc.interned_replicas_size()); |
| } |
| |
| ASSERT_EQ(1, resp.ts_infos_size()); |
| const auto& ts_info = resp.ts_infos(0); |
| ASSERT_EQ(1, ts_info.rpc_addresses_size()); |
| const auto& hp = ts_info.rpc_addresses(0); |
| ASSERT_TRUE(hp.has_host()); |
| ASSERT_EQ(t_bind_addr.host(), hp.host()); |
| ASSERT_TRUE(hp.has_port()); |
| ASSERT_EQ(t_bind_addr.port(), hp.port()); |
| } |
| |
| // Get information on table's locations via endpoint for proxied RPCs. |
| { |
| Sockaddr ma; |
| ma.ParseFromNumericHostPort(m_proxied_addr); |
| auto mp = std::make_shared<master::MasterServiceProxy>( |
| cluster_->messenger(), ma, ma.host()); |
| |
| master::GetTableLocationsRequestPB req; |
| req.set_intern_ts_infos_in_response(true); |
| req.mutable_table()->set_table_name(kTableName); |
| |
| rpc::RpcController rpc; |
| rpc.set_timeout(timeout); |
| master::GetTableLocationsResponsePB resp; |
| ASSERT_OK(mp->GetTableLocations(req, &resp, &rpc)); |
| |
| ASSERT_EQ(kNumTablets, resp.tablet_locations().size()); |
| for (const auto& loc : resp.tablet_locations()) { |
| ASSERT_EQ(kNumServers, loc.interned_replicas_size()); |
| } |
| |
| ASSERT_EQ(1, resp.ts_infos_size()); |
| const auto& ts_info = resp.ts_infos(0); |
| ASSERT_EQ(1, ts_info.rpc_addresses_size()); |
| const auto& hp = ts_info.rpc_addresses(0); |
| ASSERT_TRUE(hp.has_host()); |
| ASSERT_EQ(t_proxy_advertised_host, hp.host()); |
| ASSERT_TRUE(hp.has_port()); |
| ASSERT_EQ(t_proxy_advertised_port, hp.port()); |
| } |
| |
| // Delete the created table using the client instance communicating with |
| // the cluster through regular RPC endpoints. |
| ASSERT_OK(client_->DeleteTable(kTableName)); |
| } |
| |
| class NotEnoughHealthyTServersTest : |
| public CreateTableITest, |
| public ::testing::WithParamInterface<bool> { |
| }; |
| |
| INSTANTIATE_TEST_SUITE_P(AddNewTS, NotEnoughHealthyTServersTest, ::testing::Bool()); |
| |
| TEST_P(NotEnoughHealthyTServersTest, TestNotEnoughHealthyTServers) { |
| SKIP_IF_SLOW_NOT_ALLOWED(); |
| const auto add_new_ts = GetParam(); |
| constexpr const char* const kNotEnoughReplicasTableName = "kudu.not_enough_replicas"; |
| constexpr const char* const kOverRegistedTSTableName = "kudu.over.registed.ts"; |
| constexpr const char* const kFiveReplicaTableName = "kudu.five.replica"; |
| constexpr const char* const kOneReplicaTableName = "kudu.one.replica"; |
| constexpr int kNumTabletServers = 5; |
| constexpr int kTSUnresponsiveTimeoutMs = 4000; |
| constexpr int kHeartbeatIntervalMs = 3000; |
| |
| // Do not validate the number of replicas. |
| vector<string> master_flags = { |
| "--catalog_manager_check_ts_count_for_create_table=false", |
| "--catalog_manager_check_ts_count_for_alter_table=false", |
| Substitute("--tserver_unresponsive_timeout_ms=$0", kTSUnresponsiveTimeoutMs), |
| Substitute("--heartbeat_interval_ms=$0", kHeartbeatIntervalMs), |
| "--allow_unsafe_replication_factor=true", |
| "--allow_creating_under_replicated_tables=true" |
| }; |
| NO_FATALS(StartCluster({}, master_flags, kNumTabletServers)); |
| |
| string master_address = cluster_->master()->bound_rpc_addr().ToString(); |
| |
| auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema()); |
| auto create_table_func = [&](const string& table_name, int replica_num) -> Status { |
| unique_ptr<client::KuduTableCreator> table_creator(client_->NewTableCreator()); |
| return table_creator->table_name(table_name) |
| .schema(&client_schema) |
| .set_range_partition_columns({ "key" }) |
| .num_replicas(replica_num) |
| .Create(); |
| }; |
| |
| // The number of replicas can't be over the number of registered tablet servers. |
| // RF = 6. |
| { |
| Status s = create_table_func(kOverRegistedTSTableName, 6); |
| ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); |
| ASSERT_STR_CONTAINS(s.ToString(), "not enough registered tablet servers to"); |
| shared_ptr<client::KuduTable> table; |
| s = client_->OpenTable(kOverRegistedTSTableName, &table); |
| ASSERT_TRUE(s.IsNotFound()) << s.ToString(); |
| } |
| |
| { |
| // Shutdown 3 tablet servers. |
| for (int i = 0; i < 3; i++) { |
| NO_FATALS(cluster_->tablet_server(i)->Shutdown()); |
| } |
| // Wait the 3 tablet servers heartbeat timeout and unresponsive timeout. Then catalog |
| // manager will take them as unavailable tablet servers. KSCK gets the status of tablet |
| // server from tablet serve interface. Here must wait the caltalog manager to take the |
| // as unavailable. |
| SleepFor(MonoDelta::FromMilliseconds(3*(kTSUnresponsiveTimeoutMs + kHeartbeatIntervalMs))); |
| } |
| |
| // RF = 5. Creating table will fail because there are not enough live tablet servers. |
| { |
| Status s = create_table_func(kOverRegistedTSTableName, 5); |
| ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); |
| ASSERT_STR_CONTAINS(s.ToString(), "not enough live tablet servers to"); |
| } |
| |
| { |
| // Restart the first tablet server. |
| NO_FATALS(cluster_->tablet_server(0)->Restart()); |
| // Wait the restarted tablet server to send a heartbeat and be registered in catalog manaager. |
| SleepFor(MonoDelta::FromMilliseconds(kHeartbeatIntervalMs)); |
| } |
| |
| // Create a table with RF=5. It should succeed. |
| ASSERT_OK(create_table_func(kFiveReplicaTableName, 5)); |
| |
| { |
| // Restart the second tablet server. |
| NO_FATALS(cluster_->tablet_server(1)->Restart()); |
| // Wait the restarted tablet server to send a heartbeat and be registered in catalog manaager. |
| SleepFor(MonoDelta::FromMilliseconds(kHeartbeatIntervalMs)); |
| } |
| |
| // RF = 1. |
| ASSERT_OK(create_table_func(kOneReplicaTableName, 1)); |
| |
| // Create a five-replicas table. |
| ASSERT_OK(create_table_func(kNotEnoughReplicasTableName, 5)); |
| |
| // Add another column. Test alter table logic. |
| { |
| shared_ptr<KuduClient> client; |
| ASSERT_OK(cluster_->CreateClient(nullptr, &client)); |
| unique_ptr<client::KuduTableAlterer> table_alterer( |
| client_->NewTableAlterer(kNotEnoughReplicasTableName)); |
| table_alterer->AddColumn("new_column")->Type(client::KuduColumnSchema::INT32); |
| ASSERT_OK(table_alterer->Alter()); |
| } |
| |
| { |
| string out; |
| string cmd = Substitute( |
| "cluster ksck $0 --sections=TABLE_SUMMARIES --ksck_format=json_compact", master_address); |
| kudu::tools::RunKuduTool(strings::Split(cmd, " ", strings::SkipEmpty()), &out); |
| rapidjson::Document doc; |
| doc.Parse<0>(out.c_str()); |
| ASSERT_EQ(3, doc["table_summaries"].Size()); |
| const rapidjson::Value& items = doc["table_summaries"]; |
| for (int i = 0; i < items.Size(); i++) { |
| if (string(kOneReplicaTableName) == items[i]["name"].GetString()) { |
| ASSERT_EQ(string("HEALTHY"), items[i]["health"].GetString()); |
| } else { |
| ASSERT_EQ(string("UNDER_REPLICATED"), items[i]["health"].GetString()); |
| } |
| } |
| } |
| |
| if (add_new_ts) { |
| // Add one new tablet server. |
| NO_FATALS(cluster_->AddTabletServer()); |
| } else { |
| // Restart the stopped tablet server |
| NO_FATALS(cluster_->tablet_server(2)->Restart()); |
| } |
| |
| // All tables will become healthy. |
| { |
| ASSERT_EVENTUALLY([&] { |
| string out; |
| string in; |
| string cmd = Substitute( |
| "cluster ksck $0 --sections=TABLE_SUMMARIES --ksck_format=json_compact", master_address); |
| kudu::tools::RunKuduTool(strings::Split(cmd, " ", strings::SkipEmpty()), &out, nullptr, in); |
| rapidjson::Document doc; |
| doc.Parse<0>(out.c_str()); |
| ASSERT_EQ(3, doc["table_summaries"].Size()); |
| const rapidjson::Value& items = doc["table_summaries"]; |
| for (int i = 0; i < items.Size(); i++) { |
| ASSERT_EQ(string("HEALTHY"), items[i]["health"].GetString()); |
| } |
| }); |
| } |
| } |
| } // namespace kudu |