blob: 8bd8adedaf359258bd34211dcefda8501ea73f85 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <unistd.h>
#include <cstdint>
#include <memory>
#include <ostream>
#include <set>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "kudu/client/client-internal.h"
#include "kudu/client/client-test-util.h"
#include "kudu/client/client.h"
#include "kudu/client/scan_predicate.h"
#include "kudu/client/shared_ptr.h" // IWYU pragma: keep
#include "kudu/client/value.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/join.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/integration-tests/cluster_itest_util.h"
#include "kudu/integration-tests/test_workload.h"
#include "kudu/mini-cluster/external_mini_cluster.h"
#include "kudu/util/countdown_latch.h"
#include "kudu/util/metrics.h"
#include "kudu/util/monotime.h"
#include "kudu/util/pstack_watcher.h"
#include "kudu/util/random.h"
#include "kudu/util/status.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
METRIC_DECLARE_entity(tablet);
METRIC_DECLARE_counter(leader_memory_pressure_rejections);
METRIC_DECLARE_counter(follower_memory_pressure_rejections);
using kudu::client::KuduClient;
using kudu::client::KuduScanner;
using kudu::client::KuduTable;
using kudu::cluster::ExternalMiniCluster;
using kudu::cluster::ExternalMiniClusterOptions;
using std::set;
using std::string;
using std::thread;
using std::unique_ptr;
using std::vector;
using strings::Substitute;
namespace kudu {
class ClientStressTest : public KuduTest {
public:
virtual void SetUp() OVERRIDE {
KuduTest::SetUp();
ExternalMiniClusterOptions opts = default_opts();
if (multi_master()) {
opts.num_masters = 3;
}
opts.num_tablet_servers = 3;
cluster_.reset(new ExternalMiniCluster(std::move(opts)));
ASSERT_OK(cluster_->Start());
}
virtual void TearDown() OVERRIDE {
alarm(0);
cluster_->Shutdown();
KuduTest::TearDown();
}
protected:
void ScannerThread(KuduClient* client, const CountDownLatch* go_latch, int32_t start_key) {
client::sp::shared_ptr<KuduTable> table;
CHECK_OK(client->OpenTable(TestWorkload::kDefaultTableName, &table));
vector<string> rows;
go_latch->Wait();
KuduScanner scanner(table.get());
CHECK_OK(scanner.AddConjunctPredicate(table->NewComparisonPredicate(
"key", client::KuduPredicate::GREATER_EQUAL,
client::KuduValue::FromInt(start_key))));
CHECK_OK(ScanToStrings(&scanner, &rows));
}
virtual bool multi_master() const {
return false;
}
virtual ExternalMiniClusterOptions default_opts() const {
return ExternalMiniClusterOptions();
}
unique_ptr<ExternalMiniCluster> cluster_;
};
// Stress test a case where most of the operations are expected to time out.
// This is a regression test for various bugs we've seen in timeout handling,
// especially with concurrent requests.
TEST_F(ClientStressTest, TestLookupTimeouts) {
  // Run longer when slow tests are allowed; otherwise just a quick smoke run.
  const int kRunTimeMillis = AllowSlowTests() ? 5000 : 100;

  // Many writer threads with a tiny per-write timeout: nearly everything
  // should time out, and that's expected to be tolerated.
  TestWorkload workload(cluster_.get());
  workload.set_num_write_threads(64);
  workload.set_write_timeout_millis(10);
  workload.set_timeout_allowed(true);
  workload.Setup();
  workload.Start();
  SleepFor(MonoDelta::FromMilliseconds(kRunTimeMillis));
}
// Regression test for KUDU-1104, a race in which multiple scanners racing to populate a
// cold meta cache on a shared Client would crash.
//
// This test creates a table with a lot of tablets (so that we require many round-trips to
// the master to populate the meta cache) and then starts a bunch of parallel threads which
// scan starting at random points in the key space.
TEST_F(ClientStressTest, TestStartScans) {
  // Disable log segment preallocation so creating 40 tablets is cheaper.
  for (int ts_idx = 0; ts_idx < cluster_->num_tablet_servers(); ts_idx++) {
    ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(ts_idx),
                                "log_preallocate_segments", "0"));
  }

  TestWorkload workload(cluster_.get());
  workload.set_num_tablets(40);
  workload.set_num_replicas(1);
  workload.Setup();

  // We run the guts of the test several times -- it takes a while to build
  // the 40 tablets above, but the actual scans are very fast since the table
  // is empty.
  const int kNumRuns = AllowSlowTests() ? 10 : 2;
  for (int run = 1; run <= kNumRuns; run++) {
    LOG(INFO) << "Starting run " << run;

    // A fresh client per run guarantees a cold meta cache each time.
    client::sp::shared_ptr<KuduClient> client;
    CHECK_OK(cluster_->CreateClient(nullptr, &client));

    CountDownLatch start_latch(1);
    constexpr int kNumThreads = 60;
    vector<thread> scan_threads;
    scan_threads.reserve(kNumThreads);
    Random rand(run);
    for (int i = 0; i < kNumThreads; i++) {
      const int32_t start_key = rand.Next32();
      scan_threads.emplace_back([this, client, &start_latch, start_key]() {
        this->ScannerThread(client.get(), &start_latch, start_key);
      });
    }

    // Give the threads a moment to block on the latch, then release them
    // all at once so they race to populate the meta cache.
    SleepFor(MonoDelta::FromMilliseconds(50));
    start_latch.CountDown();
    for (auto& t : scan_threads) {
      t.join();
    }
  }
}
// Override the base test to run in multi-master mode.
class ClientStressTest_MultiMaster : public ClientStressTest {
protected:
virtual bool multi_master() const OVERRIDE {
return true;
}
};
// Stress test a case where most of the operations are expected to time out.
// This is a regression test for KUDU-614 - it would cause a deadlock prior
// to fixing that bug.
TEST_F(ClientStressTest_MultiMaster, TestLeaderResolutionTimeout) {
  TestWorkload work(cluster_.get());
  work.set_num_write_threads(64);
  // This timeout gets applied to the master requests. It's lower than the
  // amount of time that we sleep the masters, to ensure they timeout.
  work.set_client_default_rpc_timeout_millis(250);
  // This is the time budget for the whole request. It has to be longer than
  // the above timeout so that the client actually attempts to resolve
  // the leader.
  work.set_write_timeout_millis(280);
  work.set_timeout_allowed(true);
  work.Setup();
  work.Start();

  // Pause every tablet server and every master so that all in-flight writes
  // and leader-resolution RPCs hit their timeouts while the whole cluster is
  // unresponsive.
  ASSERT_OK(cluster_->tablet_server(0)->Pause());
  ASSERT_OK(cluster_->tablet_server(1)->Pause());
  ASSERT_OK(cluster_->tablet_server(2)->Pause());
  ASSERT_OK(cluster_->master(0)->Pause());
  ASSERT_OK(cluster_->master(1)->Pause());
  ASSERT_OK(cluster_->master(2)->Pause());
  // 300ms exceeds both the 250ms RPC timeout and the 280ms write budget, so
  // the timeouts are guaranteed to fire while the processes are paused.
  SleepFor(MonoDelta::FromMilliseconds(300));
  ASSERT_OK(cluster_->tablet_server(0)->Resume());
  ASSERT_OK(cluster_->tablet_server(1)->Resume());
  ASSERT_OK(cluster_->tablet_server(2)->Resume());
  ASSERT_OK(cluster_->master(0)->Resume());
  ASSERT_OK(cluster_->master(1)->Resume());
  ASSERT_OK(cluster_->master(2)->Resume());
  // Let the workload run briefly against the revived cluster.
  SleepFor(MonoDelta::FromMilliseconds(100));

  // Set an explicit timeout. This test has caused deadlocks in the past.
  // Also make sure to dump stacks before the alarm goes off.
  // The alarm is disarmed by the fixture's TearDown() via alarm(0).
  // NOTE(review): 'watcher' is declared after 'work', so it is destroyed
  // *before* the workload destructor joins its writer threads — confirm the
  // watcher can still dump stacks if that join is where the hang occurs.
  PstackWatcher watcher(MonoDelta::FromSeconds(30));
  alarm(60);
}
// Variant of the stress fixture whose tablet servers run under a very
// small memory budget, so that memory-pressure rejections occur.
class ClientStressTest_LowMemory : public ClientStressTest {
 protected:
  ExternalMiniClusterOptions default_opts() const override {
    // There's nothing scientific about this number; it must be low enough to
    // trigger memory pressure request rejection yet high enough for the
    // servers to make forward progress.
    constexpr int kHardLimitBytes = 64 * 1024 * 1024;

    ExternalMiniClusterOptions opts;
    opts.extra_tserver_flags.emplace_back(Substitute(
        "--memory_limit_hard_bytes=$0", kHardLimitBytes));
    opts.extra_tserver_flags.emplace_back("--memory_limit_soft_percentage=0");

    // Since --memory_limit_soft_percentage=0, any nonzero block cache usage
    // will cause memory pressure. Since that's part of the point of the test,
    // we'll allow it.
    opts.extra_tserver_flags.emplace_back("--force_block_cache_capacity");
    return opts;
  }
};
// Stress test where, due to absurdly low memory limits, many client requests
// are rejected, forcing the client to retry repeatedly.
TEST_F(ClientStressTest_LowMemory, TestMemoryThrottling) {
#ifdef THREAD_SANITIZER
  // TSAN tests run much slower, so we don't want to wait for as many
  // rejections before declaring the test to be passed.
  const int64_t kMinRejections = 20;
#else
  const int64_t kMinRejections = 100;
#endif

  const MonoDelta kMaxWaitTime = MonoDelta::FromSeconds(60);

  TestWorkload work(cluster_.get());
  work.Setup();
  work.Start();

  // Wait until we've rejected some number of requests. Both leader- and
  // follower-side rejections count toward the target.
  MonoTime deadline = MonoTime::Now() + kMaxWaitTime;
  while (true) {
    int64_t total_num_rejections = 0;

    // It can take some time for the tablets (and their metric entities) to
    // appear on every server. Rather than explicitly wait for that above,
    // we'll just treat the lack of a metric as non-fatal. If the entity
    // or metric is truly missing, we'll eventually timeout and fail.
    for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
      for (const CounterPrototype* metric :
           { &METRIC_leader_memory_pressure_rejections,
             &METRIC_follower_memory_pressure_rejections }) {
        int64_t value;
        Status s = itest::GetInt64Metric(
            cluster_->tablet_server(i)->bound_http_hostport(),
            &METRIC_ENTITY_tablet,
            nullptr,
            metric,
            "value",
            &value);
        if (!s.IsNotFound()) {
          ASSERT_OK(s);
          total_num_rejections += value;
        }
      }
    }
    if (total_num_rejections >= kMinRejections) {
      break;
    }
    if (MonoTime::Now() > deadline) {
      FAIL() << "Ran for " << kMaxWaitTime.ToString() << ", deadline expired and only saw "
             << total_num_rejections << " memory rejections";
    }
    SleepFor(MonoDelta::FromMilliseconds(200));
  }
}
// This test just creates a bunch of clients and makes sure that the generated client ids
// are unique among the created clients.
TEST_F(ClientStressTest, TestUniqueClientIds) {
  constexpr int kNumClients = 1000;
  set<string> seen_ids;
  for (int i = 0; i < kNumClients; i++) {
    client::sp::shared_ptr<KuduClient> client;
    ASSERT_OK(cluster_->CreateClient(nullptr, &client));
    const string& client_id = client->data_->client_id_;
    // emplace() returns false in .second if the id was already present.
    const bool is_new_id = seen_ids.emplace(client_id).second;
    EXPECT_TRUE(is_new_id) << "Unique id generation failed. New client id: "
                           << client_id
                           << " had already been used. Generated ids: "
                           << JoinStrings(seen_ids, ",");
  }
}
} // namespace kudu