| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <dlfcn.h> |
| #ifdef __linux__ |
| #include <link.h> |
| #endif |
| #include "kudu/util/debug-util.h" |
| |
| #include <unistd.h> |
| |
| #include <algorithm> |
| #include <csignal> |
| #include <cstddef> |
| #include <cstdint> |
| #include <functional> |
| #include <initializer_list> |
| #include <memory> |
| #include <ostream> |
| #include <string> |
| #include <vector> |
| |
| #include <gflags/gflags_declare.h> |
| #include <glog/logging.h> |
| #include <glog/stl_logging.h> |
| #include <gtest/gtest.h> |
| |
| #include "kudu/gutil/ref_counted.h" |
| #include "kudu/gutil/strings/substitute.h" |
| #include "kudu/gutil/walltime.h" |
| #include "kudu/util/array_view.h" |
| #include "kudu/util/countdown_latch.h" |
| #include "kudu/util/kernel_stack_watchdog.h" |
| #include "kudu/util/monotime.h" |
| #include "kudu/util/scoped_cleanup.h" |
| #include "kudu/util/status.h" |
| #include "kudu/util/test_macros.h" |
| #include "kudu/util/test_util.h" |
| #include "kudu/util/thread.h" |
| |
| using std::string; |
| using std::vector; |
| using strings::Substitute; |
| |
| DECLARE_int32(test_timeout_after); |
| DECLARE_int32(stress_cpu_threads); |
| |
| namespace kudu { |
| |
// Test fixture for debug-util functionality: stack trace collection,
// symbolization, and cross-thread stack dumping.
class DebugUtilTest : public KuduTest {
};
| |
| TEST_F(DebugUtilTest, TestStackTrace) { |
| StackTrace t; |
| t.Collect(0); |
| string trace = t.Symbolize(); |
| ASSERT_STR_CONTAINS(trace, "kudu::DebugUtilTest_TestStackTrace_Test::TestBody"); |
| } |
| |
| // DumpThreadStack is only supported on Linux, since the implementation relies |
| // on the tgkill syscall which is not portable. |
| #if defined(__linux__) |
| |
| namespace { |
| void SleeperThread(CountDownLatch* l) { |
| // We use an infinite loop around WaitFor() instead of a normal Wait() |
| // so that this test passes in TSAN. Without this, we run into this TSAN |
| // bug which prevents the sleeping thread from handling signals: |
| // https://code.google.com/p/thread-sanitizer/issues/detail?id=91 |
| while (!l->WaitFor(MonoDelta::FromMilliseconds(10))) { |
| } |
| } |
| |
| void fake_signal_handler(int signum) {} |
| |
| bool IsSignalHandlerRegistered(int signum) { |
| struct sigaction cur_action; |
| CHECK_EQ(0, sigaction(signum, nullptr, &cur_action)); |
| return cur_action.sa_handler != SIG_DFL; |
| } |
| } // anonymous namespace |
| |
| TEST_F(DebugUtilTest, TestStackTraceInvalidTid) { |
| string s = DumpThreadStack(1); |
| ASSERT_STR_CONTAINS(s, "unable to deliver signal"); |
| } |
| |
| TEST_F(DebugUtilTest, TestStackTraceSelf) { |
| string s = DumpThreadStack(Thread::CurrentThreadId()); |
| ASSERT_STR_CONTAINS(s, "kudu::DebugUtilTest_TestStackTraceSelf_Test::TestBody()"); |
| } |
| |
| TEST_F(DebugUtilTest, TestStackTraceMainThread) { |
| string s = DumpThreadStack(getpid()); |
| ASSERT_STR_CONTAINS(s, "kudu::DebugUtilTest_TestStackTraceMainThread_Test::TestBody()"); |
| } |
| |
// Exercises cross-thread stack dumping via signals, including switching the
// stack-trace signal at runtime and verifying that handler registration
// follows the switch (SIGUSR2 <-> SIGHUP).
TEST_F(DebugUtilTest, TestSignalStackTrace) {
  CountDownLatch l(1);
  scoped_refptr<Thread> t;
  ASSERT_OK(Thread::Create("test", "test thread", [&l]() { SleeperThread(&l); }, &t));
  SCOPED_CLEANUP({
    // Allow the thread to finish.
    l.CountDown();
    t->Join();
  });

  // We have to loop a little bit because it takes a little while for the thread
  // to start up and actually call our function.
  //
  // Note: due to RELEASE build inlining, we need to make sure to pick a stack
  // frame that isn't optimized away. Different compilers behave differently, so
  // we'll use a regular expression to get maximal coverage.
  static constexpr const char* kTestThreadOneStackFrame =
      "SleeperThread";
  static constexpr const char* kTestThreadAnotherStackFrame =
      "kudu::ConditionVariable::WaitUntil()";
  static const string kStackFrameRegExp = Substitute(
      "$0|$1", kTestThreadOneStackFrame, kTestThreadAnotherStackFrame);
  ASSERT_EVENTUALLY([&]() {
    ASSERT_STR_MATCHES(DumpThreadStack(t->tid()), kStackFrameRegExp);
  });

  // Test that we can change the signal and that the stack traces still work,
  // on the new signal.
  ASSERT_FALSE(IsSignalHandlerRegistered(SIGHUP));
  ASSERT_OK(SetStackTraceSignal(SIGHUP));

  // Should now be registered.
  ASSERT_TRUE(IsSignalHandlerRegistered(SIGHUP));

  // SIGUSR2 should be relinquished.
  ASSERT_FALSE(IsSignalHandlerRegistered(SIGUSR2));

  // Stack traces should work using the new handler.
  ASSERT_STR_MATCHES(DumpThreadStack(t->tid()), kStackFrameRegExp);

  // Switch back to SIGUSR2 and ensure it changes back.
  ASSERT_OK(SetStackTraceSignal(SIGUSR2));
  ASSERT_TRUE(IsSignalHandlerRegistered(SIGUSR2));
  ASSERT_FALSE(IsSignalHandlerRegistered(SIGHUP));

  // Stack traces should work using the new handler.
  ASSERT_STR_MATCHES(DumpThreadStack(t->tid()), kStackFrameRegExp);

  // Register our own signal handler on SIGHUP, and ensure that
  // we get a bad Status if we try to use it.
  signal(SIGHUP, &fake_signal_handler);
  ASSERT_STR_CONTAINS(SetStackTraceSignal(SIGHUP).ToString(),
                      "unable to install signal handler");
  // Restore SIGHUP to its default disposition.
  signal(SIGHUP, SIG_DFL);

  // Stack traces should be disabled, since the failed SetStackTraceSignal()
  // call above left no stack-trace handler installed.
  ASSERT_STR_CONTAINS(DumpThreadStack(t->tid()), "unable to take thread stack");

  // Re-enable so that other tests pass.
  ASSERT_OK(SetStackTraceSignal(SIGUSR2));
}
| |
| // Test which dumps all known threads within this process. |
| // We don't validate the results in any way -- but this verifies that we can |
| // dump library threads such as the libc timer_thread and properly time out. |
TEST_F(DebugUtilTest, TestSnapshot) {
  // HACK: prior tests in this suite start threads. Even though they Join on the
  // threads before the test case finishes, there is actually a very short
  // period of time after Join() returns but before the actual thread has exited
  // and removed itself from /proc/self/task/. That means that 'ListThreads' below
  // can sometimes show these threads from prior test cases, and then the assertions
  // in this test case would fail.
  //
  // So, we have to wait here for the number of running threads to level off to the
  // expected value.
  // Ensure Kernel Stack Watchdog is running.
  KernelStackWatchdog::GetInstance();
  int initial_thread_count =
      1 // main thread
      + 1 // KernelStackWatchdog
      + (FLAGS_test_timeout_after > 0 ? 1 : 0) // test timeout thread if running
      + FLAGS_stress_cpu_threads;
#ifdef THREAD_SANITIZER
  initial_thread_count++; // tsan signal thread
#endif
  // The test and runtime environment runs various utility threads (for example,
  // the kernel stack watchdog, the TSAN runtime thread, the test timeout thread, etc).
  // Count them before we start any additional threads for this test.
  ASSERT_EVENTUALLY([&]{
    vector<pid_t> threads;
    ASSERT_OK(ListThreads(&threads));
    ASSERT_EQ(initial_thread_count, threads.size()) << threads;
  });

  // Start a bunch of sleeping threads.
  const int kNumThreads = 30;
  CountDownLatch l(1);
  vector<scoped_refptr<Thread>> threads(kNumThreads);
  for (int i = 0; i < kNumThreads; i++) {
    ASSERT_OK(Thread::Create("test", "test thread",
                             [&l]() { SleeperThread(&l); }, &threads[i]));
  }

  SCOPED_CLEANUP({
    // Allow the thread to finish.
    l.CountDown();
    for (auto& t : threads) {
      t->Join();
    }
  });

  StackTraceSnapshot snap;
  ASSERT_OK(snap.SnapshotAllStacks());
  int count = 0;
  int groups = 0;
  // Visit each group of threads sharing an identical stack; log every thread
  // in the group and one hex dump of the shared stack.
  snap.VisitGroups([&](ArrayView<StackTraceSnapshot::ThreadInfo> group) {
    groups++;
    for (const auto& info : group) {
      count++;
      LOG(INFO) << info.tid << " " << info.thread_name
                << " (" << info.status.ToString() << ")";
    }
    LOG(INFO) << group[0].stack.ToHexString();
  });
  int tsan_threads = 0;
#ifdef THREAD_SANITIZER
  // TSAN starts an extra thread of its own.
  tsan_threads++;
#endif
  // Every thread we expect to be running should have been visited.
  ASSERT_EQ(kNumThreads + initial_thread_count, count);
  // The threads might not have exactly identical stacks, but
  // we should have far fewer groups than the total number
  // of threads.
  ASSERT_LE(groups, kNumThreads / 2);
  // NOTE(review): presumably the TSAN-internal thread is the one whose stack
  // can't be collected, hence failures == tsan_threads -- confirm under TSAN.
  ASSERT_EQ(tsan_threads, snap.num_failed());
}
| |
| TEST_F(DebugUtilTest, Benchmark) { |
| CountDownLatch l(1); |
| scoped_refptr<Thread> t; |
| ASSERT_OK(Thread::Create("test", "test thread", [&l]() { SleeperThread(&l); }, &t)); |
| SCOPED_CLEANUP({ |
| // Allow the thread to finish. |
| l.CountDown(); |
| t->Join(); |
| }); |
| |
| for (bool symbolize : {false, true}) { |
| MonoTime end_time = MonoTime::Now() + MonoDelta::FromSeconds(1); |
| int count = 0; |
| volatile int prevent_optimize = 0; |
| while (MonoTime::Now() < end_time) { |
| StackTrace trace; |
| GetThreadStack(t->tid(), &trace); |
| if (symbolize) { |
| prevent_optimize += trace.Symbolize().size(); |
| } |
| count++; |
| } |
| LOG(INFO) << "Throughput: " << count << " dumps/second (symbolize=" << symbolize << ")"; |
| } |
| } |
| |
| int TakeStackTrace(struct dl_phdr_info* /*info*/, size_t /*size*/, void* data) { |
| StackTrace* s = reinterpret_cast<StackTrace*>(data); |
| s->Collect(0); |
| return 0; |
| } |
| |
| // Test that if we try to collect a stack trace while inside a libdl function |
| // call that we properly return the bogus stack indicating the issue. |
| // |
| // This doesn't work in ThreadSanitizer since we don't intercept dl_iterate_phdr |
| // in those builds (see note in unwind_safeness.cc). |
| #ifndef THREAD_SANITIZER |
| TEST_F(DebugUtilTest, TestUnwindWhileUnsafe) { |
| StackTrace s; |
| dl_iterate_phdr(&TakeStackTrace, &s); |
| ASSERT_STR_CONTAINS(s.Symbolize(), "CouldNotCollectStackTraceBecauseInsideLibDl"); |
| } |
| #endif |
| |
// dl_iterate_phdr callback that does nothing; invoking it still acquires the
// libdl-internal lock, which is what the race tests below need.
int DoNothingDlCallback(struct dl_phdr_info* /*info*/, size_t /*size*/, void* /*data*/) {
  return 0;
}
| |
| // Parameterized test which performs various operations which might be dangerous to |
| // collect a stack trace while the main thread tries to take stack traces. These |
| // operations are all possibly executed on normal application threads, so we need to |
| // ensure that if we happen to gather the stack from a thread in the middle of the |
| // function that we don't crash or deadlock. |
| // |
| // Example self-deadlock if we didn't have the appropriate workarounds in place: |
| // #0 __lll_lock_wait () |
| // #1 0x00007ffff6f16e42 in __GI___pthread_mutex_lock |
| // #2 0x00007ffff6c8601f in __GI___dl_iterate_phdr |
| // #3 0x0000000000695b02 in dl_iterate_phdr |
| // #4 0x000000000056d013 in _ULx86_64_dwarf_find_proc_info |
| // #5 0x000000000056d1d5 in fetch_proc_info (c=c@ent |
| // #6 0x000000000056e2e7 in _ULx86_64_dwarf_find_save_ |
| // #7 0x000000000056c1b9 in _ULx86_64_dwarf_step (c=c@ |
| // #8 0x000000000056be21 in _ULx86_64_step |
| // #9 0x0000000000566b1d in google::GetStackTrace |
| // #10 0x00000000004dc4d1 in kudu::StackTrace::Collect |
| // #11 kudu::(anonymous namespace)::HandleStackTraceSignal |
| // #12 <signal handler called> |
| // #13 0x00007ffff6f16e31 in __GI___pthread_mutex_lock |
| // #14 0x00007ffff6c8601f in __GI___dl_iterate_phdr |
| // #15 0x0000000000695b02 in dl_iterate_phdr |
// Operations that an application thread might be in the middle of when a
// stack-trace signal arrives; each is exercised by a RaceTest instantiation.
enum DangerousOp {
  DLOPEN_AND_CLOSE,
  DL_ITERATE_PHDR,
  GET_STACK_TRACE,
  MALLOC_AND_FREE
};
// Parameterized fixture: the parameter selects which dangerous operation the
// background thread performs while the main thread collects its stack.
class RaceTest : public DebugUtilTest, public ::testing::WithParamInterface<DangerousOp> {};
INSTANTIATE_TEST_CASE_P(DifferentRaces, RaceTest,
                        ::testing::Values(DLOPEN_AND_CLOSE,
                                          DL_ITERATE_PHDR,
                                          GET_STACK_TRACE,
                                          MALLOC_AND_FREE));
| |
| void DangerousOperationThread(DangerousOp op, CountDownLatch* l) { |
| while (l->count()) { |
| switch (op) { |
| case DLOPEN_AND_CLOSE: { |
| // Check races against dlopen/dlclose. |
| void* v = dlopen("libc.so.6", RTLD_LAZY); |
| CHECK(v); |
| dlclose(v); |
| break; |
| } |
| |
| case DL_ITERATE_PHDR: { |
| // Check for races against dl_iterate_phdr. |
| dl_iterate_phdr(&DoNothingDlCallback, nullptr); |
| break; |
| } |
| |
| case GET_STACK_TRACE: { |
| // Check for reentrancy issues |
| GetStackTrace(); |
| break; |
| } |
| |
| case MALLOC_AND_FREE: { |
| // Check large allocations in tcmalloc. |
| volatile char* x = new char[1024 * 1024 * 2]; |
| delete[] x; |
| break; |
| } |
| default: |
| LOG(FATAL) << "unknown op"; |
| } |
| } |
| } |
| |
| // Starts a thread performing dangerous operations and then gathers |
| // its stack trace in a loop trying to trigger races. |
| TEST_P(RaceTest, TestStackTraceRaces) { |
| DangerousOp op = GetParam(); |
| CountDownLatch l(1); |
| scoped_refptr<Thread> t; |
| ASSERT_OK(Thread::Create("test", "test thread", |
| [op, &l]() { DangerousOperationThread(op, &l); }, &t)); |
| SCOPED_CLEANUP({ |
| // Allow the thread to finish. |
| l.CountDown(); |
| // Crash if we can't join the thread after a reasonable amount of time. |
| // That probably indicates a deadlock. |
| CHECK_OK(ThreadJoiner(t.get()).give_up_after_ms(10000).Join()); |
| }); |
| MonoTime end_time = MonoTime::Now() + MonoDelta::FromSeconds(1); |
| while (MonoTime::Now() < end_time) { |
| StackTrace trace; |
| GetThreadStack(t->tid(), &trace); |
| } |
| } |
| |
| void BlockSignalsThread() { |
| sigset_t set; |
| sigemptyset(&set); |
| sigaddset(&set, SIGUSR2); |
| for (int i = 0; i < 3; i++) { |
| CHECK_ERR(pthread_sigmask((i % 2) ? SIG_UNBLOCK : SIG_BLOCK, &set, nullptr)); |
| SleepFor(MonoDelta::FromSeconds(1)); |
| } |
| } |
| |
| TEST_F(DebugUtilTest, TestThreadBlockingSignals) { |
| scoped_refptr<Thread> t; |
| ASSERT_OK(Thread::Create("test", "test thread", &BlockSignalsThread, &t)); |
| SCOPED_CLEANUP({ t->Join(); }); |
| string ret; |
| while (ret.find("unable to deliver signal") == string::npos) { |
| ret = DumpThreadStack(t->tid()); |
| LOG(INFO) << ret; |
| } |
| } |
| |
| // Test stack traces which time out despite the destination thread not blocking |
| // signals. |
TEST_F(DebugUtilTest, TestTimeouts) {
  const int kRunTimeSecs = AllowSlowTests() ? 5 : 1;

  CountDownLatch l(1);
  scoped_refptr<Thread> t;
  ASSERT_OK(Thread::Create("test", "test thread", [&l]() { SleeperThread(&l); }, &t));
  SCOPED_CLEANUP({
    // Allow the thread to finish.
    l.CountDown();
    t->Join();
  });

  // First, time a few stack traces to determine how long a non-timed-out stack
  // trace takes.
  vector<MicrosecondsInt64> durations;
  for (int i = 0; i < 20; i++) {
    StackTrace stack;
    auto st = GetMonoTimeMicros();
    ASSERT_OK(GetThreadStack(t->tid(), &stack));
    auto dur = GetMonoTimeMicros() - st;
    durations.push_back(dur);
  }

  // Compute the median to throw out outliers.
  std::sort(durations.begin(), durations.end());
  auto median_duration = durations[durations.size() / 2];
  LOG(INFO) << "Median duration: " << median_duration << "us";

  // Now take a bunch of stack traces with timeouts clustered around
  // the expected time. When we time out, we adjust the timeout to be
  // higher so the next attempt is less likely to time out. Conversely,
  // when we succeed, we adjust the timeout to be shorter so the next
  // attempt is more likely to time out. This has the effect of triggering
  // all the interesting cases: (a) success, (b) timeout, (c) timeout
  // exactly as the signal finishes.
  int num_timeouts = 0;
  int num_successes = 0;
  auto end_time = MonoTime::Now() + MonoDelta::FromSeconds(kRunTimeSecs);
  int64_t timeout_us = median_duration;
  while (MonoTime::Now() < end_time) {
    StackTraceCollector stc;
    // Allocate Stack on the heap so that if we get a use-after-free it
    // will be caught more easily by ASAN.
    std::unique_ptr<StackTrace> stack(new StackTrace());
    ASSERT_OK(stc.TriggerAsync(t->tid(), stack.get()));
    Status s = stc.AwaitCollection(MonoTime::Now() + MonoDelta::FromMicroseconds(timeout_us));
    if (s.ok()) {
      // Succeeded: shrink the timeout so the next attempt is more likely
      // to time out (negative feedback toward the boundary case).
      num_successes++;
      timeout_us--;
    } else if (s.IsTimedOut()) {
      // Timed out: grow the timeout so the next attempt is more likely
      // to succeed.
      num_timeouts++;
      timeout_us++;
    } else {
      FAIL() << "Unexpected status: " << s.ToString();
    }
  }
  LOG(INFO) << "Timed out " << num_timeouts << " times";
  LOG(INFO) << "Succeeded " << num_successes << " times";
}
| |
| #endif |
| } // namespace kudu |