blob: b550c03a9d31ec01b26088cc3c9edceba15fedc6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifdef BRPC_BTHREAD_TRACER
#include "bthread/task_tracer.h"
#include "bthread/processor.h"
#include "bthread/task_group.h"
#include "butil/fd_utility.h"
#include "butil/memory/scope_guard.h"
#include "butil/reloadable_flags.h"
#include "absl/debugging/stacktrace.h"
#include "absl/debugging/symbolize.h"
#include <csignal>
#include <gflags/gflags.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
namespace bthread {
DEFINE_uint32(signal_trace_timeout_ms, 50, "Timeout for signal trace in ms");
BUTIL_VALIDATE_GFLAG(signal_trace_timeout_ms, butil::PositiveInteger<uint32_t>);
// Note that SIGURG handler may be registered by some library such as cgo
// so let the signal number be configurable
DEFINE_int32(signal_number_for_trace, SIGURG,
"signal number used for stack trace, default to SIGURG");
extern BAIDU_THREAD_LOCAL TaskMeta* pthread_fake_meta;
TaskTracer::SignalSync::~SignalSync() {
if (pipe_fds[0] >= 0) {
close(pipe_fds[0]);
}
if (pipe_fds[1] >= 0) {
close(pipe_fds[1]);
}
}
bool TaskTracer::SignalSync::Init() {
if (pipe(pipe_fds) != 0) {
PLOG(ERROR) << "Fail to pipe";
return false;
}
if (butil::make_non_blocking(pipe_fds[0]) != 0) {
PLOG(ERROR) << "Fail to make_non_blocking";
return false;
}
if (butil::make_non_blocking(pipe_fds[1]) != 0) {
PLOG(ERROR) << "Fail to make_non_blocking";
return false;
}
return true;
}
std::string TaskTracer::Result::OutputToString() const {
std::string str;
if (frame_count > 0) {
str.reserve(1024);
}
char symbol_name[128];
char unknown_symbol_name[] = "<unknown>";
if (frame_count > 0) {
for (size_t i = 0; i < frame_count; ++i) {
butil::string_appendf(&str, "#%zu 0x%16p ", i, ips[i]);
if (absl::Symbolize(ips[i], symbol_name, arraysize(symbol_name))) {
str.append(symbol_name);
} else {
str.append(unknown_symbol_name);
}
if (i + 1 < frame_count) {
str.push_back('\n');
}
}
} else {
str.append("No frame");
}
if (error) {
str.append("\nError message: ");
str.append(err_msg);
}
return str;
}
void TaskTracer::Result::OutputToStream(std::ostream& os) const {
char symbol_name[128];
char unknown_symbol_name[] = "<unknown>";
if (frame_count > 0) {
for (size_t i = 0; i < frame_count; ++i) {
os << "# " << i << " 0x" << std::hex << ips[i] << std::dec << " ";
if (absl::Symbolize(ips[i], symbol_name, arraysize(symbol_name))) {
os << symbol_name;
} else {
os << unknown_symbol_name;
}
if (i + 1 < frame_count) {
os << '\n';
}
}
} else {
os << "No frame";
}
if (error) {
os << "\nError message: " << err_msg;
}
}
bool TaskTracer::Init() {
if (_trace_time.expose("bthread_trace_time") != 0) {
LOG(ERROR) << "Fail to expose bthread_trace_time";
return false;
}
if (!RegisterSignalHandler()) {
return false;
}
// Warm up the libunwind.
unw_cursor_t cursor;
if (0 == unw_getcontext(&_context) && unw_init_local(&cursor, &_context) == 0) {
butil::ignore_result(TraceByLibunwind(cursor));
}
return true;
}
void TaskTracer::set_status(TaskStatus s, TaskMeta* m) {
CHECK_NE(TASK_STATUS_RUNNING, s) << "Use `set_running_status' instead";
CHECK_NE(TASK_STATUS_END, s) << "Use `set_end_status_unsafe' instead";
bool tracing = false;
{
BAIDU_SCOPED_LOCK(m->version_lock);
if (TASK_STATUS_UNKNOWN == m->status && TASK_STATUS_JUMPING == s) {
// Do not update status for jumping when bthread is ending.
return;
}
tracing = m->traced;
// bthread is scheduled for the first time.
if (TASK_STATUS_READY == s && NULL == m->stack) {
m->status = TASK_STATUS_FIRST_READY;
} else {
m->status = s;
}
if (TASK_STATUS_CREATED == s) {
m->worker_tid = pthread_t{};
}
}
// Make sure bthread does not jump stack when it is being traced.
if (tracing && TASK_STATUS_JUMPING == s) {
WaitForTracing(m);
}
}
void TaskTracer::set_running_status(pthread_t worker_tid, TaskMeta* m) {
BAIDU_SCOPED_LOCK(m->version_lock);
m->worker_tid = worker_tid;
m->status = TASK_STATUS_RUNNING;
}
bool TaskTracer::set_end_status_unsafe(TaskMeta* m) {
m->status = TASK_STATUS_END;
return m->traced;
}
std::string TaskTracer::Trace(bthread_t tid) {
return TraceImpl(tid).OutputToString();
}
void TaskTracer::Trace(std::ostream& os, bthread_t tid) {
TraceImpl(tid).OutputToStream(os);
}
void TaskTracer::WaitForTracing(TaskMeta* m) {
BAIDU_SCOPED_LOCK(m->trace_lock);
// Acquiring trace_lock means tracing is done.
}
TaskTracer::Result TaskTracer::TraceImpl(bthread_t tid) {
butil::Timer timer(butil::Timer::STARTED);
BRPC_SCOPE_EXIT {
timer.stop();
_trace_time << timer.n_elapsed();
};
if (tid == bthread_self() ||
(NULL != pthread_fake_meta && tid == pthread_fake_meta->tid)) {
return Result::MakeErrorResult("Forbid to trace self=%d", tid);
}
// Make sure only one bthread is traced at a time.
BAIDU_SCOPED_LOCK(_trace_request_mutex);
// The chance to remove unused SignalSyncs.
auto iter = std::remove_if(
_inuse_signal_syncs.begin(), _inuse_signal_syncs.end(),
[](butil::intrusive_ptr<SignalSync>& sync) {
return sync->ref_count() == 1;
});
_inuse_signal_syncs.erase(iter, _inuse_signal_syncs.end());
TaskMeta* m = TaskGroup::address_meta(tid);
if (NULL == m) {
return Result::MakeErrorResult("bthread=%d never existed", tid);
}
BAIDU_SCOPED_LOCK(m->trace_lock);
TaskStatus status;
pthread_t worker_tid;
const uint32_t given_version = get_version(tid);
{
BAIDU_SCOPED_LOCK(m->version_lock);
if (given_version != *m->version_butex) {
return Result::MakeErrorResult("bthread=%d not exist now", tid);
}
status = m->status;
if (TASK_STATUS_UNKNOWN == status) {
return Result::MakeErrorResult("bthread=%d not exist now", tid);
} else if (TASK_STATUS_CREATED == status) {
return Result::MakeErrorResult("bthread=%d has just been created", tid);
} else if (TASK_STATUS_FIRST_READY == status) {
return Result::MakeErrorResult("bthread=%d is scheduled for the first time", tid);
} else if (TASK_STATUS_END == status) {
return Result::MakeErrorResult("bthread=%d has ended", tid);
} else if (TASK_STATUS_JUMPING == status) {
return Result::MakeErrorResult("bthread=%d is jumping stack", tid);
}
// Start tracing.
m->traced = true;
worker_tid = m->worker_tid;
}
BRPC_SCOPE_EXIT {
{
BAIDU_SCOPED_LOCK(m->version_lock);
// If m->status is BTHREAD_STATUS_END, the bthread also waits for
// tracing completion, so given_version != *m->version_butex is OK.
m->traced = false;
}
};
// The status may be RUNNING, SUSPENDED, or READY, which is traceable.
if (TASK_STATUS_RUNNING == status) {
return SignalTrace(worker_tid);
} else if (TASK_STATUS_SUSPENDED == status || TASK_STATUS_READY == status) {
return ContextTrace(m->stack->context);
}
return Result::MakeErrorResult("Invalid TaskStatus=%d of bthread=%d", status, tid);
}
// Instruct ASan to ignore this function.
BUTIL_ATTRIBUTE_NO_SANITIZE_ADDRESS
unw_cursor_t TaskTracer::MakeCursor(bthread_fcontext_t fcontext) {
unw_cursor_t cursor;
unw_init_local(&cursor, &_context);
auto regs = reinterpret_cast<uintptr_t*>(fcontext);
// Only need RBP, RIP, RSP on x86_64.
// The base pointer (RBP).
if (unw_set_reg(&cursor, UNW_X86_64_RBP, regs[6]) != 0) {
LOG(ERROR) << "Fail to set RBP";
}
// The instruction pointer (RIP).
if (unw_set_reg(&cursor, UNW_REG_IP, regs[7]) != 0) {
LOG(ERROR) << "Fail to set RIP";
}
#if UNW_VERSION_MAJOR >= 1 && UNW_VERSION_MINOR >= 7
// The stack pointer (RSP).
if (unw_set_reg(&cursor, UNW_REG_SP, regs[8]) != 0) {
LOG(ERROR) << "Fail to set RSP";
}
#endif
return cursor;
}
TaskTracer::Result TaskTracer::ContextTrace(bthread_fcontext_t fcontext) {
unw_cursor_t cursor = MakeCursor(fcontext);
return TraceByLibunwind(cursor);
}
TaskTracer::Result TaskTracer::TraceByLibunwind(unw_cursor_t& cursor) {
Result result;
while (result.frame_count < arraysize(result.ips)) {
int rc = unw_step(&cursor);
if (0 == rc) {
break;
} else if (rc < 0) {
return Result::MakeErrorResult("Fail to unw_step, rc=%d", rc);
}
unw_word_t ip = 0;
if (0 != unw_get_reg(&cursor, UNW_REG_IP, &ip)) {
continue;
}
result.ips[result.frame_count] = reinterpret_cast<void*>(ip);
++result.frame_count;
}
return result;
}
bool TaskTracer::RegisterSignalHandler() {
// Set up the signal handler.
_signal_num = FLAGS_signal_number_for_trace;
struct sigaction old_sa{};
struct sigaction sa{};
sa.sa_sigaction = SignalHandler;
sa.sa_flags = SA_SIGINFO;
sigfillset(&sa.sa_mask);
if (sigaction(_signal_num, &sa, &old_sa) != 0) {
PLOG(ERROR) << "Failed to sigaction";
return false;
}
if (NULL != old_sa.sa_handler || NULL != old_sa.sa_sigaction) {
LOG(ERROR) << "Signal handler of signal number "
<< _signal_num << " is already registered";
return false;
}
return true;
}
// Caution: This function should be async-signal-safety.
void TaskTracer::SignalHandler(int, siginfo_t* info, void* context) {
ErrnoGuard guard;
// Ref has been taken before the signal is sent, so no need to add ref here.
butil::intrusive_ptr<SignalSync> signal_sync(
static_cast<SignalSync*>(info->si_value.sival_ptr), false);
if (NULL == signal_sync) {
// The signal is not from Tracer, such as TaskControl, do nothing.
return;
}
// Skip the first frame, which is the signal handler itself.
signal_sync->result.frame_count = absl::DefaultStackUnwinder(signal_sync->result.ips, NULL,
arraysize(signal_sync->result.ips), 1,
context, NULL);
// write() is async-signal-safe.
// Don't care about the return value.
butil::ignore_result(write(signal_sync->pipe_fds[1], "1", 1));
}
TaskTracer::Result TaskTracer::SignalTrace(pthread_t worker_tid) {
// CAUTION:
// https://github.com/gperftools/gperftools/wiki/gperftools'-stacktrace-capturing-methods-and-their-issues#libunwind
// Generally, libunwind promises async-signal-safety for capturing backtraces.
// But in practice, it is only partially async-signal-safe due to reliance on
// dl_iterate_phdr API, which is used to enumerate all loaded ELF modules
// (.so files and main executable binary). No libc offers dl_iterate_pdhr that
// is async-signal-safe. In practice, the issue may happen if we take tracing
// signal during an existing dl_iterate_phdr call (such as when the program
// throws an exception) or during dlopen/dlclose-ing some .so module.
// Deadlock call stack:
// #0 __lll_lock_wait (futex=futex@entry=0x7f0d3d7f0990 <_rtld_global+2352>, private=0) at lowlevellock.c:52
// #1 0x00007f0d3a73c131 in __GI___pthread_mutex_lock (mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at ../nptl/pthread_mutex_lock.c:115
// #2 0x00007f0d38eb0231 in __GI___dl_iterate_phdr (callback=callback@entry=0x7f0d38c456a0 <_ULx86_64_dwarf_callback>, data=data@entry=0x7f0d07defad0) at dl-iteratephdr.c:40
// #3 0x00007f0d38c45d79 in _ULx86_64_dwarf_find_proc_info (as=0x7f0d38c4f340 <local_addr_space>, ip=ip@entry=139694791966897, pi=pi@entry=0x7f0d07df0498, need_unwind_info=need_unwind_info@entry=1, arg=0x7f0d07df0340) at dwarf/Gfind_proc_info-lsb.c:759
// #4 0x00007f0d38c43260 in fetch_proc_info (c=c@entry=0x7f0d07df0340, ip=139694791966897) at dwarf/Gparser.c:461
// #5 0x00007f0d38c44e46 in find_reg_state (sr=0x7f0d07defd10, c=0x7f0d07df0340) at dwarf/Gparser.c:925
// #6 _ULx86_64_dwarf_step (c=c@entry=0x7f0d07df0340) at dwarf/Gparser.c:972
// #7 0x00007f0d38c40c14 in _ULx86_64_step (cursor=cursor@entry=0x7f0d07df0340) at x86_64/Gstep.c:71
// #8 0x00007f0d399ed8f6 in GetStackTraceWithContext_libunwind (result=<optimized out>, max_depth=63, skip_count=132054887, ucp=<optimized out>) at src/stacktrace_libunwind-inl.h:138
// #9 0x00007f0d399ee083 in GetStackTraceWithContext (result=0x7f0d07df07b8, max_depth=63, skip_count=3, uc=0x7f0d07df0a40) at src/stacktrace.cc:305
// #10 0x00007f0d399ea992 in CpuProfiler::prof_handler (signal_ucontext=<optimized out>, cpu_profiler=0x7f0d399f6600, sig=<optimized out>) at src/profiler.cc:359
// #11 0x00007f0d399eb633 in ProfileHandler::SignalHandler (sig=27, sinfo=0x7f0d07df0b70, ucontext=0x7f0d07df0a40) at src/profile-handler.cc:530
// #12 <signal handler called>
// #13 0x00007f0d3a73c0b1 in __GI___pthread_mutex_lock (mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at ../nptl/pthread_mutex_lock.c:115
// #14 0x00007f0d38eb0231 in __GI___dl_iterate_phdr (callback=0x7f0d38f525f0, data=0x7f0d07df10c0) at dl-iteratephdr.c:40
// #15 0x00007f0d38f536c1 in _Unwind_Find_FDE () from /lib/x86_64-linux-gnu/libgcc_s.so.1
// #16 0x00007f0d38f4f868 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
// #17 0x00007f0d38f50a20 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
// #18 0x00007f0d38f50f99 in _Unwind_RaiseException () from /lib/x86_64-linux-gnu/libgcc_s.so.1
// #19 0x00007f0d390088dc in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6
// #20 0x00007f0d3b5b2245 in __cxxabiv1::__cxa_throw (thrownException=0x7f0d114ea8c0, type=0x7f0d3d6dd830 <typeinfo for rockset::GRPCError>, destructor=<optimized out>) at /src/folly/folly/experimental/exception_tracer/ExceptionTracerLib.cpp:107
//
// Therefore, use async-signal-safe absl::DefaultStackUnwinder instead of libunwind.
if (pthread_equal(worker_tid, pthread_self())) {
return Result::MakeErrorResult("Forbid to trace self");
}
// Each signal trace has an independent SignalSync to
// prevent the previous SignalHandler from affecting the new SignalTrace.
butil::intrusive_ptr<SignalSync> signal_sync(new SignalSync());
if (!signal_sync->Init()) {
return Result::MakeErrorResult("Fail to init SignalSync");
}
// Add reference for SignalHandler.
signal_sync->AddRefManually();
sigval value{};
value.sival_ptr = signal_sync.get();
size_t sigqueue_try = 0;
while (pthread_sigqueue(worker_tid, _signal_num, value) != 0) {
if (errno != EAGAIN || sigqueue_try++ >= 3) {
// Remove reference for SignalHandler.
signal_sync->RemoveRefManually();
return Result::MakeErrorResult("Fail to pthread_sigqueue: %s", berror());
}
}
// Wait for the signal handler to complete.
butil::Timer timer;
if (FLAGS_signal_trace_timeout_ms > 0) {
timer.start();
}
pollfd poll_fd = {signal_sync->pipe_fds[0], POLLIN, 0};
// Wait for tracing to complete.
while (true) {
int timeout = -1;
if (FLAGS_signal_trace_timeout_ms > 0) {
timer.stop();
if (timer.m_elapsed() >= FLAGS_signal_trace_timeout_ms) {
return Result::MakeErrorResult("Timeout exceed %dms", FLAGS_signal_trace_timeout_ms);
}
timeout = (int64_t)FLAGS_signal_trace_timeout_ms - timer.m_elapsed();
}
// poll() is async-signal-safe.
// Self-pipe trick: https://man7.org/tlpi/code/online/dist/altio/self_pipe.c.html
int rc = poll(&poll_fd, 1, timeout);
if (-1 == rc) {
if (EINTR == errno) {
continue;
}
// Extend the lifetime of `signal_sync' to
// avoid it being destroyed in the signal handler.
_inuse_signal_syncs.push_back(signal_sync);
return Result::MakeErrorResult("Fail to poll: %s", berror());
}
break;
}
return signal_sync->result;
}
} // namespace bthread
#endif // BRPC_BTHREAD_TRACER