blob: 66310b4e6aba74ea5c260bbe5e68d2f630652721 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "kudu/util/minidump.h"
// IWYU pragma: no_include <features.h>
#include <unistd.h>
#include <atomic>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <initializer_list>
#include <memory>
#include <ostream>
#include <string>
#if defined(__linux__)
#include <breakpad/client/linux/handler/exception_handler.h>
#include <breakpad/client/linux/handler/minidump_descriptor.h>
#include <breakpad/common/linux/linux_libc_support.h>
#include <breakpad/common/using_std_string.h>
#include <breakpad/third_party/lss/linux_syscall_support.h>
#endif // defined(__linux__)
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "kudu/gutil/macros.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/human_readable.h"
#include "kudu/util/env.h"
#include "kudu/util/env_util.h"
#include "kudu/util/errno.h"
#include "kudu/util/flag_tags.h"
#include "kudu/util/path_util.h"
#include "kudu/util/status.h"
#include "kudu/util/thread.h"
using kudu::env_util::CreateDirIfMissing;
using std::string;
// Compile-time indicator of whether this build targets Linux. It serves as
// the default value of --enable_minidumps and is consulted by that flag's
// validator.
static constexpr bool kMinidumpPlatformSupported =
#if defined(__linux__)
    true;
#else
    false;
#endif  // defined(__linux__)
// Flags defined elsewhere. In this file --log_dir is the base for a relative
// --minidump_path, and --log_filename (when non-empty) names the per-daemon
// minidump sub-directory.
DECLARE_string(log_dir);
DECLARE_string(log_filename);
// Master switch for minidump support. Its default follows
// kMinidumpPlatformSupported, so it is on by default only for Linux builds.
DEFINE_bool(enable_minidumps, kMinidumpPlatformSupported,
"Whether to enable minidump generation upon process crash or SIGUSR1. "
"Currently only supported on Linux systems.");
TAG_FLAG(enable_minidumps, advanced);
TAG_FLAG(enable_minidumps, evolving);
// Flag validator for --enable_minidumps. The only rejected combination is a
// request to enable minidumps in a build where kMinidumpPlatformSupported is
// false; everything else is accepted.
static bool ValidateMinidumpEnabled(const char* /*flagname*/, bool value) {
  const bool unsupported_request = value && !kMinidumpPlatformSupported;
  return !unsupported_request;
}
// Attach ValidateMinidumpEnabled() to --enable_minidumps.
DEFINE_validator(enable_minidumps, &ValidateMinidumpEnabled);
// Destination directory for minidump files. A value that does not start with
// '/' is joined onto --log_dir by InitMinidumpExceptionHandler().
DEFINE_string(minidump_path, "minidumps", "Directory to write minidump files to. This "
"can be either an absolute path or a path relative to --log_dir. Each daemon will "
"create an additional sub-directory to prevent naming conflicts and to make it "
"easier to identify a crashing daemon. Minidump files contain crash-related "
"information in a compressed format. Minidumps will be written when a daemon exits "
"unexpectedly, for example on an unhandled exception or signal, or when a "
"SIGUSR1 signal is sent to the process. Cannot be set to an empty value.");
TAG_FLAG(minidump_path, evolving);
// Flag validator for --minidump_path: an empty value is rejected, any
// non-empty value is accepted.
static bool ValidateMinidumpPath(const char* /*flagname*/, const string& value) {
  if (value.empty()) {
    return false;
  }
  return true;
}
// Attach ValidateMinidumpPath() to --minidump_path.
DEFINE_validator(minidump_path, &ValidateMinidumpPath);
// Retention limit read by DeleteExcessMinidumpFiles(); values of 0 or less
// disable deletion there.
DEFINE_int32(max_minidumps, 9, "Maximum number of minidump files to keep per daemon. "
"Older files are removed first. Set to 0 to keep all minidump files.");
TAG_FLAG(max_minidumps, evolving);
// Size hint (in KB) passed to the breakpad minidump descriptor, converted to
// bytes, when the value is positive.
DEFINE_int32(minidump_size_limit_hint_kb, 20480, "Size limit hint for minidump files in "
"KB. If a minidump exceeds this value, then breakpad will reduce the stack memory it "
"collects for each thread from 8KB to 2KB. However it will always include the full "
"stack memory for the first 20 threads, including the thread that crashed.");
TAG_FLAG(minidump_size_limit_hint_kb, advanced);
TAG_FLAG(minidump_size_limit_hint_kb, evolving);
#if !defined(__linux__)
namespace google_breakpad {
// Empty stand-in for breakpad's ExceptionHandler. It is only compiled when the
// real breakpad headers are not included (non-Linux builds) and exists to
// avoid an undefined symbol error on Mac.
class ExceptionHandler {
 public:
  // Defaulted rather than empty user-provided bodies: the class has no state
  // to set up or tear down.
  ExceptionHandler() = default;
  ~ExceptionHandler() = default;
};
}  // namespace google_breakpad
#endif // !defined(__linux__)
namespace kudu {
// Builds and returns a signal set whose only member is 'signo'.
// CHECK-fails if the set cannot be emptied or 'signo' cannot be added to it.
static sigset_t GetSigset(int signo) {
sigset_t signals;
// Start from an empty set so that 'signo' ends up as the sole member.
CHECK_EQ(0, sigemptyset(&signals));
CHECK_EQ(0, sigaddset(&signals, signo));
return signals;
}
#if defined(__linux__)
// Breakpad filter hook, consulted by the exception handler before a minidump
// is produced. The dump is written only when the hook returns true; this
// implementation never vetoes a dump and ignores its context argument.
static bool FilterCallback(void* /*context*/) {
  constexpr bool kWriteDump = true;
  return kWriteDump;
}
// Writes 'msg1' immediately followed by 'msg2' and a newline, first to stdout
// and then to stderr. Both arguments must be null-terminated strings.
//
// The result of each write is ignored: this is best-effort output emitted
// from the minidump callback.
static void WriteLineStdoutStderr(const char* msg1, const char* msg2) {
  // We use Breakpad's reimplementation of strlen(), called my_strlen(), from
  // linux_libc_support.h to avoid calling into libc.
  // A comment from linux_libc_support.h is reproduced here:
  // "This header provides replacements for libc functions that we need. If we
  // call the libc functions directly we risk crashing in the dynamic linker as
  // it tries to resolve uncached PLT entries."
  //
  // The lengths are kept as size_t (rather than narrowed to int) since they
  // are passed straight through as the byte count of each write.
  const size_t msg1_len = my_strlen(msg1);
  const size_t msg2_len = my_strlen(msg2);

  // We use sys_write() from linux_syscall_support.h here per the
  // recommendation of the breakpad docs for the same reasons as above.
  for (int fd : {STDOUT_FILENO, STDERR_FILENO}) {
    sys_write(fd, msg1, msg1_len);
    sys_write(fd, msg2, msg2_len);
    sys_write(fd, "\n", 1);
  }
}
// Callback for breakpad. It is called whenever a minidump file has been
// written and should not be called directly. It logs the event before breakpad
// crashes the process. Due to the process being in a failed state we write to
// stdout/stderr and let the surrounding redirection make sure the output gets
// logged. The calls might still fail in unknown scenarios as the process is in
// a broken state. However we don't rely on them as the minidump file has been
// written already.
//
// 'descriptor' supplies the path that is reported in the message.
// 'context' is either null or points to a bool; a pointed-to value of true
// marks the dump as user-requested (see MinidumpExceptionHandler::WriteMinidump()).
// 'succeeded' tells whether breakpad managed to write the minidump.
//
// Returns 'succeeded' for user-requested dumps and false otherwise.
static bool DumpCallback(const google_breakpad::MinidumpDescriptor& descriptor,
                         void* context, bool succeeded) {
  // Indicate whether a minidump file was written successfully. Write message
  // to stdout/stderr, which will usually be captured in the INFO/ERROR log.
  const char* const outcome_prefix =
      succeeded ? "Wrote minidump to " : "Failed to write minidump to ";
  WriteLineStdoutStderr(outcome_prefix, descriptor.path());

  // If invoked by a user signal, return the actual success or failure of
  // writing the minidump file so that we can print a user-friendly error
  // message if writing the minidump fails.
  //
  // 'context' is a void*, so static_cast is sufficient to recover the bool
  // pointer; the flag is only read, hence the pointer-to-const.
  const bool* const user_signal_flag = static_cast<const bool*>(context);
  const bool is_user_signal = user_signal_flag != nullptr && *user_signal_flag;
  if (is_user_signal) {
    return succeeded;
  }

  // For crash signals. If we didn't want to invoke the previously-installed
  // signal handler from glog, we would return the value received in
  // 'succeeded' as described in the breakpad documentation. If this callback
  // function returned true, breakpad would not invoke the previously-installed
  // signal handler; instead, it would invoke the default signal handler, which
  // would cause the process to crash immediately after writing the minidump.
  //
  // We make this callback always return false so that breakpad will invoke any
  // previously-installed signal handler afterward. We want that to happen
  // because the glog signal handlers print a helpful stacktrace on crash.
  // That's convenient to have, because unlike a minidump, it doesn't need to
  // be decoded to be useful for debugging.
  return false;
}
// Failure function whose only action is to abort() the process; it never
// returns. It is installed as the glog failure function when the minidump
// handler is initialized.
ATTRIBUTE_NORETURN static void AbortFailureFunction() {
  std::abort();
}
bool MinidumpExceptionHandler::WriteMinidump() {
bool user_signal = true;
return google_breakpad::ExceptionHandler::WriteMinidump(minidump_dir(),
&DumpCallback,
&user_signal);
}
// Resolves and creates the per-daemon minidump directory, then installs the
// breakpad exception handler that writes minidumps into it.
//
// Returns a non-OK Status if either directory level cannot be created, if the
// path cannot be canonicalized or inspected, or if the final path is not a
// directory.
Status MinidumpExceptionHandler::InitMinidumpExceptionHandler() {
  // A --minidump_path that does not start with '/' is taken relative to
  // --log_dir.
  minidump_dir_ = FLAGS_minidump_path;
  const bool is_absolute = minidump_dir_[0] == '/';
  if (!is_absolute) {
    minidump_dir_ = JoinPathSegments(FLAGS_log_dir, minidump_dir_);
  }

  // First level: the shared top-level minidump directory.
  Env* const env = Env::Default();
  RETURN_NOT_OK_PREPEND(CreateDirIfMissing(env, minidump_dir_),
                        "Error creating top-level minidump directory");

  // Second level: a sub-directory named after the program. This makes
  // identification easier, prevents name collisions between the files, and is
  // consistent with how Impala organizes its minidump files. --log_filename
  // wins when it is set; otherwise the executable's short name is used.
  const char* program_name = nullptr;
  if (FLAGS_log_filename.empty()) {
    program_name = gflags::ProgramInvocationShortName();
  } else {
    program_name = FLAGS_log_filename.c_str();
  }
  minidump_dir_ = JoinPathSegments(minidump_dir_, program_name);

  // The minidump doesn't get written if the directory is missing, so make
  // sure it exists.
  RETURN_NOT_OK_PREPEND(CreateDirIfMissing(env, minidump_dir_),
                        "Error creating minidump directory");

  // Confirm that the path really is a directory. Canonicalize first in case
  // it is a symlink to a directory.
  string resolved_dir;
  RETURN_NOT_OK(env->Canonicalize(minidump_dir_, &resolved_dir));
  bool resolved_is_dir = false;
  RETURN_NOT_OK(env->IsDirectory(resolved_dir, &resolved_is_dir));
  if (!resolved_is_dir) {
    return Status::IOError("Unable to create minidump directory", resolved_dir);
  }

  google_breakpad::MinidumpDescriptor desc(minidump_dir_);

  // Apply the size limit hint only when one is configured (positive value).
  const int64_t limit_hint_kb = FLAGS_minidump_size_limit_hint_kb;
  if (limit_hint_kb > 0) {
    const size_t size_limit = 1024 * limit_hint_kb;
    LOG(INFO) << "Setting minidump size limit to "
              << HumanReadableNumBytes::ToStringWithoutRounding(size_limit);
    desc.set_size_limit(size_limit);
  }

  // Replace the glog failure function with a plain abort(). Otherwise, with
  // minidumps enabled, a LOG(FATAL) or CHECK() prints two (2) stack traces:
  // one from the glog failure function and one from the glog signal handler,
  // because DumpCallback() always returns false in the non-user signal case.
  google::InstallFailureFunction(&AbortFailureFunction);

  breakpad_handler_.reset(new google_breakpad::ExceptionHandler(
      desc,            // Path to minidump directory.
      FilterCallback,  // Indicates whether to write the dump.
      DumpCallback,    // Output a log message when dumping.
      nullptr,         // Optional context for callbacks.
      true,            // Whether to install a crash handler.
      -1));            // -1: Use in-process dump generation.
  return Status::OK();
}
// Sets up minidump support for this instance: initializes the breakpad
// handler and then starts the SIGUSR1 handler thread. Does nothing and
// returns OK when --enable_minidumps is off. Returns the first non-OK Status
// encountered.
Status MinidumpExceptionHandler::RegisterMinidumpExceptionHandler() {
  if (!FLAGS_enable_minidumps) {
    return Status::OK();
  }

  // Ensure only one active instance is alive per process at any given time.
  CHECK_EQ(0, MinidumpExceptionHandler::current_num_instances_.fetch_add(1));

  RETURN_NOT_OK(InitMinidumpExceptionHandler());
  return StartUserSignalHandlerThread();
}
// Reverses RegisterMinidumpExceptionHandler(): stops the SIGUSR1 handler
// thread and releases this instance's slot in the per-process instance
// counter. Does nothing when --enable_minidumps is off.
void MinidumpExceptionHandler::UnregisterMinidumpExceptionHandler() {
  if (FLAGS_enable_minidumps) {
    StopUserSignalHandlerThread();
    // This instance must have been the single registered one.
    CHECK_EQ(1, MinidumpExceptionHandler::current_num_instances_.fetch_sub(1));
  }
}
// Marks the SIGUSR1 handler as running and spawns the thread that executes
// RunUserSignalHandlerThread(). Returns the Status of the thread creation.
Status MinidumpExceptionHandler::StartUserSignalHandlerThread() {
  // Set the flag before the thread exists so that its loop sees 'true'.
  user_signal_handler_thread_running_.store(true, std::memory_order_relaxed);
  return Thread::Create("minidump",
                        "sigusr1-handler",
                        [this] { RunUserSignalHandlerThread(); },
                        &user_signal_handler_thread_);
}
// Asks the SIGUSR1 handler thread to exit and waits for it to finish. The
// order matters: the running flag is cleared first, then the thread is woken
// with a SIGUSR1 so that it re-checks the flag, and only then is it joined.
void MinidumpExceptionHandler::StopUserSignalHandlerThread() {
  user_signal_handler_thread_running_.store(false, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_release);  // Store before signal.

  // Send SIGUSR1 to our own process, which will wake the handler thread up.
  const pid_t self_pid = getpid();
  kill(self_pid, SIGUSR1);

  user_signal_handler_thread_->Join();
}
// Body of the SIGUSR1 handler thread. Waits for SIGUSR1 in a loop and writes
// a minidump for each one received, until the running flag has been cleared
// (see StopUserSignalHandlerThread()), at which point the next SIGUSR1 makes
// the thread exit. A failed sigwait() or an unexpected signal number is fatal.
void MinidumpExceptionHandler::RunUserSignalHandlerThread() {
  const sigset_t signals = GetSigset(SIGUSR1);
  for (;;) {
    int signal = 0;
    const int err = sigwait(&signals, &signal);
    CHECK(err == 0) << "sigwait(): " << ErrnoToString(err) << ": " << err;
    CHECK_EQ(SIGUSR1, signal);

    const bool still_running =
        user_signal_handler_thread_running_.load(std::memory_order_relaxed);
    if (!still_running) {
      // Exit thread if we are shutting down.
      break;
    }

    const bool dump_written = WriteMinidump();
    if (!dump_written) {
      LOG(WARNING) << "Received USR1 signal but failed to write minidump";
    }
  }
}
#else // defined(__linux__)
// Breakpad is not supported on Mac at the time of writing, so on non-Linux
// builds every method defined for Linux above is replaced by a stub that does
// nothing.

// Stub: nothing to initialize.
Status MinidumpExceptionHandler::InitMinidumpExceptionHandler() { return Status::OK(); }

// Stub: no-op on non-Linux platforms.
Status MinidumpExceptionHandler::RegisterMinidumpExceptionHandler() { return Status::OK(); }

// Stub: nothing was registered, so there is nothing to undo.
void MinidumpExceptionHandler::UnregisterMinidumpExceptionHandler() {}

// Stub: reports success without writing anything.
bool MinidumpExceptionHandler::WriteMinidump() { return true; }

// Stub: no signal handler thread is started.
Status MinidumpExceptionHandler::StartUserSignalHandlerThread() { return Status::OK(); }

// Stub: there is no signal handler thread to stop.
void MinidumpExceptionHandler::StopUserSignalHandlerThread() {}

// Stub: returns immediately.
void MinidumpExceptionHandler::RunUserSignalHandlerThread() {}
#endif // defined(__linux__)
// Per-process count of registered handler instances. The Linux
// register/unregister paths increment and decrement it and CHECK that at most
// one instance is registered at a time.
std::atomic<int> MinidumpExceptionHandler::current_num_instances_;

// Registers the handler on construction; a registration failure is fatal.
MinidumpExceptionHandler::MinidumpExceptionHandler() {
  CHECK_OK(this->RegisterMinidumpExceptionHandler());
}

// Unregisters the handler on destruction.
MinidumpExceptionHandler::~MinidumpExceptionHandler() {
  this->UnregisterMinidumpExceptionHandler();
}
// Trims the "*.dmp" files in minidump_dir() down to at most --max_minidumps
// files, using 'env' for filesystem access. Returns OK without touching
// anything when minidumps are disabled or when --max_minidumps is 0 or less;
// otherwise returns the Status of the deletion helper.
Status MinidumpExceptionHandler::DeleteExcessMinidumpFiles(Env* env) {
  if (FLAGS_enable_minidumps) {
    const int32_t keep_limit = FLAGS_max_minidumps;
    if (keep_limit > 0) {
      // Minidump filenames are created by breakpad in the following format,
      // for example: 7b57915b-ee6a-dbc5-21e59491-5c60a2cf.dmp.
      const string dmp_glob = JoinPathSegments(minidump_dir(), "*.dmp");

      // Use mtime to determine which minidumps to delete. While this could
      // potentially be ambiguous if many minidumps were created in quick
      // succession, users can always increase 'FLAGS_max_minidumps' if desired
      // in order to work around the problem.
      return env_util::DeleteExcessFilesByPattern(env, dmp_glob, keep_limit);
    }
  }
  // Minidumps are disabled, or rotation is disabled (limit of 0 or less).
  return Status::OK();
}
// Returns, by value, the stored minidump directory path. Within this file the
// member is only assigned by the Linux InitMinidumpExceptionHandler().
string MinidumpExceptionHandler::minidump_dir() const {
return minidump_dir_;
}
// Adds SIGUSR1 to the blocked-signal mask via pthread_sigmask(SIG_BLOCK).
// Returns OK on success, or InvalidArgument carrying the error string and
// code that pthread_sigmask() reported.
// NOTE(review): presumably meant to run before other threads are started so
// that SIGUSR1 is only consumed by the sigwait() loop -- confirm with callers.
Status BlockSigUSR1() {
  const sigset_t usr1_only = GetSigset(SIGUSR1);
  const int rc = pthread_sigmask(SIG_BLOCK, &usr1_only, nullptr);
  if (rc != 0) {
    return Status::InvalidArgument("pthread_sigmask", ErrnoToString(rc), rc);
  }
  return Status::OK();
}
} // namespace kudu