| #!/usr/bin/env python3 |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # This script automates symbol resolution for Breakpad minidumps |
| # under ideal circumstances. Specifically, it expects all the |
| # binaries to be in the same locations as when the minidump |
| # was taken. This is often true for minidumps on a developer |
| # workstation or at the end of an Impala test job. It finds Breakpad |
| # using environment variables from the Impala dev environment, |
| # so it must run inside the Impala dev environment. |
| # TODO: It may be possible to extend this to Docker images. |
| # |
| # Within this simple context, this script aims for complete |
| # symbol resolution. It uses Breakpad's minidump_dump utility |
| # to dump the minidump, then it parses the list of libraries |
| # that were used by the binary. It gets the symbols for all |
| # those libraries and resolves the minidump. |
| # |
| # Usage: resolve_minidumps.py --minidump_file [file] --output_file [file] |
| # (optional -v or --verbose for more output) |
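#
# Example (paths are illustrative):
#   ./resolve_minidumps.py --minidump_file /tmp/minidumps/impalad/example.dmp \
#       --output_file /tmp/example_resolved.txt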
| |
| import errno |
| import logging |
| import os |
| import re |
| import shutil |
| import subprocess |
| import sys |
| import tempfile |
| import traceback |
| |
| from argparse import ArgumentParser |
| |
| |
| class ModuleInfo: |
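  """Holds the identifying fields of a single module (the main binary or a
  shared library) parsed from minidump_dump output."""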
| def __init__(self, code_file, code_id, debug_file, debug_id): |
| self.code_file = code_file |
| self.code_id = code_id |
| self.debug_file = debug_file |
| self.debug_id = debug_id |
| |
| |
| def read_module_info(minidump_dump_contents): |
| """Read the module information out of the minidump_dump raw contents. |
| This is expecting 'minidump_dump_contents' to be the minidump_dump |
| contents for the minidump split by newlines. |
| This will return a list of ModuleInfo objects. |
| """ |
  # Find the module_count
  module_count = None
  for line in minidump_dump_contents:
    if line.strip().startswith("module_count"):
      module_count = int(line.split("=")[1].strip())
      break
  if module_count is None:
    logging.error("Failed to parse dump output, missing module_count")
    return None
| |
| # The minidump has a MDRawModule per module and it will have |
| # the same number of MDRawModule dumps as module_count. |
| module_boundaries = [] |
| for idx, line in enumerate(minidump_dump_contents): |
| if line.startswith("MDRawModule"): |
| module_boundaries.append(idx) |
| |
| if len(module_boundaries) != module_count: |
| logging.error("Failed to parse modules, mismatch in module count " |
| "({0} != {1})".format(len(module_boundaries), module_count)) |
| return None |
| |
| # Add one more entry to module_boundaries that is the end of the file |
| # That makes this more of a list of boundaries than the list of |
| # start locations. |
| module_boundaries.append(len(minidump_dump_contents)) |
| |
| modules = [] |
| for module_idx in range(module_count): |
| module_start = module_boundaries[module_idx] |
| module_end = module_boundaries[module_idx + 1] |
| |
| # Find the code_file |
| code_file = None |
| code_identifier = None |
| debug_file = None |
| debug_identifier = None |
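    # Field lines inside an MDRawModule section have the form: name = "value"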
    for line in minidump_dump_contents[module_start:module_end]:
      if "code_file" in line:
        code_file = line.split("=")[1].strip().strip('"')
      elif "code_identifier" in line:
        code_identifier = line.split("=")[1].strip().strip('"')
      elif "debug_file" in line:
        debug_file = line.split("=")[1].strip().strip('"')
      elif "debug_identifier" in line:
        debug_identifier = line.split("=")[1].strip().strip('"')
| |
| # Important: it is ok for the fields to be the zero-length string. |
| # We just care that they are non-None (i.e. the loop above encountered |
| # them and parsed a value). |
| if code_file is None or code_identifier is None or debug_file is None or \ |
| debug_identifier is None: |
| logging.error("Failed to parse dump output, missing fields for MDRawModule " |
| "{0}".format(module_idx)) |
| return None |
| |
    # Jars and other files show up in this list, but their code identifier
    # or debug identifier is all zeros. Skip those, as there are no symbols
    # to find.
| if re.fullmatch("[0]+", code_identifier) or re.fullmatch("[0]+", debug_identifier): |
| continue |
| |
| # Skip cases where the code identifier or debug identifier are null |
| if len(code_identifier) == 0 or len(debug_identifier) == 0: |
| continue |
| |
| # linux-gate.so is a special case, and it is not an actual file on disk. |
| if code_file.startswith("linux-gate.so"): |
| continue |
| |
| modules.append(ModuleInfo(code_file, code_identifier, debug_file, debug_identifier)) |
| |
| return modules |
| |
| |
| def filter_shared_library_modules(module_list, lib_allow_list): |
| """Filter the list of modules by eliminating any shared libaries that do not match |
| one of the prefixes in the allow list. This keeps all non-shared libaries |
| (such as the main binary). |
| """ |
| filtered_module_list = [] |
| for module in module_list: |
| code_file_basename = os.path.basename(module.code_file) |
| # Keep anything that is not a shared library (e.g. the main binary) |
| if ".so" not in code_file_basename: |
| filtered_module_list.append(module) |
| continue |
| # Only keep shared libraries that match an entry on the allow list. |
| for allow_lib in lib_allow_list: |
| if code_file_basename.startswith(allow_lib): |
| filtered_module_list.append(module) |
| break |
| return filtered_module_list |
| |
| |
| def find_breakpad_home(): |
| """Locate the Breakpad home directory. |
| |
| We try to locate the package in the Impala toolchain folder. |
| """ |
| toolchain_packages_home = os.environ.get('IMPALA_TOOLCHAIN_PACKAGES_HOME') |
| if not toolchain_packages_home: |
| logging.error("IMPALA_TOOLCHAIN_PACKAGES_HOME is not set") |
| return None |
| |
| if not os.path.isdir(toolchain_packages_home): |
| logging.error("Could not find toolchain packages directory") |
| return None |
  breakpad_version = os.environ.get('IMPALA_BREAKPAD_VERSION')
  if not breakpad_version:
    logging.error("IMPALA_BREAKPAD_VERSION is not set")
    return None
  breakpad_dir = '{0}/breakpad-{1}'.format(toolchain_packages_home, breakpad_version)
  if not os.path.isdir(breakpad_dir):
    logging.error("Could not find breakpad directory at {0}".format(breakpad_dir))
    return None
| |
| return breakpad_dir |
| |
| |
| def find_breakpad_binary(binary_name): |
| """Locate the specified Breadpad binary""" |
| breakpad_home = find_breakpad_home() |
| if not breakpad_home: |
| return None |
| |
| binary_path = os.path.join(breakpad_home, 'bin', binary_name) |
| if not os.path.isfile(binary_path): |
| logging.error("Could not find {0} executable at {1}".format(binary_name, binary_path)) |
| return None |
| |
| return binary_path |
| |
| |
| def find_objcopy_binary(): |
| """Locate the 'objcopy' binary from Binutils. |
| |
| We try to locate the package in the Impala toolchain folder. |
| TODO: Fall back to finding objcopy in the system path. |
| """ |
| toolchain_packages_home = os.environ.get('IMPALA_TOOLCHAIN_PACKAGES_HOME') |
| if not toolchain_packages_home: |
| logging.error("IMPALA_TOOLCHAIN_PACKAGES_HOME is not set") |
| return None |
| |
| if not os.path.isdir(toolchain_packages_home): |
| logging.error("Could not find toolchain packages directory") |
| return None |
  binutils_version = os.environ.get('IMPALA_BINUTILS_VERSION')
  if not binutils_version:
    logging.error("IMPALA_BINUTILS_VERSION is not set")
    return None
| binutils_dir = "binutils-{0}".format(binutils_version) |
| objcopy = os.path.join(toolchain_packages_home, binutils_dir, 'bin', 'objcopy') |
| if not os.path.isfile(objcopy): |
| logging.error("Could not find objcopy executable at {0}".format(objcopy)) |
| return None |
| return objcopy |
| |
| |
| def ensure_dir_exists(path): |
| """Make sure the directory 'path' exists in a thread-safe way.""" |
| try: |
| os.makedirs(path) |
| except OSError as e: |
| if e.errno != errno.EEXIST or not os.path.isdir(path): |
| raise e |
| |
| |
| def dump_symbols_for_binary(dump_syms, objcopy, binary, out_dir): |
| """Dump symbols of a single binary file and move the result. |
| |
| Symbols will be extracted to a temporary file and moved into place afterwards. Required |
| directories will be created if necessary. |
| """ |
| logging.info("Processing binary file: {0}".format(binary)) |
| ensure_dir_exists(out_dir) |
| # tmp_fd will be closed when the file object created by os.fdopen() below gets |
| # destroyed. |
| tmp_fd, tmp_file = tempfile.mkstemp(dir=out_dir, suffix='.sym') |
  tempdir = None
  try:
    # Create a temporary directory used for decompressing debug info
    tempdir = tempfile.mkdtemp()
| |
| # Binaries can contain compressed debug symbols. Breakpad currently |
| # does not support dumping symbols for binaries with compressed debug |
| # symbols. |
| # |
| # As a workaround, this uses objcopy to create a copy of the binary with |
| # the debug symbols decompressed. If the debug symbols are not compressed |
| # in the original binary, objcopy simply makes a copy of the binary. |
| # Breakpad is able to read symbols from the decompressed binary, and |
| # those symbols work correctly in resolving a minidump from the original |
| # compressed binary. |
| # TODO: In theory, this could work with the binary.debug_path. |
| binary_basename = os.path.basename(binary) |
| decompressed_binary = os.path.join(tempdir, binary_basename) |
| objcopy_retcode = subprocess.call([objcopy, "--decompress-debug-sections", |
| binary, decompressed_binary]) |
| |
    # Build the dump_syms command line.
    # If objcopy failed for some reason, fall back to running dump_syms
    # directly on the original binary. This is unlikely to work, but it is a way of
    # guaranteeing that objcopy is not the problem.
| args = [dump_syms, decompressed_binary] |
| if objcopy_retcode != 0: |
      sys.stderr.write('objcopy failed. Trying to run dump_syms directly.\n')
| args = [dump_syms, binary] |
| |
| # Run dump_syms on the binary. |
| proc = subprocess.Popen(args, stdout=os.fdopen(tmp_fd, 'wb'), stderr=subprocess.PIPE) |
| _, stderr = proc.communicate() |
| if proc.returncode != 0: |
| sys.stderr.write('Failed to dump symbols from %s, return code %s\n' % |
| (binary, proc.returncode)) |
| sys.stderr.write(stderr.decode('utf-8')) |
| os.remove(tmp_file) |
| return False |
| # Parse the temporary file to determine the full target path. |
| with open(tmp_file, 'r') as f: |
| header = f.readline().strip() |
| # Format of header is: MODULE os arch binary_id binary |
| _, _, _, binary_id, binary = header.split(' ') |
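      # minidump_stackwalk expects symbol files to be laid out as:
      #   <symbol_dir>/<module name>/<module id>/<module name>.sym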
| out_path = os.path.join(out_dir, binary, binary_id) |
| ensure_dir_exists(out_path) |
| # Move the temporary file to its final destination. |
| shutil.move(tmp_file, os.path.join(out_path, '%s.sym' % binary)) |
| except Exception as e: |
| # Only need to clean up in case of errors. |
| try: |
| os.remove(tmp_file) |
| except EnvironmentError: |
| pass |
| raise e |
  finally:
    # Clean up the temporary directory (if it was created)
    if tempdir:
      shutil.rmtree(tempdir)
| return True |
| |
| |
| def dump_symbols_for_all_modules(dump_syms, objcopy, module_list, out_dir): |
| """Given a list of modules (ModuleInfo objects), dump symbols for |
| each library listed. |
| """ |
| for module in module_list: |
| success = dump_symbols_for_binary(dump_syms, objcopy, module.code_file, out_dir) |
| if not success: |
| logging.warning("Failed to dump symbols for {0}".format(module.code_file)) |
| |
| |
| def resolve_minidump(minidump_stackwalk, minidump_path, symbol_dir, verbose, out_file): |
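  """Run minidump_stackwalk on 'minidump_path' using the symbols under
  'symbol_dir' and write the resolved stack trace to 'out_file'."""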
| minidump_stackwalk_cmd = [minidump_stackwalk, minidump_path, symbol_dir] |
| # There are circumstances where the minidump_stackwalk can go wrong and become |
| # a runaway process capable of using all system memory. If the prlimit utility |
| # is present, we use it to apply a limit on the memory consumption. |
| # |
  # See if we have the prlimit utility. If the prlimit binary is missing entirely,
  # subprocess.run() raises an OSError rather than returning a nonzero exit code.
  try:
    check_prlimit = subprocess.run(["prlimit", "-V"], stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL)
    have_prlimit = check_prlimit.returncode == 0
  except OSError:
    have_prlimit = False
  if have_prlimit:
    # The prlimit utility is available, so wrap the minidump_stackwalk command
    # to apply a 4GB limit on virtual memory. In normal operations, 4GB is plenty.
    prlimit_wrapper = ["prlimit", "--as={0}".format(4 * 1024 * 1024 * 1024)]
    minidump_stackwalk_cmd = prlimit_wrapper + minidump_stackwalk_cmd
| with open(out_file, "w") as out_f: |
| stderr_output = None if verbose else subprocess.DEVNULL |
| subprocess.run(minidump_stackwalk_cmd, stdout=out_f, |
| stderr=stderr_output, check=True) |
| |
| |
| def raw_dump_for_minidump(minidump_dump, minidump_path): |
| """Run minidump_dump on the specified minidump and split the output into lines""" |
| # minidump_dump sometimes returns an error code even though it produced usable output. |
| # So, this doesn't check the error code, and it relies on read_module_info() doing |
| # validation. |
| # |
| # Python 3.6 adjustments: |
| # 'capture_output=True' not supported: set stdout/stderr to subprocess.PIPE instead |
| # 'text=True' not supported: set 'universal_newlines=True' (the two are the same thing) |
| output = subprocess.run([minidump_dump, minidump_path], stdout=subprocess.PIPE, |
| stderr=subprocess.PIPE, universal_newlines=True) |
| return output.stdout.split('\n') |
| |
| |
| def parse_args(): |
| """Parse command line arguments and perform sanity checks.""" |
| # TODO: |
| # - Add ability to specify Breakpad home |
| # - Add ability to specify the symbol directory location (for reuse) |
| # - Add ability to specify Binutils home |
| parser = ArgumentParser() |
| parser.add_argument('--minidump_file', required=True) |
| parser.add_argument('--output_file', required=True) |
| parser.add_argument('-v', '--verbose', action='store_true') |
  parser.add_argument('--safe_library_list',
      default="libstdc++.so,libc.so,libjvm.so",
      help="Comma-separated list of prefixes for allowed system libraries")
| args = parser.parse_args() |
| return args |
| |
| |
| def dump_syms_and_resolve_stack(modules, minidump_file, output_file, verbose): |
| """Dump the symbols for the listed modules and use them to resolve the minidump.""" |
| # Create a temporary directory to store the symbols |
| # This automatically gets cleaned up |
| with tempfile.TemporaryDirectory() as tmp_dir: |
| # Dump symbols for all the modules into this temporary directory. |
| # Need both dump_syms and objcopy |
| dump_syms_bin = find_breakpad_binary("dump_syms") |
| if not dump_syms_bin: |
| logging.error("Could not find Breakpad dump_syms binary") |
| sys.exit(1) |
| objcopy_bin = find_objcopy_binary() |
| if not objcopy_bin: |
| logging.error("Could not find Binutils objcopy binary") |
| sys.exit(1) |
| dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir) |
| |
| # Resolve the minidump with the temporary symbol directory |
| minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk") |
| if not minidump_stackwalk_bin: |
| logging.error("Could not find Breakpad minidump_stackwalk binary") |
| sys.exit(1) |
    resolve_minidump(minidump_stackwalk_bin, minidump_file, tmp_dir, verbose,
        output_file)
| |
| |
| def main(): |
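  """Resolve a minidump end to end: dump its raw contents, parse the module
  list, dump symbols, and run minidump_stackwalk to produce the output file."""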
| args = parse_args() |
| |
| if args.verbose: |
| logging.basicConfig(level=logging.INFO) |
| else: |
| logging.basicConfig(level=logging.WARNING) |
| |
| # Step 1: Get the raw dump for the specified minidump |
| minidump_dump_bin = find_breakpad_binary("minidump_dump") |
| if not minidump_dump_bin: |
| logging.error("Could not find Breakpad minidump_dump binary") |
| sys.exit(1) |
| contents = raw_dump_for_minidump(minidump_dump_bin, args.minidump_file) |
| if not contents: |
| logging.error( |
| "minidump_dump could not get the contents of {0}".format(args.minidump_file)) |
| sys.exit(1) |
| |
| # Step 2: Parse the raw dump to get the list of code modules |
| # This is the list of things that have symbols we need to dump. |
| modules = read_module_info(contents) |
| if not modules: |
| logging.error("Failed to read modules for {0}".format(args.minidump_file)) |
| sys.exit(1) |
| |
  # Step 3: Dump the symbols and use them to resolve the minidump.
  # Sometimes there are libraries with corrupt/problematic symbols
  # that can cause minidump_stackwalk to go haywire and use excessive
  # memory. First, we try using symbols from all of the shared libraries.
  # If that fails, we fall back to using a "safe" list of shared libraries.
| try: |
| # Dump the symbols and use them to resolve the minidump |
| dump_syms_and_resolve_stack(modules, args.minidump_file, args.output_file, |
| args.verbose) |
| return |
| except Exception: |
| logging.warning("Encountered error: {0}".format(traceback.format_exc())) |
| logging.warning("Falling back to resolution using the safe library list") |
| logging.warning("Safe library list: {0}".format(args.safe_library_list)) |
| |
| # Limit the shared libraries to the "safe" list of shared libraries and |
| # try again. |
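  # Note: "".split(",") returns [""], so an empty safe_library_list needs explicit
  # handling here.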
| if len(args.safe_library_list) == 0: |
| safe_library_list = [] |
| else: |
| safe_library_list = args.safe_library_list.split(",") |
| safe_modules = filter_shared_library_modules(modules, safe_library_list) |
| dump_syms_and_resolve_stack(safe_modules, args.minidump_file, args.output_file, |
| args.verbose) |
| |
| |
| if __name__ == "__main__": |
| main() |