| commit c4d37a7 |
| Author: Peter Collingbourne <pcc@google.com> |
| Date: Thu Nov 2 16:53:21 2017 -0700 |
| |
| Compute base addresses from program headers while reading /proc/self/maps. |
| |
| We previously had logic to compute the base address from program |
| headers as part of symbolization. The problem is that we need a correct |
| base address earlier in order to adjust a PC into the image's address |
| space, as these addresses can appear in unsymbolized output. |
| |
| There was previously an assumption that only the mapping that |
| was lowest in the address space did not need to be adjusted. This |
| assumption is not guaranteed (for example, the kernel may choose to |
| map an ET_DYN lowest) and in fact turned out to be wrong in binaries |
| linked with lld because the first mapping is read-only. |
| |
| The solution is to move the program header reading logic into the |
| code that reads /proc/self/maps. |
| |
| There is a change in semantics for clients that install a callback |
| using the InstallSymbolizeOpenObjectFileCallback function. Any such |
| clients will need to return a correct base address from the callback |
| by reading program headers using code similar to that in the function |
| OpenObjectFileContainingPcAndGetStartAddress. |
| |
| (Modified by Adar to remove changes to Makefile.am) |
| |
| diff --git a/src/symbolize.cc b/src/symbolize.cc |
| index 953f1db..98a754f 100644 |
| --- a/src/symbolize.cc |
| +++ b/src/symbolize.cc |
| @@ -56,6 +56,8 @@ |
| |
| #if defined(HAVE_SYMBOLIZE) |
| |
| +#include <string.h> |
| + |
| #include <limits> |
| |
| #include "symbolize.h" |
| @@ -325,41 +327,17 @@ FindSymbol(uint64_t pc, const int fd, char *out, int out_size, |
| // both regular and dynamic symbol tables if necessary. On success, |
| // write the symbol name to "out" and return true. Otherwise, return |
| // false. |
| -static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, |
| - char *out, int out_size, |
| - uint64_t map_base_address) { |
| +static bool GetSymbolFromObjectFile(const int fd, |
| + uint64_t pc, |
| + char* out, |
| + int out_size, |
| + uint64_t base_address) { |
| // Read the ELF header. |
| ElfW(Ehdr) elf_header; |
| if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { |
| return false; |
| } |
| |
| - uint64_t symbol_offset = 0; |
| - if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment. |
| - ElfW(Phdr) phdr; |
| - // We need to find the PT_LOAD segment corresponding to the read-execute |
| - // file mapping in order to correctly perform the offset adjustment. |
| - for (unsigned i = 0; i != elf_header.e_phnum; ++i) { |
| - if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr), |
| - elf_header.e_phoff + i * sizeof(phdr))) |
| - return false; |
| - if (phdr.p_type == PT_LOAD && |
| - (phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) { |
| - // Find the mapped address corresponding to virtual address zero. We do |
| - // this by first adding p_offset. This gives us the mapped address of |
| - // the start of the segment, or in other words the mapped address |
| - // corresponding to the virtual address of the segment. (Note that this |
| - // is distinct from the start address, as p_offset is not guaranteed to |
| - // be page aligned.) We then subtract p_vaddr, which takes us to virtual |
| - // address zero. |
| - symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr; |
| - break; |
| - } |
| - } |
| - if (symbol_offset == 0) |
| - return false; |
| - } |
| - |
| ElfW(Shdr) symtab, strtab; |
| |
| // Consult a regular symbol table first. |
| @@ -369,8 +347,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, |
| symtab.sh_link * sizeof(symtab))) { |
| return false; |
| } |
| - if (FindSymbol(pc, fd, out, out_size, symbol_offset, |
| - &strtab, &symtab)) { |
| + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { |
| return true; // Found the symbol in a regular symbol table. |
| } |
| } |
| @@ -382,8 +359,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, |
| symtab.sh_link * sizeof(symtab))) { |
| return false; |
| } |
| - if (FindSymbol(pc, fd, out, out_size, symbol_offset, |
| - &strtab, &symtab)) { |
| + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { |
| return true; // Found the symbol in a dynamic symbol table. |
| } |
| } |
| @@ -532,7 +508,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, |
| int out_file_name_size) { |
| int object_fd; |
| |
| - // Open /proc/self/maps. |
| int maps_fd; |
| NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY)); |
| FileDescriptor wrapped_maps_fd(maps_fd); |
| @@ -540,6 +515,13 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, |
| return -1; |
| } |
| |
| + int mem_fd; |
| + NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY)); |
| + FileDescriptor wrapped_mem_fd(mem_fd); |
| + if (wrapped_mem_fd.get() < 0) { |
| + return -1; |
| + } |
| + |
| // Iterate over maps and look for the map containing the pc. Then |
| // look into the symbol tables inside. |
| char buf[1024]; // Big enough for line of sane /proc/self/maps |
| @@ -575,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, |
| } |
| ++cursor; // Skip ' '. |
| |
| - // Check start and end addresses. |
| - if (!(start_address <= pc && pc < end_address)) { |
| - continue; // We skip this map. PC isn't in this map. |
| - } |
| - |
| // Read flags. Skip flags until we encounter a space or eol. |
| const char * const flags_start = cursor; |
| while (cursor < eol && *cursor != ' ') { |
| @@ -590,6 +567,49 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, |
| return -1; // Malformed line. |
| } |
| |
| + // Determine the base address by reading ELF headers in process memory. |
| + ElfW(Ehdr) ehdr; |
| + // Skip non-readable maps. |
| + if (flags_start[0] == 'r' && |
| + ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) && |
| + memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) { |
| + switch (ehdr.e_type) { |
| + case ET_EXEC: |
| + base_address = 0; |
| + break; |
| + case ET_DYN: |
| + // Find the segment containing file offset 0. This will correspond |
| + // to the ELF header that we just read. Normally this will have |
| + // virtual address 0, but this is not guaranteed. We must subtract |
| + // the virtual address from the address where the ELF header was |
| + // mapped to get the base address. |
| + // |
| + // If we fail to find a segment for file offset 0, use the address |
| + // of the ELF header as the base address. |
| + base_address = start_address; |
| + for (unsigned i = 0; i != ehdr.e_phnum; ++i) { |
| + ElfW(Phdr) phdr; |
| + if (ReadFromOffsetExact( |
| + mem_fd, &phdr, sizeof(phdr), |
| + start_address + ehdr.e_phoff + i * sizeof(phdr)) && |
| + phdr.p_type == PT_LOAD && phdr.p_offset == 0) { |
| + base_address = start_address - phdr.p_vaddr; |
| + break; |
| + } |
| + } |
| + break; |
| + default: |
| + // ET_REL or ET_CORE. These aren't directly executable, so they don't |
| + // affect the base address. |
| + break; |
| + } |
| + } |
| + |
| + // Check start and end addresses. |
| + if (!(start_address <= pc && pc < end_address)) { |
| + continue; // We skip this map. PC isn't in this map. |
| + } |
| + |
| // Check flags. We are only interested in "r*x" maps. |
| if (flags_start[0] != 'r' || flags_start[2] != 'x') { |
| continue; // We skip this map. |
| @@ -604,19 +624,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, |
| } |
| ++cursor; // Skip ' '. |
| |
| - // Don't subtract 'start_address' from the first entry: |
| - // * If a binary is compiled w/o -pie, then the first entry in |
| - // process maps is likely the binary itself (all dynamic libs |
| - // are mapped higher in address space). For such a binary, |
| - // instruction offset in binary coincides with the actual |
| - // instruction address in virtual memory (as code section |
| - // is mapped to a fixed memory range). |
| - // * If a binary is compiled with -pie, all the modules are |
| - // mapped high at address space (in particular, higher than |
| - // shadow memory of the tool), so the module can't be the |
| - // first entry. |
| - base_address = ((num_maps == 1) ? 0U : start_address) - file_offset; |
| - |
| // Skip to file name. "cursor" now points to dev. We need to |
| // skip at least two spaces for dev and inode. |
| int num_spaces = 0; |