# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Static symbol tables built from readelf section headers and nm output."""

import bisect
import re


# A parenthesized argument list with no nested parentheses, optionally
# followed by a "const" qualifier.
_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')

# An innermost "<...>" group (no nested angle brackets inside it).
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')

# Leading text ending in whitespace that is immediately followed by a
# "name::" qualifier.  The qualifier is captured so it can be kept.
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')

# One row of a section header table.  Captured groups, in order: number,
# name (may be empty), type, address, offset, size, es, flags, lk, inf, al.
_READELF_SECTION_HEADER_PATTER = re.compile(
    r'^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
    r'([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
    r'([0-9]+)\s+([0-9]+)\s+([0-9]+)')


class ParsingException(Exception):
    """Raised when a line of tool output cannot be parsed."""

    def __str__(self):
        return repr(self.args[0])


class AddressMapping(object):
    """Maps exact start addresses to entries."""

    def __init__(self):
        self._symbol_map = {}

    def append(self, start, entry):
        """Registers |entry| at |start|, replacing any existing entry."""
        self._symbol_map[start] = entry

    def find(self, address):
        """Returns the entry registered at exactly |address|, or None."""
        return self._symbol_map.get(address)


class RangeAddressMapping(AddressMapping):
    """Maps an address to the entry with the nearest start at or below it."""

    def __init__(self):
        AddressMapping.__init__(self)
        self._sorted_start_list = []
        self._is_sorted = True

    def append(self, start, entry):
        """Registers |entry| as beginning at |start|.

        If |start| equals the most recently appended start, the call is
        ignored and the earlier entry is kept.  Out-of-order appends are
        accepted; the start list is re-sorted lazily by find().
        """
        if self._sorted_start_list:
            if self._sorted_start_list[-1] > start:
                self._is_sorted = False
            elif self._sorted_start_list[-1] == start:
                return
        self._sorted_start_list.append(start)
        self._symbol_map[start] = entry

    def find(self, address):
        """Returns the entry whose start is the greatest one <= |address|.

        Returns None when the mapping is empty or when |address| is below
        every registered start.
        """
        if not self._is_sorted:
            self._sorted_start_list.sort()
            self._is_sorted = True
        # bisect_right counts the starts that are <= address, so an address
        # equal to a start resolves to the entry registered at that start
        # rather than to the entry before it.
        found_index = bisect.bisect_right(self._sorted_start_list, address)
        if not found_index:
            # No start is <= address.  Indexing with -1 here would silently
            # wrap around to the highest start (or fail on an empty list).
            return None
        found_start_address = self._sorted_start_list[found_index - 1]
        return self._symbol_map[found_start_address]


class Procedure(object):
    """A class for a procedure symbol and an address range for the symbol."""

    def __init__(self, start, end, name):
        self.start = start
        self.end = end
        self.name = name

    def __eq__(self, other):
        return (self.start == other.start and
                self.end == other.end and
                self.name == other.name)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return '%x-%x: %s' % (self.start, self.end, self.name)


class ElfSection(object):
    """A class for an elf section header."""

    def __init__(
            self, number, name, stype, address, offset, size, es, flg, lk,
            inf, al):
        self.number = number
        self.name = name
        self.stype = stype
        self.address = address
        self.offset = offset
        self.size = size
        self.es = es
        self.flg = flg
        self.lk = lk
        self.inf = inf
        self.al = al

    def __eq__(self, other):
        return (self.number == other.number and
                self.name == other.name and
                self.stype == other.stype and
                self.address == other.address and
                self.offset == other.offset and
                self.size == other.size and
                self.es == other.es and
                self.flg == other.flg and
                self.lk == other.lk and
                self.inf == other.inf and
                self.al == other.al)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return '%x+%x(%x) %s' % (
            self.address, self.size, self.offset, self.name)


class StaticSymbolsInFile(object):
    """Represents static symbol information in a binary file."""

    def __init__(self, my_name):
        self.my_name = my_name
        self._elf_sections = []
        self._procedures = RangeAddressMapping()
        self._typeinfos = AddressMapping()

    def _append_elf_section(self, elf_section):
        self._elf_sections.append(elf_section)

    def _append_procedure(self, start, procedure):
        self._procedures.append(start, procedure)

    def _append_typeinfo(self, start, typeinfo):
        self._typeinfos.append(start, typeinfo)

    def _find_symbol_by_runtime_address(self, address, vma, target):
        """Looks up |address| in |target| after translating it via |vma|.

        Args:
            address: A runtime address.
            vma: An object with begin, end, offset and name attributes
                describing a mapped region.
            target: The mapping to query with the translated address.

        Returns:
            The result of target.find(), or None when |address| is outside
            |vma|, |vma| is not named after this file, or no loaded section
            covers the corresponding file offset.
        """
        if not (vma.begin <= address < vma.end):
            return None

        if vma.name != self.my_name:
            return None

        file_offset = address - (vma.begin - vma.offset)
        elf_address = None
        # No early exit: if several sections cover the offset, the last one
        # in load order decides.
        for section in self._elf_sections:
            if section.offset <= file_offset < section.offset + section.size:
                elf_address = section.address + file_offset - section.offset
        # Compare against None explicitly: 0 is a valid translated address.
        if elf_address is None:
            return None

        return target.find(elf_address)

    def find_procedure_by_runtime_address(self, address, vma):
        """Returns the procedure entry for a runtime address, or None."""
        return self._find_symbol_by_runtime_address(
            address, vma, self._procedures)

    def find_typeinfo_by_runtime_address(self, address, vma):
        """Returns the typeinfo entry for a runtime address, or None."""
        return self._find_symbol_by_runtime_address(
            address, vma, self._typeinfos)

    def load_readelf_ew(self, f):
        """Loads section headers from |f|.

        Lines before a 'Section Headers:' line are skipped.  After it, every
        line matching the section header pattern is stored as an ElfSection
        until a 'Key to Flags:' or 'Program Headers:' line, or the end of
        input.  Nothing is loaded if 'Section Headers:' never appears.

        Args:
            f: An iterator over text lines, such as an open file.  It is
                consumed by two consecutive loops, so the second one must
                resume where the first stopped; a plain list would restart.
        """
        found_header = False
        for line in f:
            if line.rstrip() == 'Section Headers:':
                found_header = True
                break
        if not found_header:
            return None

        for line in f:
            line = line.rstrip()
            matched = _READELF_SECTION_HEADER_PATTER.match(line)
            if matched:
                self._append_elf_section(ElfSection(
                    int(matched.group(1), 10),  # number
                    matched.group(2),  # name
                    matched.group(3),  # stype
                    int(matched.group(4), 16),  # address
                    int(matched.group(5), 16),  # offset
                    int(matched.group(6), 16),  # size
                    matched.group(7),  # es
                    matched.group(8),  # flg
                    matched.group(9),  # lk
                    matched.group(10),  # inf
                    matched.group(11)  # al
                ))
            else:
                if line in ('Key to Flags:', 'Program Headers:'):
                    break

    @staticmethod
    def _parse_nm_bsd_line(line):
        """Splits one symbol line into (value, type, name).

        The value column is taken to be 8 characters wide if the character
        after it is a space, otherwise 16 characters wide.

        Raises:
            ParsingException: If |line| fits neither layout, including
                lines that are too short to hold a type character.
        """
        for width in (8, 16):
            # The length check keeps short or blank lines from raising
            # IndexError; they are reported as invalid input below.
            if len(line) > width + 1 and line[width] == ' ':
                return line[:width], line[width + 1], line[width + 3:]
        raise ParsingException('Invalid nm output.')

    @staticmethod
    def _get_short_function_name(function):
        """Strips argument lists, template arguments and a leading type."""
        # Each pass only removes innermost groups, so repeat until a pass
        # removes nothing in order to handle nesting.
        while True:
            function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
            if not number:
                break
        while True:
            function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
            if not number:
                break
        return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)

    def load_nm_bsd(self, f, mangled=False):
        """Loads procedure and typeinfo symbols from |f|.

        Each procedure is recorded as the range from its own address to the
        address of the next distinct symbol.  The first recorded range
        starts at address 0 with an empty name, and the last one has equal
        start and end.

        Args:
            f: An iterable of symbol lines accepted by _parse_nm_bsd_line().
            mangled: If true, names are stored as given.  Otherwise they
                are shortened with _get_short_function_name(), and names
                starting with 'typeinfo' are also recorded as typeinfos.

        Raises:
            ParsingException: If a line cannot be parsed.
        """
        last_start = 0
        routine = ''

        for line in f:
            line = line.rstrip()
            sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)

            # A blank value column means the symbol has no address.
            if sym_value[0] == ' ':
                continue

            start_val = int(sym_value, 16)

            if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
                    (not mangled and sym_name.startswith('typeinfo'))):
                self._append_typeinfo(start_val, sym_name)

            # It's possible for two symbols to share the same address, if
            # one is a zero-length variable (like __start_google_malloc) or
            # one symbol is a weak alias to another (like __libc_malloc).
            # In such cases, we want to ignore all values except for the
            # actual symbol, which in nm-speak has type "T".  The logic
            # below does this, though it's a bit tricky: what happens when
            # we have a series of lines with the same address, is the first
            # one gets queued up to be processed.  However, it won't
            # *actually* be processed until later, when we read a line with
            # a different address.  That means that as long as we're reading
            # lines with the same address, we have a chance to replace that
            # item in the queue, which we do whenever we see a 'T' entry --
            # that is, a line with type 'T'.  If we never see a 'T' entry,
            # we'll just go ahead and process the first entry (which never
            # got touched in the queue), and ignore the others.
            if start_val == last_start and sym_type in ('t', 'T'):
                # We are the 'T' symbol at this address, replace previous
                # symbol.
                routine = sym_name
                continue
            elif start_val == last_start:
                # We're not the 'T' symbol at this address, so ignore us.
                continue

            # Tag this routine with the starting address in case the image
            # has multiple occurrences of this routine.  We use a syntax
            # that resembles template parameters, which are automatically
            # stripped out by _get_short_function_name().
            sym_name += "<%016x>" % start_val

            if not mangled:
                routine = self._get_short_function_name(routine)
            self._append_procedure(
                last_start, Procedure(last_start, start_val, routine))

            last_start = start_val
            routine = sym_name

        # Flush the symbol that is still queued after the last line.
        if not mangled:
            routine = self._get_short_function_name(routine)
        self._append_procedure(
            last_start, Procedure(last_start, last_start, routine))