#!/usr/bin/python # Copyright 2015 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Utilities to get and manipulate symbols from a binary.""" import collections import logging import os import re import subprocess import sys import cygprofile_utils sys.path.insert( 0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'third_party', 'android_platform', 'development', 'scripts')) import symbol _MAX_WARNINGS_TO_PRINT = 200 SymbolInfo = collections.namedtuple('SymbolInfo', ('name', 'offset', 'size', 'section')) def SetArchitecture(arch): """Set the architecture for binaries to be symbolized.""" symbol.ARCH = arch def _FromObjdumpLine(line): """Create a SymbolInfo by parsing a properly formatted objdump output line. Args: line: line from objdump Returns: An instance of SymbolInfo if the line represents a symbol, None otherwise. """ # All of the symbol lines we care about are in the form # 0000000000 g F .text.foo 000000000 [.hidden] foo # where g (global) might also be l (local) or w (weak). parts = line.split() if len(parts) < 6 or parts[2] != 'F': return None assert len(parts) == 6 or (len(parts) == 7 and parts[5] == '.hidden') accepted_scopes = set(['g', 'l', 'w']) assert parts[1] in accepted_scopes offset = int(parts[0], 16) section = parts[3] size = int(parts[4], 16) name = parts[-1].rstrip('\n') # Forbid ARM mapping symbols and other unexpected symbol names, but allow $ # characters in a non-initial position, which can appear as a component of a # mangled name, e.g. Clang can mangle a lambda function to: # 02cd61e0 l F .text 000000c0 _ZZL11get_globalsvENK3$_1clEv # The equivalent objdump line from GCC is: # 0325c58c l F .text 000000d0 _ZZL11get_globalsvENKUlvE_clEv assert re.match('^[a-zA-Z0-9_.][a-zA-Z0-9_.$]*$', name) return SymbolInfo(name=name, offset=offset, section=section, size=size) def _SymbolInfosFromStream(objdump_lines): """Parses the output of objdump, and get all the symbols from a binary. Args: objdump_lines: An iterable of lines Returns: A list of SymbolInfo. """ symbol_infos = [] for line in objdump_lines: symbol_info = _FromObjdumpLine(line) if symbol_info is not None: symbol_infos.append(symbol_info) return symbol_infos def SymbolInfosFromBinary(binary_filename): """Runs objdump to get all the symbols from a binary. Args: binary_filename: path to the binary. Returns: A list of SymbolInfo from the binary. """ command = (symbol.ToolPath('objdump'), '-t', '-w', binary_filename) p = subprocess.Popen(command, shell=False, stdout=subprocess.PIPE) try: result = _SymbolInfosFromStream(p.stdout) return result finally: p.stdout.close() p.wait() def GroupSymbolInfosByOffset(symbol_infos): """Create a dict {offset: [symbol_info1, ...], ...}. As several symbols can be at the same offset, this is a 1-to-many relationship. Args: symbol_infos: iterable of SymbolInfo instances Returns: a dict {offset: [symbol_info1, ...], ...} """ offset_to_symbol_infos = collections.defaultdict(list) for symbol_info in symbol_infos: offset_to_symbol_infos[symbol_info.offset].append(symbol_info) return dict(offset_to_symbol_infos) def GroupSymbolInfosByName(symbol_infos): """Create a dict {name: [symbol_info1, ...], ...}. A symbol can have several offsets, this is a 1-to-many relationship. Args: symbol_infos: iterable of SymbolInfo instances Returns: a dict {name: [symbol_info1, ...], ...} """ name_to_symbol_infos = collections.defaultdict(list) for symbol_info in symbol_infos: name_to_symbol_infos[symbol_info.name].append(symbol_info) return dict(name_to_symbol_infos) def CreateNameToSymbolInfo(symbol_infos): """Create a dict {name: symbol_info, ...}. Args: symbol_infos: iterable of SymbolInfo instances Returns: a dict {name: symbol_info, ...} If a symbol name corresponds to more than one symbol_info, the symbol_info with the lowest offset is chosen. """ # TODO(lizeb,pasko): move the functionality in this method into # check_orderfile. symbol_infos_by_name = {} warnings = cygprofile_utils.WarningCollector(_MAX_WARNINGS_TO_PRINT) for infos in GroupSymbolInfosByName(symbol_infos).itervalues(): first_symbol_info = min(infos, key=lambda x:x.offset) symbol_infos_by_name[first_symbol_info.name] = first_symbol_info if len(infos) > 1: warnings.Write('Symbol %s appears at %d offsets: %s' % (first_symbol_info.name, len(infos), ','.join([hex(x.offset) for x in infos]))) warnings.WriteEnd('symbols at multiple offsets.') return symbol_infos_by_name def DemangleSymbol(mangled_symbol): """Return the demangled form of mangled_symbol.""" return symbol.CallCppFilt(mangled_symbol)