#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Symbolizes a log file produced by cygprofile instrumentation.

Given a log file and the binary being profiled, creates an orderfile.
"""

import logging
import multiprocessing
import optparse
import os
import re
import string
import sys
import tempfile

import cygprofile_utils
import symbol_extractor


# Prefix carried by per-function text sections in object files.
_TEXT_SECTION_PREFIX = '.text.'


def _ParseLogLines(log_file_lines):
  """Parses a merged cyglog produced by mergetraces.py.

  Args:
    log_file_lines: array of lines in log file produced by profiled run

    Below is an example of a small log file:
    5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
    secs       usecs      pid:threadid    func
    START
    1314897086 795828     3587:1074648168 0x509e105c
    1314897086 795874     3587:1074648168 0x509e0eb4
    1314897086 796326     3587:1074648168 0x509e0e3c
    1314897086 796552     3587:1074648168 0x509e07bc
    END

  Returns:
    An ordered list of callee offsets.
  """
  # The first line is the memory mapping of the library; its start address is
  # the hex number before the first '-'.
  mapping_line = log_file_lines[0]
  assert 'r-xp' in mapping_line, 'First line is not an executable mapping'
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  call_info = []
  # Skip the mapping line, the column header line and the START marker.
  for line in log_file_lines[3:]:
    fields = line.split()
    if not fields:
      # Tolerate blank lines (e.g. a trailing newline after END).
      continue
    if len(fields) == 4:
      addr = int(fields[3], 16)
      # Only addresses above the mapping start are turned into offsets; any
      # other value is recorded unchanged.
      if vm_start < addr:
        addr -= vm_start
      call_info.append(addr)
    else:
      assert fields[0] == 'END', 'Unexpected log line: ' + line
  return call_info


def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Returns a dict {offset: [SymbolInfo]} from a library."""
  symbol_infos = symbol_extractor.SymbolInfosFromBinary(lib_filename)
  return symbol_extractor.GroupSymbolInfosByOffset(symbol_infos)


class SymbolNotFoundException(Exception):
  """Raised when no symbol matches a requested offset.

  The offending offset is kept in the `value` attribute.
  """

  def __init__(self, value):
    super(SymbolNotFoundException, self).__init__(value)
    self.value = value

  def __str__(self):
    return repr(self.value)


def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
  """Finds all SymbolInfo at a given offset.

  Args:
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    offset: offset to look the symbols at

  Returns:
    The list of SymbolInfo at the given offset

  Raises:
    SymbolNotFoundException if the offset doesn't match any symbol.
  """
  if offset in offset_to_symbol_infos:
    return offset_to_symbol_infos[offset]
  elif offset % 2 and (offset - 1) in offset_to_symbol_infos:
    # On ARM, odd addresses are used to signal thumb instruction. They are
    # generated by setting the LSB to 1 (see
    # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
    # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
    return offset_to_symbol_infos[offset - 1]
  else:
    raise SymbolNotFoundException(offset)


def _GetObjectFileNames(obj_dir):
  """Returns the list of object files in a directory.

  The directory is walked recursively and every file whose name ends with
  '.o' is returned, with its path joined to the directory it was found in.
  """
  obj_files = []
  for (dirpath, _, filenames) in os.walk(obj_dir):
    obj_files.extend(os.path.join(dirpath, file_name)
                     for file_name in filenames
                     if file_name.endswith('.o'))
  return obj_files


def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames."""
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Always release the worker processes, even if the extraction failed.
    pool.close()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result += symbol_infos
  return result


def _SameCtorOrDtorNames(symbol1, symbol2):
  """Returns True if two symbols refer to the same constructor or destructor.

  The Itanium C++ ABI specifies dual constructor and destructor
  emission (section 5.1.4.3):
  https://refspecs.linuxbase.org/cxxabi-1.83.html#mangling-special
  To avoid fully parsing all mangled symbols, a heuristic is used with c++filt.

  Note: some compilers may name generated copies differently.  If this becomes
  an issue this heuristic will need to be updated.

  Args:
    symbol1: mangled symbol name, checked for a constructor/destructor marker
    symbol2: mangled symbol name to compare against

  Returns:
    True if symbol1 looks like a constructor/destructor and both symbols
    demangle to the same name, False otherwise.
  """
  # Check if this is the understood case of constructor/destructor
  # signatures. GCC emits up to three types of constructor/destructors:
  # complete, base, and allocating.  If they're all the same they'll
  # get folded together.
  return bool(re.search('(C[123]|D[012])E', symbol1) and
              symbol_extractor.DemangleSymbol(symbol1) ==
              symbol_extractor.DemangleSymbol(symbol2))


def _StripTextPrefix(section):
  """Returns section without a leading '.text.', if it has one."""
  # str.lstrip() would strip any run of the characters '.', 't', 'e', 'x'
  # rather than the literal prefix, so slice the prefix off explicitly.
  if section.startswith(_TEXT_SECTION_PREFIX):
    return section[len(_TEXT_SECTION_PREFIX):]
  return section


def GetSymbolToSectionsMapFromObjectFiles(obj_dir):
  """Scans object files to create a {symbol: linker section(s)} map.

  Symbols whose name starts with '.LTHUNK' are ignored. A symbol first seen in
  a section that does not start with '.text.' is reported and not recorded.

  Args:
    obj_dir: The root of the output object file directory, which will be
             scanned for .o files to form the mapping.

  Returns:
    A map {symbol_name: [section_name1, section_name2...]}
  """
  object_files = _GetObjectFileNames(obj_dir)
  symbol_to_sections_map = {}
  symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_infos = _AllSymbolInfos(object_files)
  for symbol_info in symbol_infos:
    symbol = symbol_info.name
    if symbol.startswith('.LTHUNK'):
      continue
    section = symbol_info.section
    known_sections = symbol_to_sections_map.get(symbol)
    if known_sections is not None and section not in known_sections:
      known_sections.append(section)
      # Several sections are only expected for the duplicated copies of a
      # constructor or destructor; anything else is reported.
      if not _SameCtorOrDtorNames(
          symbol, _StripTextPrefix(known_sections[0])):
        symbol_warnings.Write('Symbol ' + symbol +
                              ' unexpectedly in more than one section: ' +
                              ', '.join(known_sections))
    elif not section.startswith(_TEXT_SECTION_PREFIX):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in incorrect section ' + section)
    elif known_sections is None:
      # In most cases we expect just one item in this list, and maybe 4 or so
      # in the worst case. Only new symbols are initialised here, so seeing an
      # already-recorded section again never drops the other sections.
      symbol_to_sections_map[symbol] = [section]
  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_sections_map


def _WarnAboutDuplicates(offsets):
  """Warns about duplicate offsets.

  Args:
    offsets: list of offsets to check for duplicates

  Returns:
    True if there are no duplicates, False otherwise.
  """
  seen_offsets = set()
  ok = True
  for offset in offsets:
    if offset not in seen_offsets:
      seen_offsets.add(offset)
    else:
      ok = False
      logging.warning('Duplicate offset: %s', hex(offset))
  return ok


def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_sections_map,
                     output_file):
  """Outputs the orderfile to output_file.

  Each section is written at most once, on its own line, in the order in which
  it is first reached through the offsets.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_sections_map: {name: [section1, section2]}
    output_file: file-like object to write the results to

  Returns:
    True if all symbols were found in the library.
  """
  success = True
  unknown_symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_not_found_errors = cygprofile_utils.WarningCollector(
      300, level=logging.ERROR)
  output_sections = set()
  for offset in offsets:
    try:
      symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
      for symbol_info in symbol_infos:
        if symbol_info.name in symbol_to_sections_map:
          sections = symbol_to_sections_map[symbol_info.name]
          for section in sections:
            if section not in output_sections:
              output_file.write(section + '\n')
              output_sections.add(section)
        else:
          # A missing section is only warned about; it does not fail the run.
          unknown_symbol_warnings.Write(
              'No known section for symbol ' + symbol_info.name)
    except SymbolNotFoundException:
      symbol_not_found_errors.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      success = False
  unknown_symbol_warnings.WriteEnd('no known section for symbol.')
  symbol_not_found_errors.WriteEnd('symbol not found in the binary.')
  return success


def main():
  """Command-line entry point.

  Expects three positional arguments: the merged log file, the profiled
  library and the output orderfile path.

  Returns:
    0 if the orderfile was written and every offset was resolved to a symbol,
    1 otherwise (including bad command-line usage).
  """
  parser = optparse.OptionParser(
      usage='usage: %prog [options] '
            '<log_filename> <lib_filename> <output_filename>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for libchrome.so')
  options, argv = parser.parse_args(sys.argv)
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  # argv[0] is the program name, followed by the three positional arguments.
  if len(argv) != 4:
    parser.print_help()
    return 1
  (log_filename, lib_filename, output_filename) = argv[1:]
  symbol_extractor.SetArchitecture(options.arch)

  obj_dir = cygprofile_utils.GetObjDir(lib_filename)

  # Read into a real list (it is indexed and sliced later) and close the file.
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_sections_map = GetSymbolToSectionsMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  output_file = None
  try:
    # Write to a temporary file in the destination directory and rename it
    # into place, so a failed run never leaves a partial orderfile behind.
    (fd, temp_filename) = tempfile.mkstemp(
        dir=os.path.dirname(output_filename))
    output_file = os.fdopen(fd, 'w')
    ok = _OutputOrderfile(
        offsets, offset_to_symbol_infos, symbol_to_sections_map, output_file)
    output_file.close()
    os.rename(temp_filename, output_filename)
    # The temporary file has been moved; nothing is left to clean up.
    temp_filename = None
    success = ok
  finally:
    if output_file:
      output_file.close()
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1


if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  sys.exit(main())