#!/usr/bin/python # Copyright 2015 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Symbolizes a log file produced by cyprofile instrumentation. Given a log file and the binary being profiled, creates an orderfile. """ import logging import multiprocessing import optparse import os import tempfile import string import sys import symbol_extractor def _ParseLogLines(log_file_lines): """Parses a merged cyglog produced by mergetraces.py. Args: log_file_lines: array of lines in log file produced by profiled run lib_name: library or executable containing symbols Below is an example of a small log file: 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so secs usecs pid:threadid func START 1314897086 795828 3587:1074648168 0x509e105c 1314897086 795874 3587:1074648168 0x509e0eb4 1314897086 796326 3587:1074648168 0x509e0e3c 1314897086 796552 3587:1074648168 0x509e07bc END Returns: An ordered list of callee offsets. """ call_lines = [] vm_start = 0 line = log_file_lines[0] assert 'r-xp' in line end_index = line.find('-') vm_start = int(line[:end_index], 16) for line in log_file_lines[3:]: fields = line.split() if len(fields) == 4: call_lines.append(fields) else: assert fields[0] == 'END' # Convert strings to int in fields. call_info = [] for call_line in call_lines: addr = int(call_line[3], 16) if vm_start < addr: addr -= vm_start call_info.append(addr) return call_info def _GroupLibrarySymbolInfosByOffset(lib_filename): """Returns a dict {offset: [SymbolInfo]} from a library.""" symbol_infos = symbol_extractor.SymbolInfosFromBinary(lib_filename) return symbol_extractor.GroupSymbolInfosByOffset(symbol_infos) class SymbolNotFoundException(Exception): def __init__(self, value): super(SymbolNotFoundException, self).__init__(value) self.value = value def __str__(self): return repr(self.value) def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset): """Finds all SymbolInfo at a given offset. Args: offset_to_symbol_infos: {offset: [SymbolInfo]} offset: offset to look the symbols at Returns: The list of SymbolInfo at the given offset Raises: SymbolNotFoundException if the offset doesn't match any symbol. """ if offset in offset_to_symbol_infos: return offset_to_symbol_infos[offset] elif offset % 2 and (offset - 1) in offset_to_symbol_infos: # On ARM, odd addresses are used to signal thumb instruction. They are # generated by setting the LSB to 1 (see # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html). # TODO(lizeb): Make sure this hack doesn't propagate to other archs. return offset_to_symbol_infos[offset - 1] else: raise SymbolNotFoundException(offset) class WarningCollector(object): """Collect warnings, but limit the number printed to a set value.""" def __init__(self, max_warnings): self._warnings = 0 self._max_warnings = max_warnings def Write(self, message): if self._warnings < self._max_warnings: logging.warning(message) self._warnings += 1 def WriteEnd(self, message): if self._warnings > self._max_warnings: logging.warning('%d more warnings for: %s' % ( self._warnings - self._max_warnings, message)) def _GetObjectFileNames(obj_dir): """Returns the list of object files in a directory.""" obj_files = [] for (dirpath, _, filenames) in os.walk(obj_dir): for file_name in filenames: if file_name.endswith('.o'): obj_files.append(os.path.join(dirpath, file_name)) return obj_files def _AllSymbolInfos(object_filenames): """Returns a list of SymbolInfo from an iterable of filenames.""" pool = multiprocessing.Pool() # Hopefully the object files are in the page cache at this step, so IO should # not be a problem (hence no concurrency limit on the pool). symbol_infos_nested = pool.map( symbol_extractor.SymbolInfosFromBinary, object_filenames) result = [] for symbol_infos in symbol_infos_nested: result += symbol_infos return result def _GetSymbolToSectionMapFromObjectFiles(obj_dir): """ Creates a mapping from symbol to linker section name by scanning all the object files. """ object_files = _GetObjectFileNames(obj_dir) symbol_to_section_map = {} symbol_warnings = WarningCollector(300) symbol_infos = _AllSymbolInfos(object_files) for symbol_info in symbol_infos: symbol = symbol_info.name if symbol.startswith('.LTHUNK'): continue section = symbol_info.section if ((symbol in symbol_to_section_map) and (symbol_to_section_map[symbol] != symbol_info.section)): symbol_warnings.Write('Symbol ' + symbol + ' in conflicting sections ' + section + ' and ' + symbol_to_section_map[symbol]) elif not section.startswith('.text'): symbol_warnings.Write('Symbol ' + symbol + ' in incorrect section ' + section) else: symbol_to_section_map[symbol] = section symbol_warnings.WriteEnd('bad sections') return symbol_to_section_map def _WarnAboutDuplicates(offsets): """Warns about duplicate offsets. Args: offsets: list of offsets to check for duplicates Returns: True if there are no duplicates, False otherwise. """ seen_offsets = set() ok = True for offset in offsets: if offset not in seen_offsets: seen_offsets.add(offset) else: ok = False logging.warning('Duplicate offset: ' + hex(offset)) return ok def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map, output_file): """Outputs the orderfile to output_file. Args: offsets: Iterable of offsets to match to section names offset_to_symbol_infos: {offset: [SymbolInfo]} symbol_to_section_map: {name: section} output_file: file-like object to write the results to """ success = True unknown_symbol_warnings = WarningCollector(300) symbol_not_found_warnings = WarningCollector(300) output_sections = set() for offset in offsets: try: symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset) for symbol_info in symbol_infos: if symbol_info.name in symbol_to_section_map: section = symbol_to_section_map[symbol_info.name] if not section in output_sections: output_file.write(section + '\n') output_sections.add(section) else: unknown_symbol_warnings.Write( 'No known section for symbol ' + symbol_info.name) except SymbolNotFoundException: symbol_not_found_warnings.Write( 'Did not find function in binary. offset: ' + hex(offset)) success = False unknown_symbol_warnings.WriteEnd('no known section for symbol.') symbol_not_found_warnings.WriteEnd('symbol not found in the binary.') return success def main(): parser = optparse.OptionParser(usage= 'usage: %prog [options] ') parser.add_option('--target-arch', action='store', dest='arch', default='arm', choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'], help='The target architecture for libchrome.so') options, argv = parser.parse_args(sys.argv) if len(argv) != 4: parser.print_help() return 1 (log_filename, lib_filename, output_filename) = argv[1:] symbol_extractor.SetArchitecture(options.arch) obj_dir = os.path.abspath(os.path.join( os.path.dirname(lib_filename), '../obj')) log_file_lines = map(string.rstrip, open(log_filename).readlines()) offsets = _ParseLogLines(log_file_lines) _WarnAboutDuplicates(offsets) offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename) symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir) success = False temp_filename = None output_file = None try: (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename)) output_file = os.fdopen(fd, 'w') ok = _OutputOrderfile( offsets, offset_to_symbol_infos, symbol_to_section_map, output_file) output_file.close() os.rename(temp_filename, output_filename) temp_filename = None success = ok finally: if output_file: output_file.close() if temp_filename: os.remove(temp_filename) return 0 if success else 1 if __name__ == '__main__': logging.basicConfig(level=logging.INFO) sys.exit(main())