diff options
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/deep_memory_profiler/dmprof | 167 | ||||
-rwxr-xr-x | tools/find_runtime_symbols/find_runtime_symbols.py | 73 | ||||
-rwxr-xr-x | tools/find_runtime_symbols/prepare_symbol_info.py | 82 | ||||
-rw-r--r-- | tools/find_runtime_symbols/static_symbols.py (renamed from tools/find_runtime_symbols/procedure_boundaries.py) | 99 |
4 files changed, 260 insertions, 161 deletions
diff --git a/tools/deep_memory_profiler/dmprof b/tools/deep_memory_profiler/dmprof index 1b2729f..6c63332 100755 --- a/tools/deep_memory_profiler/dmprof +++ b/tools/deep_memory_profiler/dmprof @@ -21,8 +21,9 @@ FIND_RUNTIME_SYMBOLS_PATH = os.path.join( 'find_runtime_symbols') sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) -from prepare_symbol_info import prepare_symbol_info from find_runtime_symbols import find_runtime_symbols_list +from prepare_symbol_info import prepare_symbol_info +from static_symbols import StaticSymbols BUCKET_ID = 5 VIRTUAL = 0 @@ -106,6 +107,29 @@ class ObsoleteDumpVersionException(ParsingException): return "obsolete heap profile dump version: %s" % repr(self.value) +class DelayedStaticSymbols(object): + """Represents static symbol information loaded lazily.""" + + def __init__(self, prefix, keep=False): + self.maps_path = prefix + '.maps' + self.keep = keep + if keep: + self.prepared_data_dir = prefix + '.pre' + self.loaded_static_symbols = None + + def get(self): + if not self.loaded_static_symbols: + if not self.keep: + self.prepared_data_dir = tempfile.mkdtemp() + try: + prepare_symbol_info(self.maps_path, self.prepared_data_dir) + self.loaded_static_symbols = StaticSymbols.load(self.prepared_data_dir) + finally: + if not self.keep: + shutil.rmtree(self.prepared_data_dir) + return self.loaded_static_symbols + + class Rule(object): """Represents one matching rule in a policy file.""" @@ -192,7 +216,7 @@ class Dump(object): for i in range(0, BUCKET_ID - 1): sys.stdout.write(words[i] + ' ') for address in bucket.stacktrace: - sys.stdout.write((symbols.get(address) or address) + ' ') + sys.stdout.write((symbols.get(address) or ('0x%016x' % address)) + ' ') sys.stdout.write('\n') @staticmethod @@ -252,7 +276,7 @@ class Dump(object): int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), words[COMMITTED])) for address in bucket.stacktrace: - sys.stdout.write(' ' + address) + sys.stdout.write(' 0x%016x' % address) sys.stdout.write('\n') def print_for_pprof( @@ -575,7 +599,7 @@ class Dump(object): def update_symbols( - symbol_path, maps_path, appeared_addresses, symbols): + symbol_path, delayed_static_symbols, appeared_addresses, symbols): """Updates address/symbol mapping on memory and in a .symbol cache file. It reads cached address/symbol mapping from a .symbol file if it exists. @@ -590,7 +614,7 @@ def update_symbols( Args: symbol_path: A string representing a path for a .symbol file. - maps_path: A string of the path of /proc/.../maps. + delayed_static_symbols: A DelayedStaticSymbols object. appeared_addresses: A list of known addresses. symbols: A dict mapping runtime addresses to symbol names. """ @@ -601,7 +625,7 @@ def update_symbols( items = line.split(None, 1) if len(items) == 1: items.append('??') - symbols[items[0]] = items[1].rstrip() + symbols[int(items[0], 16)] = items[1].rstrip() if symbols: sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols)) else: @@ -613,23 +637,20 @@ def update_symbols( if not unresolved_addresses: sys.stderr.write(' No need to resolve any more addresses.\n') else: - sys.stderr.write(' %d addresses are unresolved.\n' % + sys.stderr.write(' %d addresses unresolved.\n' % len(unresolved_addresses)) - prepared_data_dir = tempfile.mkdtemp() - try: - prepare_symbol_info(maps_path, prepared_data_dir) + static_symbols = delayed_static_symbols.get() + symbol_list = find_runtime_symbols_list( + static_symbols, unresolved_addresses) - symbol_list = find_runtime_symbols_list( - prepared_data_dir, unresolved_addresses) + for address, symbol in zip(unresolved_addresses, symbol_list): + if not symbol: + symbol = '??' + stripped_symbol = symbol.strip() + symbols[address] = stripped_symbol + symbol_f.write('%x %s\n' % (address, stripped_symbol)) - for address, symbol in zip(unresolved_addresses, symbol_list): - if not symbol: - symbol = '??' - stripped_symbol = symbol.strip() - symbols[address] = stripped_symbol - symbol_f.write('%s %s\n' % (address, stripped_symbol)) - finally: - shutil.rmtree(prepared_data_dir) + sys.stderr.write(' All symbols resolved.\n') def parse_policy(policy_path): @@ -703,7 +724,8 @@ def load_buckets(prefix): with open(buckets_path, 'r') as buckets_f: for line in buckets_f: words = line.split() - buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') + stacktrace = [int(address, 16) for address in words[2:]] + buckets[int(words[0])] = Bucket(stacktrace, words[1] == 'mmap') n += 1 return buckets @@ -760,12 +782,13 @@ def load_dumps(dump_path_list, buckets): return dumps, appeared_addresses -def load_and_update_symbol_cache(prefix, appeared_addresses): - maps_path = prefix + '.maps' +def load_and_update_symbol_cache( + prefix, appeared_addresses, delayed_static_symbols): symbol_path = prefix + '.symbols' sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path) symbols = {} - update_symbols(symbol_path, maps_path, appeared_addresses, symbols) + update_symbols( + symbol_path, delayed_static_symbols, appeared_addresses, symbols) return symbols @@ -807,8 +830,31 @@ def load_policies(options_policy): return policies +def load_basic_files_with_multiple_dumps(dump_path, keep): + prefix = find_prefix(dump_path) + buckets = load_buckets(prefix) + dumps, appeared_addresses = load_dumps( + determine_dump_path_list(dump_path, prefix), buckets) + delayed_static_symbols = DelayedStaticSymbols(prefix, keep) + symbols = load_and_update_symbol_cache( + prefix, appeared_addresses, delayed_static_symbols) + return buckets, dumps, appeared_addresses, delayed_static_symbols, symbols + + +def load_basic_files_with_single_dump(dump_path, keep): + prefix = find_prefix(dump_path) + buckets = load_buckets(prefix) + dump, appeared_addresses = load_dump(dump_path, buckets) + delayed_static_symbols = DelayedStaticSymbols(prefix, keep) + symbols = load_and_update_symbol_cache( + prefix, appeared_addresses, delayed_static_symbols) + return buckets, dump, appeared_addresses, delayed_static_symbols, symbols + + def do_stacktrace(sys_argv): - parser = optparse.OptionParser(usage='Usage: %prog stacktrace <dump>') + parser = optparse.OptionParser( + 'Usage: %prog stacktrace [--keep] <dump>') + parser.add_option('--keep', dest='keep', action='store_true') options, args = parser.parse_args(sys_argv) if len(args) != 2: @@ -817,10 +863,8 @@ def do_stacktrace(sys_argv): dump_path = args[1] - prefix = find_prefix(dump_path) - buckets = load_buckets(prefix) - dump, appeared_addresses = load_dump(dump_path, buckets) - symbols = load_and_update_symbol_cache(prefix, appeared_addresses) + buckets, dump, appeared_addresses, delayed_static_symbols, symbols = ( + load_basic_files_with_single_dump(dump_path, options.keep)) dump.print_stacktrace(buckets, symbols) @@ -828,9 +872,11 @@ def do_stacktrace(sys_argv): def do_csv(sys_argv): - parser = optparse.OptionParser('Usage: %prog csv [-p POLICY] <first-dump>') + parser = optparse.OptionParser( + 'Usage: %prog csv [-p POLICY] [--keep] <first-dump>') parser.add_option('-p', '--policy', type='string', dest='policy', help='profile with POLICY', metavar='POLICY') + parser.add_option('--keep', dest='keep', action='store_true') options, args = parser.parse_args(sys_argv) if len(args) != 2: @@ -839,11 +885,8 @@ def do_csv(sys_argv): dump_path = args[1] - prefix = find_prefix(dump_path) - buckets = load_buckets(prefix) - dumps, appeared_addresses = load_dumps( - determine_dump_path_list(dump_path, prefix), buckets) - symbols = load_and_update_symbol_cache(prefix, appeared_addresses) + buckets, dumps, appeared_addresses, delayed_static_symbols, symbols = ( + load_basic_files_with_multiple_dumps(dump_path, options.keep)) policies = load_policies(options.policy) max_components = 0 @@ -878,9 +921,11 @@ def do_csv(sys_argv): def do_json(sys_argv): - parser = optparse.OptionParser('Usage: %prog json [-p POLICY] <first-dump>') + parser = optparse.OptionParser( + 'Usage: %prog json [-p POLICY] [--keep] <first-dump>') parser.add_option('-p', '--policy', type='string', dest='policy', help='profile with POLICY', metavar='POLICY') + parser.add_option('--keep', dest='keep', action='store_true') options, args = parser.parse_args(sys_argv) if len(args) != 2: @@ -889,11 +934,8 @@ def do_json(sys_argv): dump_path = args[1] - prefix = find_prefix(dump_path) - buckets = load_buckets(prefix) - dumps, appeared_addresses = load_dumps( - determine_dump_path_list(dump_path, prefix), buckets) - symbols = load_and_update_symbol_cache(prefix, appeared_addresses) + buckets, dumps, appeared_addresses, delayed_static_symbols, symbols = ( + load_basic_files_with_multiple_dumps(dump_path, options.keep)) policies = load_policies(options.policy) json_base = { @@ -927,9 +969,11 @@ def do_json(sys_argv): def do_list(sys_argv): - parser = optparse.OptionParser('Usage: %prog [-p POLICY] list <first-dump>') + parser = optparse.OptionParser( + 'Usage: %prog [-p POLICY] [--keep] list <first-dump>') parser.add_option('-p', '--policy', type='string', dest='policy', help='profile with POLICY', metavar='POLICY') + parser.add_option('--keep', dest='keep', action='store_true') options, args = parser.parse_args(sys_argv) if len(args) != 2: @@ -938,11 +982,8 @@ def do_list(sys_argv): dump_path = args[1] - prefix = find_prefix(dump_path) - buckets = load_buckets(prefix) - dumps, appeared_addresses = load_dumps( - determine_dump_path_list(dump_path, prefix), buckets) - symbols = load_and_update_symbol_cache(prefix, appeared_addresses) + buckets, dumps, appeared_addresses, delayed_static_symbols, symbols = ( + load_basic_files_with_multiple_dumps(dump_path, options.keep)) policies = load_policies(options.policy) for policy in sorted(policies): @@ -967,7 +1008,8 @@ def do_list(sys_argv): def do_expand(sys_argv): parser = optparse.OptionParser( - 'Usage: %prog expand <dump> <policy> <component> <depth>') + 'Usage: %prog expand [--keep] <dump> <policy> <component> <depth>') + parser.add_option('--keep', dest='keep', action='store_true') options, args = parser.parse_args(sys_argv) if len(args) != 5: @@ -979,10 +1021,8 @@ def do_expand(sys_argv): component_name = args[3] depth = args[4] - prefix = find_prefix(dump_path) - buckets = load_buckets(prefix) - dump, appeared_addresses = load_dump(dump_path, buckets) - symbols = load_and_update_symbol_cache(prefix, appeared_addresses) + buckets, dump, appeared_addresses, delayed_static_symbols, symbols = ( + load_basic_files_with_single_dump(dump_path, options.keep)) policies = load_policies(target_policy) rule_list = policies[target_policy].rules @@ -994,9 +1034,10 @@ def do_expand(sys_argv): def do_pprof(sys_argv): parser = optparse.OptionParser( - 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') + 'Usage: %prog pprof [-c COMPONENT] [--keep] <dump> <policy>') parser.add_option('-c', '--component', type='string', dest='component', help='restrict to COMPONENT', metavar='COMPONENT') + parser.add_option('--keep', dest='keep', action='store_true') options, args = parser.parse_args(sys_argv) if len(args) != 3: @@ -1007,15 +1048,13 @@ def do_pprof(sys_argv): target_policy = args[2] component = options.component - prefix = find_prefix(dump_path) - buckets = load_buckets(prefix) - dump, appeared_addresses = load_dump(dump_path, buckets) - symbols = load_and_update_symbol_cache(prefix, appeared_addresses) + buckets, dump, appeared_addresses, delayed_static_symbols, symbols = ( + load_basic_files_with_single_dump(dump_path, options.keep)) policies = load_policies(target_policy) rule_list = policies[target_policy].rules - with open(prefix + '.maps', 'r') as maps_f: + with open(find_prefix(dump_path) + '.maps', 'r') as maps_f: maps_lines = maps_f.readlines() dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols) @@ -1054,12 +1093,12 @@ Commands: stacktrace Convert runtime addresses to symbol names Quick Reference: - dmprof csv [-p POLICY] <first-dump> - dmprof expand <dump> <policy> <component> <depth> - dmprof json [-p POLICY] <first-dump> - dmprof list [-p POLICY] <first-dump> - dmprof pprof [-c COMPONENT] <dump> <policy> - dmprof stacktrace <dump> + dmprof csv [-p POLICY] [--keep] <first-dump> + dmprof expand [--keep] <dump> <policy> <component> <depth> + dmprof json [-p POLICY] [--keep] <first-dump> + dmprof list [-p POLICY] [--keep] <first-dump> + dmprof pprof [-c COMPONENT] [--keep] <dump> <policy> + dmprof stacktrace [--keep] <dump> """ % (sys.argv[0])) sys.exit(1) action = sys.argv.pop(1) diff --git a/tools/find_runtime_symbols/find_runtime_symbols.py b/tools/find_runtime_symbols/find_runtime_symbols.py index 1c96c7f..2d9f452c 100755 --- a/tools/find_runtime_symbols/find_runtime_symbols.py +++ b/tools/find_runtime_symbols/find_runtime_symbols.py @@ -3,14 +3,12 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -import json import logging import os import re import sys -from parse_proc_maps import parse_proc_maps -from procedure_boundaries import get_procedure_boundaries_from_nm_bsd +from static_symbols import StaticSymbols from util import executable_condition @@ -50,37 +48,9 @@ class _FileOutput(object): self.result.write('%s\n' % symbol_name) -def _find_runtime_symbols( - prepared_data_dir, addresses, outputter, loglevel=logging.WARN): - log = logging.getLogger('find_runtime_symbols') - log.setLevel(loglevel) - handler = logging.StreamHandler() - handler.setLevel(loglevel) - formatter = logging.Formatter('%(message)s') - handler.setFormatter(formatter) - log.addHandler(handler) - - if not os.path.exists(prepared_data_dir): - log.warn("Nothing found: %s" % prepared_data_dir) - return 1 - if not os.path.isdir(prepared_data_dir): - log.warn("Not a directory: %s" % prepared_data_dir) - return 1 - - with open(os.path.join(prepared_data_dir, 'maps'), mode='r') as f: - maps = parse_proc_maps(f) - - with open(os.path.join(prepared_data_dir, 'nm.json'), mode='r') as f: - nm_files = json.load(f) - - symbol_table = {} - for entry in maps.iter(executable_condition): - if nm_files.has_key(entry.name): - if nm_files[entry.name]['format'] == 'bsd': - with open(os.path.join(prepared_data_dir, - nm_files[entry.name]['file']), mode='r') as f: - symbol_table[entry.name] = get_procedure_boundaries_from_nm_bsd( - f, nm_files[entry.name]['mangled']) +def _find_runtime_symbols(static_symbols, addresses, outputter): + maps = static_symbols.maps + symbol_tables = static_symbols.procedure_boundaries for address in addresses: if isinstance(address, str): @@ -88,8 +58,8 @@ def _find_runtime_symbols( is_found = False for entry in maps.iter(executable_condition): if entry.begin <= address < entry.end: - if entry.name in symbol_table: - found = symbol_table[entry.name].find_procedure( + if entry.name in symbol_tables: + found = symbol_tables[entry.name].find_procedure( address - (entry.begin - entry.offset)) outputter.output(address, found) else: @@ -102,21 +72,21 @@ def _find_runtime_symbols( return 0 -def find_runtime_symbols_list(prepared_data_dir, addresses): +def find_runtime_symbols_list(static_symbols, addresses): result = [] - _find_runtime_symbols(prepared_data_dir, addresses, _ListOutput(result)) + _find_runtime_symbols(static_symbols, addresses, _ListOutput(result)) return result -def find_runtime_symbols_dict(prepared_data_dir, addresses): +def find_runtime_symbols_dict(static_symbols, addresses): result = {} - _find_runtime_symbols(prepared_data_dir, addresses, _DictOutput(result)) + _find_runtime_symbols(static_symbols, addresses, _DictOutput(result)) return result -def find_runtime_symbols_file(prepared_data_dir, addresses, f): +def find_runtime_symbols_file(static_symbols, addresses, f): _find_runtime_symbols( - prepared_data_dir, addresses, _FileOutput(f, False)) + static_symbols, addresses, _FileOutput(f, False)) def main(): @@ -127,7 +97,24 @@ def main(): """ % sys.argv[0]) return 1 - return find_runtime_symbols_file(sys.argv[1], sys.stdin, sys.stdout) + log = logging.getLogger('find_runtime_symbols') + log.setLevel(logging.WARN) + handler = logging.StreamHandler() + handler.setLevel(logging.WARN) + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + log.addHandler(handler) + + prepared_data_dir = sys.argv[1] + if not os.path.exists(prepared_data_dir): + log.warn("Nothing found: %s" % prepared_data_dir) + return 1 + if not os.path.isdir(prepared_data_dir): + log.warn("Not a directory: %s" % prepared_data_dir) + return 1 + + static_symbols = StaticSymbols.load(prepared_data_dir) + return find_runtime_symbols_file(static_symbols, sys.stdin, sys.stdout) if __name__ == '__main__': diff --git a/tools/find_runtime_symbols/prepare_symbol_info.py b/tools/find_runtime_symbols/prepare_symbol_info.py index 57fcfbc..50654b1a 100755 --- a/tools/find_runtime_symbols/prepare_symbol_info.py +++ b/tools/find_runtime_symbols/prepare_symbol_info.py @@ -16,6 +16,39 @@ from parse_proc_maps import parse_proc_maps from util import executable_condition +def _dump_command_result(command, output_dir_path, basename, suffix, log): + handle_out, filename_out = tempfile.mkstemp( + suffix=suffix, prefix=basename + '.', dir=output_dir_path) + handle_err, filename_err = tempfile.mkstemp( + suffix=suffix + '.err', prefix=basename + '.', dir=output_dir_path) + error = False + try: + subprocess.check_call( + command, stdout=handle_out, stderr=handle_err, shell=True) + except: + error = True + finally: + os.close(handle_err) + os.close(handle_out) + + if os.path.exists(filename_err): + if log.getEffectiveLevel() <= logging.DEBUG: + with open(filename_err, 'r') as f: + for line in f: + log.debug(line.rstrip()) + os.remove(filename_err) + + if os.path.exists(filename_out) and ( + os.path.getsize(filename_out) == 0 or error): + os.remove(filename_out) + return None + + if not os.path.exists(filename_out): + return None + + return filename_out + + def prepare_symbol_info(maps_path, output_dir_path=None, loglevel=logging.WARN): log = logging.getLogger('prepare_symbol_info') log.setLevel(loglevel) @@ -58,42 +91,31 @@ def prepare_symbol_info(maps_path, output_dir_path=None, loglevel=logging.WARN): maps = parse_proc_maps(f) log.debug('Listing up symbols.') - nm_files = {} + files = {} for entry in maps.iter(executable_condition): log.debug(' %016x-%016x +%06x %s' % ( entry.begin, entry.end, entry.offset, entry.name)) - with tempfile.NamedTemporaryFile( - prefix=os.path.basename(entry.name) + '.', - suffix='.nm', delete=False, mode='w', dir=output_dir_path) as f: - nm_filename = os.path.realpath(f.name) - nm_succeeded = False - cppfilt_succeeded = False - p_nm = subprocess.Popen( - 'nm -n --format bsd %s' % entry.name, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - p_cppfilt = subprocess.Popen( - 'c++filt', shell=True, - stdin=p_nm.stdout, stdout=f, stderr=subprocess.PIPE) - - if p_nm.wait() == 0: - nm_succeeded = True - for line in p_nm.stderr: - log.debug(line.rstrip()) - if p_cppfilt.wait() == 0: - cppfilt_succeeded = True - for line in p_cppfilt.stderr: - log.debug(line.rstrip()) - - if nm_succeeded and cppfilt_succeeded: - nm_files[entry.name] = { + nm_filename = _dump_command_result( + 'nm -n --format bsd %s | c++filt' % entry.name, + output_dir_path, os.path.basename(entry.name), '.nm', log) + if not nm_filename: + continue + readelf_e_filename = _dump_command_result( + 'readelf -e %s' % entry.name, + output_dir_path, os.path.basename(entry.name), '.readelf-e', log) + if not readelf_e_filename: + continue + + files[entry.name] = {} + files[entry.name]['nm'] = { 'file': os.path.basename(nm_filename), 'format': 'bsd', 'mangled': False} - else: - os.remove(nm_filename) + files[entry.name]['readelf-e'] = { + 'file': os.path.basename(readelf_e_filename)} - with open(os.path.join(output_dir_path, 'nm.json'), 'w') as f: - json.dump(nm_files, f, indent=2, sort_keys=True) + with open(os.path.join(output_dir_path, 'files.json'), 'w') as f: + json.dump(files, f, indent=2, sort_keys=True) log.info('Collected symbol information at "%s".' % output_dir_path) return 0 @@ -110,7 +132,7 @@ def main(): """ % sys.argv[0]) return 1 elif len(sys.argv) == 2: - sys.exit(prepare_symbol_info(sys.argv[1], loglevel=logging.DEBUG)) + sys.exit(prepare_symbol_info(sys.argv[1], loglevel=logging.INFO)) else: sys.exit(prepare_symbol_info(sys.argv[1], sys.argv[2], loglevel=logging.INFO)) diff --git a/tools/find_runtime_symbols/procedure_boundaries.py b/tools/find_runtime_symbols/static_symbols.py index be1d76c..01412021 100644 --- a/tools/find_runtime_symbols/procedure_boundaries.py +++ b/tools/find_runtime_symbols/static_symbols.py @@ -3,10 +3,14 @@ # found in the LICENSE file. import bisect +import json import os import re import sys +from parse_proc_maps import parse_proc_maps +from util import executable_condition + _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') @@ -18,6 +22,63 @@ class ParsingException(Exception): return repr(self.args[0]) +class StaticSymbols(object): + """Represents static symbol information.""" + + def __init__(self, maps, procedure_boundaries): + self.maps = maps + self.procedure_boundaries = procedure_boundaries + + # TODO(dmikurube): It will be deprecated. + @staticmethod + def _load_nm(prepared_data_dir, maps_filename, nm_json_filename): + with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f: + maps = parse_proc_maps(f) + with open(os.path.join(prepared_data_dir, nm_json_filename), mode='r') as f: + nm_files = json.load(f) + + symbol_tables = {} + for entry in maps.iter(executable_condition): + if nm_files.has_key(entry.name): + if nm_files[entry.name]['format'] == 'bsd': + with open(os.path.join(prepared_data_dir, + nm_files[entry.name]['file']), mode='r') as f: + symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd( + f, nm_files[entry.name]['mangled']) + + return StaticSymbols(maps, symbol_tables) + + @staticmethod + def _load_files(prepared_data_dir, maps_filename, files_filename): + with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f: + maps = parse_proc_maps(f) + with open(os.path.join(prepared_data_dir, files_filename), mode='r') as f: + files = json.load(f) + + symbol_tables = {} + for entry in maps.iter(executable_condition): + if entry.name in files: + if 'nm' in files[entry.name]: + nm_entry = files[entry.name]['nm'] + if nm_entry['format'] == 'bsd': + with open(os.path.join(prepared_data_dir, nm_entry['file']), + mode='r') as f: + symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd( + f, nm_entry['mangled']) + if 'readelf-e' in files: + readelf_entry = files[entry.name]['readelf-e'] + # TODO(dmikurube) Implement it. + + return StaticSymbols(maps, symbol_tables) + + @staticmethod + def load(prepared_data_dir): + if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')): + return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json') + else: + return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json') + + class ProcedureBoundary(object): """A class for a procedure symbol and an address range for the symbol.""" @@ -65,7 +126,15 @@ def _get_short_function_name(function): return _LEADING_TYPE_PATTERN.sub('\g<1>', function) -def get_procedure_boundaries_from_nm_bsd(f, mangled=False): +def _parse_nm_bsd_line(line): + if line[8] == ' ': + return line[0:8], line[9], line[11:] + elif line[16] == ' ': + return line[0:16], line[17], line[19:] + raise ParsingException('Invalid nm output.') + + +def _get_static_symbols_from_nm_bsd(f, mangled=False): """Gets procedure boundaries from a result of nm -n --format bsd. Args: @@ -81,27 +150,9 @@ def get_procedure_boundaries_from_nm_bsd(f, mangled=False): routine = '' for line in f: - symbol_info = line.rstrip().split(None, 2) - if len(symbol_info) == 3: - if len(symbol_info[0]) == 1: - symbol_info = line.split(None, 1) - (sym_type, this_routine) = symbol_info - sym_value = '' - else: - (sym_value, sym_type, this_routine) = symbol_info - elif len(symbol_info) == 2: - if len(symbol_info[0]) == 1: - (sym_type, this_routine) = symbol_info - sym_value = '' - elif len(symbol_info[0]) == 8 or len(symbol_info[0]) == 16: - (sym_value, this_routine) = symbol_info - sym_type = ' ' - else: - raise ParsingException('Invalid output 1 from (eu-)nm.') - else: - raise ParsingException('Invalid output 2 from (eu-)nm.') + sym_value, sym_type, sym_name = _parse_nm_bsd_line(line) - if sym_value == '': + if sym_value[0] == ' ': continue start_val = int(sym_value, 16) @@ -123,7 +174,7 @@ def get_procedure_boundaries_from_nm_bsd(f, mangled=False): # got touched in the queue), and ignore the others. if start_val == last_start and (sym_type == 't' or sym_type == 'T'): # We are the 'T' symbol at this address, replace previous symbol. - routine = this_routine + routine = sym_name continue elif start_val == last_start: # We're not the 'T' symbol at this address, so ignore us. @@ -133,14 +184,14 @@ def get_procedure_boundaries_from_nm_bsd(f, mangled=False): # has multiple occurrences of this routine. We use a syntax # that resembles template paramters that are automatically # stripped out by ShortFunctionName() - this_routine += "<%016x>" % start_val + sym_name += "<%016x>" % start_val if not mangled: routine = _get_short_function_name(routine) symbol_table.append(ProcedureBoundary(last_start, start_val, routine)) last_start = start_val - routine = this_routine + routine = sym_name if not mangled: routine = _get_short_function_name(routine) |