author: dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>, 2013-07-17 06:28:58 +0000
committer: dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>, 2013-07-17 06:28:58 +0000
commit: 7963c8a95a026eabf8031bef22e9b8e4f6b1f3c6 (patch)
tree: 11e4a6c2fb90aa53c655bb4f2f3bc643700f1839 /tools/deep_memory_profiler
parent: 89c36f807bb63002f087066decdff93dd6e140e4 (diff)
Refactor dmprof: Split dmprof.py into modules.
No changes in behavior or in the main code. The changes are:
- .*Commands are moved into subcommands/.
- Other classes and functions are moved into lib/.
- Constants are defined in their appropriate files in lib/.
- External libraries are imported via lib/.
- "Command" is renamed to "SubCommand".
BUG=123750
NOTRY=True
Review URL: https://chromiumcodereview.appspot.com/19346002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@211976 0039d316-1c4b-4281-b951-d872f2087c98
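The + side of the first hunk below shows that the refactored dmprof.py keeps only the module docstring, the logger, and two imports (lib.exceptions and subcommands). As a rough sketch of how such a thin entry point could dispatch to the relocated command classes — the actual main(), the COMMANDS table, and the class names under subcommands/ are not shown in this excerpt, so they are assumptions here:

# Illustrative only: a thin dmprof.py entry point consistent with the new
# imports shown in the diff below.  The real main() and the contents of the
# subcommands package are not part of this excerpt, so the COMMANDS table and
# the per-command do() usage are assumptions.
import logging
import sys

from lib.exceptions import ParsingException
import subcommands

LOGGER = logging.getLogger('dmprof')


def main():
  COMMANDS = {
      'buckets': subcommands.BucketsCommand,
      'stacktrace': subcommands.StacktraceCommand,
  }
  if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS:
    sys.stderr.write('Usage: dmprof <command> [options] [<args>]\n')
    return 1
  logging.basicConfig(level=logging.INFO, format='%(message)s')
  try:
    # The old Command.do() expects argv starting at the subcommand name,
    # so pass sys.argv[1:] through unchanged.
    return COMMANDS[sys.argv[1]]().do(sys.argv[1:])
  except ParsingException, e:
    LOGGER.error('Error in parsing heap profile dumps: %s' % e)
    return 1


if __name__ == '__main__':
  sys.exit(main())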
Diffstat (limited to 'tools/deep_memory_profiler')
23 files changed, 3219 insertions, 3012 deletions
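One of the bullet points above, "External libraries are imported via lib/", refers to the sys.path manipulation that the old monolithic dmprof.py performed inline for find_runtime_symbols (and for simplejson on pre-2.7 Pythons), visible at the top of the removed file below. A minimal sketch of a centralized shim under lib/ that could take over that role — the module name (lib/symbol.py) and its exact contents are assumptions, since the new lib/ files are not included in this excerpt:

# Hypothetical lib/symbol.py: funnels the find_runtime_symbols imports through
# one place so the rest of dmprof can do "from lib.symbol import ...".
# Assumes lib/ sits directly under tools/deep_memory_profiler/, hence one more
# os.pardir than the old in-file path computation.
import os
import sys

_BASE_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.join(
    _BASE_PATH, os.pardir, os.pardir, 'find_runtime_symbols'))

import find_runtime_symbols  # pylint: disable=F0401
import prepare_symbol_info  # pylint: disable=F0401
import proc_maps  # pylint: disable=F0401

FUNCTION_SYMBOLS = find_runtime_symbols.FUNCTION_SYMBOLS
SOURCEFILE_SYMBOLS = find_runtime_symbols.SOURCEFILE_SYMBOLS
TYPEINFO_SYMBOLS = find_runtime_symbols.TYPEINFO_SYMBOLS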
diff --git a/tools/deep_memory_profiler/dmprof.py b/tools/deep_memory_profiler/dmprof.py index 533cbb9..0cb030a 100644 --- a/tools/deep_memory_profiler/dmprof.py +++ b/tools/deep_memory_profiler/dmprof.py @@ -2,2988 +2,33 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -"""The deep heap profiler script for Chrome.""" +"""The Deep Memory Profiler analyzer script. + +See http://dev.chromium.org/developers/deep-memory-profiler for details. +""" -import copy -import cStringIO -import datetime -import json import logging -import optparse -import os -import re -import struct -import subprocess import sys -import tempfile -import time -import zipfile - -try: - from collections import OrderedDict # pylint: disable=E0611 -except ImportError: - # TODO(dmikurube): Remove this once Python 2.7 is required. - BASE_PATH = os.path.dirname(os.path.abspath(__file__)) - SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party') - sys.path.insert(0, SIMPLEJSON_PATH) - from simplejson import OrderedDict - -from range_dict import ExclusiveRangeDict - -BASE_PATH = os.path.dirname(os.path.abspath(__file__)) -FIND_RUNTIME_SYMBOLS_PATH = os.path.join( - BASE_PATH, os.pardir, 'find_runtime_symbols') -sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) -import find_runtime_symbols -import prepare_symbol_info -import proc_maps +from lib.exceptions import ParsingException +import subcommands -from find_runtime_symbols import FUNCTION_SYMBOLS -from find_runtime_symbols import SOURCEFILE_SYMBOLS -from find_runtime_symbols import TYPEINFO_SYMBOLS - -BUCKET_ID = 5 -VIRTUAL = 0 -COMMITTED = 1 -ALLOC_COUNT = 2 -FREE_COUNT = 3 -NULL_REGEX = re.compile('') LOGGER = logging.getLogger('dmprof') -POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') -CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir) - -DEFAULT_SORTERS = [ - os.path.join(BASE_PATH, 'sorter.malloc-component.json'), - os.path.join(BASE_PATH, 'sorter.malloc-type.json'), - os.path.join(BASE_PATH, 'sorter.vm-map.json'), - os.path.join(BASE_PATH, 'sorter.vm-sharing.json'), - ] - - -# Heap Profile Dump versions - -# DUMP_DEEP_[1-4] are obsolete. -# DUMP_DEEP_2+ distinct mmap regions and malloc chunks. -# DUMP_DEEP_3+ don't include allocation functions in their stack dumps. -# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". -# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. -# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. -DUMP_DEEP_1 = 'DUMP_DEEP_1' -DUMP_DEEP_2 = 'DUMP_DEEP_2' -DUMP_DEEP_3 = 'DUMP_DEEP_3' -DUMP_DEEP_4 = 'DUMP_DEEP_4' - -DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) - -# DUMP_DEEP_5 doesn't separate sections for malloc and mmap. -# malloc and mmap are identified in bucket files. -# DUMP_DEEP_5 should be processed by POLICY_DEEP_4. -DUMP_DEEP_5 = 'DUMP_DEEP_5' - -# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5. -DUMP_DEEP_6 = 'DUMP_DEEP_6' - -# Heap Profile Policy versions - -# POLICY_DEEP_1 DOES NOT include allocation_type columns. -# mmap regions are distincted w/ mmap frames in the pattern column. -POLICY_DEEP_1 = 'POLICY_DEEP_1' - -# POLICY_DEEP_2 DOES include allocation_type columns. -# mmap regions are distincted w/ the allocation_type column. -POLICY_DEEP_2 = 'POLICY_DEEP_2' - -# POLICY_DEEP_3 is in JSON format. -POLICY_DEEP_3 = 'POLICY_DEEP_3' - -# POLICY_DEEP_3 contains typeinfo. 
-POLICY_DEEP_4 = 'POLICY_DEEP_4' - - -class EmptyDumpException(Exception): - def __init__(self, value=''): - super(EmptyDumpException, self).__init__() - self.value = value - def __str__(self): - return repr(self.value) - - -class ParsingException(Exception): - def __init__(self, value=''): - super(ParsingException, self).__init__() - self.value = value - def __str__(self): - return repr(self.value) - - -class InvalidDumpException(ParsingException): - def __init__(self, value): - super(InvalidDumpException, self).__init__() - self.value = value - def __str__(self): - return "invalid heap profile dump: %s" % repr(self.value) - - -class ObsoleteDumpVersionException(ParsingException): - def __init__(self, value): - super(ObsoleteDumpVersionException, self).__init__() - self.value = value - def __str__(self): - return "obsolete heap profile dump version: %s" % repr(self.value) - - -class ListAttribute(ExclusiveRangeDict.RangeAttribute): - """Represents a list for an attribute in range_dict.ExclusiveRangeDict.""" - def __init__(self): - super(ListAttribute, self).__init__() - self._list = [] - - def __str__(self): - return str(self._list) - - def __repr__(self): - return 'ListAttribute' + str(self._list) - - def __len__(self): - return len(self._list) - - def __iter__(self): - for x in self._list: - yield x - - def __getitem__(self, index): - return self._list[index] - - def __setitem__(self, index, value): - if index >= len(self._list): - self._list.extend([None] * (index + 1 - len(self._list))) - self._list[index] = value - - def copy(self): - new_list = ListAttribute() - for index, item in enumerate(self._list): - new_list[index] = copy.deepcopy(item) - return new_list - - -class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): - """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" - _DUMMY_ENTRY = proc_maps.ProcMapsEntry( - 0, # begin - 0, # end - '-', # readable - '-', # writable - '-', # executable - '-', # private - 0, # offset - '00', # major - '00', # minor - 0, # inode - '' # name - ) - - def __init__(self): - super(ProcMapsEntryAttribute, self).__init__() - self._entry = self._DUMMY_ENTRY.as_dict() - - def __str__(self): - return str(self._entry) - - def __repr__(self): - return 'ProcMapsEntryAttribute' + str(self._entry) - - def __getitem__(self, key): - return self._entry[key] - - def __setitem__(self, key, value): - if key not in self._entry: - raise KeyError(key) - self._entry[key] = value - - def copy(self): - new_entry = ProcMapsEntryAttribute() - for key, value in self._entry.iteritems(): - new_entry[key] = copy.deepcopy(value) - return new_entry - - -def skip_while(index, max_index, skipping_condition): - """Increments |index| until |skipping_condition|(|index|) is False. - - Returns: - A pair of an integer indicating a line number after skipped, and a - boolean value which is True if found a line which skipping_condition - is False for. - """ - while skipping_condition(index): - index += 1 - if index >= max_index: - return index, False - return index, True - - -class SymbolDataSources(object): - """Manages symbol data sources in a process. - - The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and - so on. They are collected into a directory '|prefix|.symmap' from the binary - files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. - - Binaries are not mandatory to profile. The prepared data sources work in - place of the binary even if the binary has been overwritten with another - binary. 
- - Note that loading the symbol data sources takes a long time. They are often - very big. So, the 'dmprof' profiler is designed to use 'SymbolMappingCache' - which caches actually used symbols. - """ - def __init__(self, prefix, alternative_dirs=None): - self._prefix = prefix - self._prepared_symbol_data_sources_path = None - self._loaded_symbol_data_sources = None - self._alternative_dirs = alternative_dirs or {} - - def prepare(self): - """Prepares symbol data sources by extracting mapping from a binary. - - The prepared symbol data sources are stored in a directory. The directory - name is stored in |self._prepared_symbol_data_sources_path|. - - Returns: - True if succeeded. - """ - LOGGER.info('Preparing symbol mapping...') - self._prepared_symbol_data_sources_path, used_tempdir = ( - prepare_symbol_info.prepare_symbol_info( - self._prefix + '.maps', - output_dir_path=self._prefix + '.symmap', - alternative_dirs=self._alternative_dirs, - use_tempdir=True, - use_source_file_name=True)) - if self._prepared_symbol_data_sources_path: - LOGGER.info(' Prepared symbol mapping.') - if used_tempdir: - LOGGER.warn(' Using a temporary directory for symbol mapping.') - LOGGER.warn(' Delete it by yourself.') - LOGGER.warn(' Or, move the directory by yourself to use it later.') - return True - else: - LOGGER.warn(' Failed to prepare symbol mapping.') - return False - - def get(self): - """Returns the prepared symbol data sources. - - Returns: - The prepared symbol data sources. None if failed. - """ - if not self._prepared_symbol_data_sources_path and not self.prepare(): - return None - if not self._loaded_symbol_data_sources: - LOGGER.info('Loading symbol mapping...') - self._loaded_symbol_data_sources = ( - find_runtime_symbols.RuntimeSymbolsInProcess.load( - self._prepared_symbol_data_sources_path)) - return self._loaded_symbol_data_sources - - def path(self): - """Returns the path of the prepared symbol data sources if possible.""" - if not self._prepared_symbol_data_sources_path and not self.prepare(): - return None - return self._prepared_symbol_data_sources_path - - -class SymbolFinder(object): - """Finds corresponding symbols from addresses. - - This class does only 'find()' symbols from a specified |address_list|. - It is introduced to make a finder mockable. - """ - def __init__(self, symbol_type, symbol_data_sources): - self._symbol_type = symbol_type - self._symbol_data_sources = symbol_data_sources - - def find(self, address_list): - return find_runtime_symbols.find_runtime_symbols( - self._symbol_type, self._symbol_data_sources.get(), address_list) - - -class SymbolMappingCache(object): - """Caches mapping from actually used addresses to symbols. - - 'update()' updates the cache from the original symbol data sources via - 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. - """ - def __init__(self): - self._symbol_mapping_caches = { - FUNCTION_SYMBOLS: {}, - SOURCEFILE_SYMBOLS: {}, - TYPEINFO_SYMBOLS: {}, - } - - def update(self, symbol_type, bucket_set, symbol_finder, cache_f): - """Updates symbol mapping cache on memory and in a symbol cache file. - - It reads cached symbol mapping from a symbol cache file |cache_f| if it - exists. Unresolved addresses are then resolved and added to the cache - both on memory and in the symbol cache file with using 'SymbolFinder'. - - A cache file is formatted as follows: - <Address> <Symbol> - <Address> <Symbol> - <Address> <Symbol> - ... - - Args: - symbol_type: A type of symbols to update. 
It should be one of - FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. - bucket_set: A BucketSet object. - symbol_finder: A SymbolFinder object to find symbols. - cache_f: A readable and writable IO object of the symbol cache file. - """ - cache_f.seek(0, os.SEEK_SET) - self._load(cache_f, symbol_type) - - unresolved_addresses = sorted( - address for address in bucket_set.iter_addresses(symbol_type) - if address not in self._symbol_mapping_caches[symbol_type]) - - if not unresolved_addresses: - LOGGER.info('No need to resolve any more addresses.') - return - - cache_f.seek(0, os.SEEK_END) - LOGGER.info('Loading %d unresolved addresses.' % - len(unresolved_addresses)) - symbol_dict = symbol_finder.find(unresolved_addresses) - - for address, symbol in symbol_dict.iteritems(): - stripped_symbol = symbol.strip() or '?' - self._symbol_mapping_caches[symbol_type][address] = stripped_symbol - cache_f.write('%x %s\n' % (address, stripped_symbol)) - - def lookup(self, symbol_type, address): - """Looks up a symbol for a given |address|. - - Args: - symbol_type: A type of symbols to update. It should be one of - FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. - address: An integer that represents an address. - - Returns: - A string that represents a symbol. - """ - return self._symbol_mapping_caches[symbol_type].get(address) - - def _load(self, cache_f, symbol_type): - try: - for line in cache_f: - items = line.rstrip().split(None, 1) - if len(items) == 1: - items.append('??') - self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] - LOGGER.info('Loaded %d entries from symbol cache.' % - len(self._symbol_mapping_caches[symbol_type])) - except IOError as e: - LOGGER.info('The symbol cache file is invalid: %s' % e) - - -class Rule(object): - """Represents one matching rule in a policy file.""" - - def __init__(self, - name, - allocator_type, - stackfunction_pattern=None, - stacksourcefile_pattern=None, - typeinfo_pattern=None, - mappedpathname_pattern=None, - mappedpermission_pattern=None, - sharedwith=None): - self._name = name - self._allocator_type = allocator_type - - self._stackfunction_pattern = None - if stackfunction_pattern: - self._stackfunction_pattern = re.compile( - stackfunction_pattern + r'\Z') - - self._stacksourcefile_pattern = None - if stacksourcefile_pattern: - self._stacksourcefile_pattern = re.compile( - stacksourcefile_pattern + r'\Z') - - self._typeinfo_pattern = None - if typeinfo_pattern: - self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') - - self._mappedpathname_pattern = None - if mappedpathname_pattern: - self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z') - - self._mappedpermission_pattern = None - if mappedpermission_pattern: - self._mappedpermission_pattern = re.compile( - mappedpermission_pattern + r'\Z') - - self._sharedwith = [] - if sharedwith: - self._sharedwith = sharedwith - - @property - def name(self): - return self._name - - @property - def allocator_type(self): - return self._allocator_type - - @property - def stackfunction_pattern(self): - return self._stackfunction_pattern - - @property - def stacksourcefile_pattern(self): - return self._stacksourcefile_pattern - - @property - def typeinfo_pattern(self): - return self._typeinfo_pattern - - @property - def mappedpathname_pattern(self): - return self._mappedpathname_pattern - - @property - def mappedpermission_pattern(self): - return self._mappedpermission_pattern - - @property - def sharedwith(self): - return self._sharedwith - - -class 
Policy(object): - """Represents a policy, a content of a policy file.""" - - def __init__(self, rules, version, components): - self._rules = rules - self._version = version - self._components = components - - @property - def rules(self): - return self._rules - - @property - def version(self): - return self._version - - @property - def components(self): - return self._components - - def find_rule(self, component_name): - """Finds a rule whose name is |component_name|. """ - for rule in self._rules: - if rule.name == component_name: - return rule - return None - - def find_malloc(self, bucket): - """Finds a matching component name which a given |bucket| belongs to. - - Args: - bucket: A Bucket object to be searched for. - - Returns: - A string representing a component name. - """ - assert not bucket or bucket.allocator_type == 'malloc' - - if not bucket: - return 'no-bucket' - if bucket.component_cache: - return bucket.component_cache - - stackfunction = bucket.symbolized_joined_stackfunction - stacksourcefile = bucket.symbolized_joined_stacksourcefile - typeinfo = bucket.symbolized_typeinfo - if typeinfo.startswith('0x'): - typeinfo = bucket.typeinfo_name - - for rule in self._rules: - if (rule.allocator_type == 'malloc' and - (not rule.stackfunction_pattern or - rule.stackfunction_pattern.match(stackfunction)) and - (not rule.stacksourcefile_pattern or - rule.stacksourcefile_pattern.match(stacksourcefile)) and - (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): - bucket.component_cache = rule.name - return rule.name - - assert False - - def find_mmap(self, region, bucket_set, - pageframe=None, group_pfn_counts=None): - """Finds a matching component which a given mmap |region| belongs to. - - It uses |bucket_set| to match with backtraces. If |pageframe| is given, - it considers memory sharing among processes. - - NOTE: Don't use Bucket's |component_cache| for mmap regions because they're - classified not only with bucket information (mappedpathname for example). - - Args: - region: A tuple representing a memory region. - bucket_set: A BucketSet object to look up backtraces. - pageframe: A PageFrame object representing a pageframe maybe including - a pagecount. - group_pfn_counts: A dict mapping a PFN to the number of times the - the pageframe is mapped by the known "group (Chrome)" processes. - - Returns: - A string representing a component name. 
- """ - assert region[0] == 'hooked' - bucket = bucket_set.get(region[1]['bucket_id']) - assert not bucket or bucket.allocator_type == 'mmap' - - if not bucket: - return 'no-bucket', None - - stackfunction = bucket.symbolized_joined_stackfunction - stacksourcefile = bucket.symbolized_joined_stacksourcefile - sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) - - for rule in self._rules: - if (rule.allocator_type == 'mmap' and - (not rule.stackfunction_pattern or - rule.stackfunction_pattern.match(stackfunction)) and - (not rule.stacksourcefile_pattern or - rule.stacksourcefile_pattern.match(stacksourcefile)) and - (not rule.mappedpathname_pattern or - rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and - (not rule.mappedpermission_pattern or - rule.mappedpermission_pattern.match( - region[1]['vma']['readable'] + - region[1]['vma']['writable'] + - region[1]['vma']['executable'] + - region[1]['vma']['private'])) and - (not rule.sharedwith or - not pageframe or sharedwith in rule.sharedwith)): - return rule.name, bucket - - assert False - - def find_unhooked(self, region, pageframe=None, group_pfn_counts=None): - """Finds a matching component which a given unhooked |region| belongs to. - - If |pageframe| is given, it considers memory sharing among processes. - - Args: - region: A tuple representing a memory region. - pageframe: A PageFrame object representing a pageframe maybe including - a pagecount. - group_pfn_counts: A dict mapping a PFN to the number of times the - the pageframe is mapped by the known "group (Chrome)" processes. - - Returns: - A string representing a component name. - """ - assert region[0] == 'unhooked' - sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) - - for rule in self._rules: - if (rule.allocator_type == 'unhooked' and - (not rule.mappedpathname_pattern or - rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and - (not rule.mappedpermission_pattern or - rule.mappedpermission_pattern.match( - region[1]['vma']['readable'] + - region[1]['vma']['writable'] + - region[1]['vma']['executable'] + - region[1]['vma']['private'])) and - (not rule.sharedwith or - not pageframe or sharedwith in rule.sharedwith)): - return rule.name - - assert False - - @staticmethod - def load(filename, filetype): - """Loads a policy file of |filename| in a |format|. - - Args: - filename: A filename to be loaded. - filetype: A string to specify a type of the file. Only 'json' is - supported for now. - - Returns: - A loaded Policy object. - """ - with open(os.path.join(BASE_PATH, filename)) as policy_f: - return Policy.parse(policy_f, filetype) - - @staticmethod - def parse(policy_f, filetype): - """Parses a policy file content in a |format|. - - Args: - policy_f: An IO object to be loaded. - filetype: A string to specify a type of the file. Only 'json' is - supported for now. - - Returns: - A loaded Policy object. - """ - if filetype == 'json': - return Policy._parse_json(policy_f) - else: - return None - - @staticmethod - def _parse_json(policy_f): - """Parses policy file in json format. - - A policy file contains component's names and their stacktrace pattern - written in regular expression. Those patterns are matched against each - symbols of each stacktraces in the order written in the policy file - - Args: - policy_f: A File/IO object to read. - - Returns: - A loaded policy object. 
- """ - policy = json.load(policy_f) - - rules = [] - for rule in policy['rules']: - stackfunction = rule.get('stackfunction') or rule.get('stacktrace') - stacksourcefile = rule.get('stacksourcefile') - rules.append(Rule( - rule['name'], - rule['allocator'], # allocator_type - stackfunction, - stacksourcefile, - rule['typeinfo'] if 'typeinfo' in rule else None, - rule.get('mappedpathname'), - rule.get('mappedpermission'), - rule.get('sharedwith'))) - - return Policy(rules, policy['version'], policy['components']) - - @staticmethod - def _categorize_pageframe(pageframe, group_pfn_counts): - """Categorizes a pageframe based on its sharing status. - - Returns: - 'private' if |pageframe| is not shared with other processes. 'group' - if |pageframe| is shared only with group (Chrome-related) processes. - 'others' if |pageframe| is shared with non-group processes. - """ - if not pageframe: - return 'private' - - if pageframe.pagecount: - if pageframe.pagecount == 1: - return 'private' - elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1: - return 'group' - else: - return 'others' - else: - if pageframe.pfn in group_pfn_counts: - return 'group' - else: - return 'private' - - -class PolicySet(object): - """Represents a set of policies.""" - - def __init__(self, policy_directory): - self._policy_directory = policy_directory - - @staticmethod - def load(labels=None): - """Loads a set of policies via the "default policy directory". - - The "default policy directory" contains pairs of policies and their labels. - For example, a policy "policy.l0.json" is labeled "l0" in the default - policy directory "policies.json". - - All policies in the directory are loaded by default. Policies can be - limited by |labels|. - - Args: - labels: An array that contains policy labels to be loaded. - - Returns: - A PolicySet object. - """ - default_policy_directory = PolicySet._load_default_policy_directory() - if labels: - specified_policy_directory = {} - for label in labels: - if label in default_policy_directory: - specified_policy_directory[label] = default_policy_directory[label] - # TODO(dmikurube): Load an un-labeled policy file. 
- return PolicySet._load_policies(specified_policy_directory) - else: - return PolicySet._load_policies(default_policy_directory) - - def __len__(self): - return len(self._policy_directory) - - def __iter__(self): - for label in self._policy_directory: - yield label - - def __getitem__(self, label): - return self._policy_directory[label] - - @staticmethod - def _load_default_policy_directory(): - with open(POLICIES_JSON_PATH, mode='r') as policies_f: - default_policy_directory = json.load(policies_f) - return default_policy_directory - - @staticmethod - def _load_policies(directory): - LOGGER.info('Loading policy files.') - policies = {} - for label in directory: - LOGGER.info(' %s: %s' % (label, directory[label]['file'])) - loaded = Policy.load(directory[label]['file'], directory[label]['format']) - if loaded: - policies[label] = loaded - return PolicySet(policies) - - -class Bucket(object): - """Represents a bucket, which is a unit of memory block classification.""" - - def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name): - self._stacktrace = stacktrace - self._allocator_type = allocator_type - self._typeinfo = typeinfo - self._typeinfo_name = typeinfo_name - - self._symbolized_stackfunction = stacktrace - self._symbolized_joined_stackfunction = '' - self._symbolized_stacksourcefile = stacktrace - self._symbolized_joined_stacksourcefile = '' - self._symbolized_typeinfo = typeinfo_name - - self.component_cache = '' - - def __str__(self): - result = [] - result.append(self._allocator_type) - if self._symbolized_typeinfo == 'no typeinfo': - result.append('tno_typeinfo') - else: - result.append('t' + self._symbolized_typeinfo) - result.append('n' + self._typeinfo_name) - result.extend(['%s(@%s)' % (function, sourcefile) - for function, sourcefile - in zip(self._symbolized_stackfunction, - self._symbolized_stacksourcefile)]) - return ' '.join(result) - - def symbolize(self, symbol_mapping_cache): - """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. - - Args: - symbol_mapping_cache: A SymbolMappingCache object. - """ - # TODO(dmikurube): Fill explicitly with numbers if symbol not found. 
- self._symbolized_stackfunction = [ - symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) - for address in self._stacktrace] - self._symbolized_joined_stackfunction = ' '.join( - self._symbolized_stackfunction) - self._symbolized_stacksourcefile = [ - symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) - for address in self._stacktrace] - self._symbolized_joined_stacksourcefile = ' '.join( - self._symbolized_stacksourcefile) - if not self._typeinfo: - self._symbolized_typeinfo = 'no typeinfo' - else: - self._symbolized_typeinfo = symbol_mapping_cache.lookup( - TYPEINFO_SYMBOLS, self._typeinfo) - if not self._symbolized_typeinfo: - self._symbolized_typeinfo = 'no typeinfo' - - def clear_component_cache(self): - self.component_cache = '' - - @property - def stacktrace(self): - return self._stacktrace - - @property - def allocator_type(self): - return self._allocator_type - - @property - def typeinfo(self): - return self._typeinfo - - @property - def typeinfo_name(self): - return self._typeinfo_name - - @property - def symbolized_stackfunction(self): - return self._symbolized_stackfunction - - @property - def symbolized_joined_stackfunction(self): - return self._symbolized_joined_stackfunction - - @property - def symbolized_stacksourcefile(self): - return self._symbolized_stacksourcefile - - @property - def symbolized_joined_stacksourcefile(self): - return self._symbolized_joined_stacksourcefile - - @property - def symbolized_typeinfo(self): - return self._symbolized_typeinfo - - -class BucketSet(object): - """Represents a set of bucket.""" - def __init__(self): - self._buckets = {} - self._code_addresses = set() - self._typeinfo_addresses = set() - - def load(self, prefix): - """Loads all related bucket files. - - Args: - prefix: A prefix string for bucket file names. 
- """ - LOGGER.info('Loading bucket files.') - - n = 0 - skipped = 0 - while True: - path = '%s.%04d.buckets' % (prefix, n) - if not os.path.exists(path) or not os.stat(path).st_size: - if skipped > 10: - break - n += 1 - skipped += 1 - continue - LOGGER.info(' %s' % path) - with open(path, 'r') as f: - self._load_file(f) - n += 1 - skipped = 0 - - def _load_file(self, bucket_f): - for line in bucket_f: - words = line.split() - typeinfo = None - typeinfo_name = '' - stacktrace_begin = 2 - for index, word in enumerate(words): - if index < 2: - continue - if word[0] == 't': - typeinfo = int(word[1:], 16) - self._typeinfo_addresses.add(typeinfo) - elif word[0] == 'n': - typeinfo_name = word[1:] - else: - stacktrace_begin = index - break - stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] - for frame in stacktrace: - self._code_addresses.add(frame) - self._buckets[int(words[0])] = Bucket( - stacktrace, words[1], typeinfo, typeinfo_name) - - def __iter__(self): - for bucket_id, bucket_content in self._buckets.iteritems(): - yield bucket_id, bucket_content - - def __getitem__(self, bucket_id): - return self._buckets[bucket_id] - - def get(self, bucket_id): - return self._buckets.get(bucket_id) - - def symbolize(self, symbol_mapping_cache): - for bucket_content in self._buckets.itervalues(): - bucket_content.symbolize(symbol_mapping_cache) - - def clear_component_cache(self): - for bucket_content in self._buckets.itervalues(): - bucket_content.clear_component_cache() - - def iter_addresses(self, symbol_type): - if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: - for function in self._code_addresses: - yield function - else: - for function in self._typeinfo_addresses: - yield function - - -class PageFrame(object): - """Represents a pageframe and maybe its shared count.""" - def __init__(self, pfn, size, pagecount, start_truncated, end_truncated): - self._pfn = pfn - self._size = size - self._pagecount = pagecount - self._start_truncated = start_truncated - self._end_truncated = end_truncated - - def __str__(self): - result = str() - if self._start_truncated: - result += '<' - result += '%06x#%d' % (self._pfn, self._pagecount) - if self._end_truncated: - result += '>' - return result - - def __repr__(self): - return str(self) - - @staticmethod - def parse(encoded_pfn, size): - start = 0 - end = len(encoded_pfn) - end_truncated = False - if encoded_pfn.endswith('>'): - end = len(encoded_pfn) - 1 - end_truncated = True - pagecount_found = encoded_pfn.find('#') - pagecount = None - if pagecount_found >= 0: - encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end] - pagecount = struct.unpack( - '>I', '\x00' + encoded_pagecount.decode('base64'))[0] - end = pagecount_found - start_truncated = False - if encoded_pfn.startswith('<'): - start = 1 - start_truncated = True - - pfn = struct.unpack( - '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0] - - return PageFrame(pfn, size, pagecount, start_truncated, end_truncated) - - @property - def pfn(self): - return self._pfn - - @property - def size(self): - return self._size - - def set_size(self, size): - self._size = size - - @property - def pagecount(self): - return self._pagecount - - @property - def start_truncated(self): - return self._start_truncated - - @property - def end_truncated(self): - return self._end_truncated - - -class PFNCounts(object): - """Represents counts of PFNs in a process.""" - - _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') - - def __init__(self, path, modified_time): 
- matched = self._PATH_PATTERN.match(path) - if matched: - self._pid = int(matched.group(2)) - else: - self._pid = 0 - self._command_line = '' - self._pagesize = 4096 - self._path = path - self._pfn_meta = '' - self._pfnset = {} - self._reason = '' - self._time = modified_time - - @staticmethod - def load(path, log_header='Loading PFNs from a heap profile dump: '): - pfnset = PFNCounts(path, float(os.stat(path).st_mtime)) - LOGGER.info('%s%s' % (log_header, path)) - - with open(path, 'r') as pfnset_f: - pfnset.load_file(pfnset_f) - - return pfnset - - @property - def path(self): - return self._path - - @property - def pid(self): - return self._pid - - @property - def time(self): - return self._time - - @property - def reason(self): - return self._reason - - @property - def iter_pfn(self): - for pfn, count in self._pfnset.iteritems(): - yield pfn, count - - def load_file(self, pfnset_f): - prev_pfn_end_truncated = None - for line in pfnset_f: - line = line.strip() - if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'): - break - elif line.startswith('PF: '): - for encoded_pfn in line[3:].split(): - page_frame = PageFrame.parse(encoded_pfn, self._pagesize) - if page_frame.start_truncated and ( - not prev_pfn_end_truncated or - prev_pfn_end_truncated != page_frame.pfn): - LOGGER.error('Broken page frame number: %s.' % encoded_pfn) - self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1 - if page_frame.end_truncated: - prev_pfn_end_truncated = page_frame.pfn - else: - prev_pfn_end_truncated = None - elif line.startswith('PageSize: '): - self._pagesize = int(line[10:]) - elif line.startswith('PFN: '): - self._pfn_meta = line[5:] - elif line.startswith('PageFrame: '): - self._pfn_meta = line[11:] - elif line.startswith('Time: '): - self._time = float(line[6:]) - elif line.startswith('CommandLine: '): - self._command_line = line[13:] - elif line.startswith('Reason: '): - self._reason = line[8:] - - -class Dump(object): - """Represents a heap profile dump.""" - - _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') - - _HOOK_PATTERN = re.compile( - r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' - r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) - - _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' - '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') - _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' - '(?P<RESERVED>[0-9]+)') - - _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') - _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') - - _TIME_PATTERN_FORMAT = re.compile( - r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') - _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') - - def __init__(self, path, modified_time): - self._path = path - matched = self._PATH_PATTERN.match(path) - self._pid = int(matched.group(2)) - self._count = int(matched.group(3)) - self._time = modified_time - self._map = {} - self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) - self._stacktrace_lines = [] - self._global_stats = {} # used only in apply_policy - - self._run_id = '' - self._pagesize = 4096 - self._pageframe_length = 0 - self._pageframe_encoding = '' - self._has_pagecount = False - - self._version = '' - self._lines = [] - - @property - def path(self): - return self._path - - @property - def count(self): - return self._count - - @property - def time(self): - return self._time - - @property - def iter_map(self): - for region in 
sorted(self._map.iteritems()): - yield region[0], region[1] - - def iter_procmaps(self): - for begin, end, attr in self._map.iter_range(): - yield begin, end, attr - - @property - def iter_stacktrace(self): - for line in self._stacktrace_lines: - yield line - - def global_stat(self, name): - return self._global_stats[name] - - @property - def run_id(self): - return self._run_id - - @property - def pagesize(self): - return self._pagesize - - @property - def pageframe_length(self): - return self._pageframe_length - - @property - def pageframe_encoding(self): - return self._pageframe_encoding - - @property - def has_pagecount(self): - return self._has_pagecount - - @staticmethod - def load(path, log_header='Loading a heap profile dump: '): - """Loads a heap profile dump. - - Args: - path: A file path string to load. - log_header: A preceding string for log messages. - - Returns: - A loaded Dump object. - - Raises: - ParsingException for invalid heap profile dumps. - """ - dump = Dump(path, os.stat(path).st_mtime) - with open(path, 'r') as f: - dump.load_file(f, log_header) - return dump - - def load_file(self, f, log_header): - self._lines = [line for line in f - if line and not line.startswith('#')] - - try: - self._version, ln = self._parse_version() - self._parse_meta_information() - if self._version == DUMP_DEEP_6: - self._parse_mmap_list() - self._parse_global_stats() - self._extract_stacktrace_lines(ln) - except EmptyDumpException: - LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) - except ParsingException, e: - LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) - raise - else: - LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) - - def _parse_version(self): - """Parses a version string in self._lines. - - Returns: - A pair of (a string representing a version of the stacktrace dump, - and an integer indicating a line number next to the version string). - - Raises: - ParsingException for invalid dump versions. - """ - version = '' - - # Skip until an identifiable line. - headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') - if not self._lines: - raise EmptyDumpException('Empty heap dump file.') - (ln, found) = skip_while( - 0, len(self._lines), - lambda n: not self._lines[n].startswith(headers)) - if not found: - raise InvalidDumpException('No version header.') - - # Identify a version. 
- if self._lines[ln].startswith('heap profile: '): - version = self._lines[ln][13:].strip() - if version in (DUMP_DEEP_5, DUMP_DEEP_6): - (ln, _) = skip_while( - ln, len(self._lines), - lambda n: self._lines[n] != 'STACKTRACES:\n') - elif version in DUMP_DEEP_OBSOLETE: - raise ObsoleteDumpVersionException(version) - else: - raise InvalidDumpException('Invalid version: %s' % version) - elif self._lines[ln] == 'STACKTRACES:\n': - raise ObsoleteDumpVersionException(DUMP_DEEP_1) - elif self._lines[ln] == 'MMAP_STACKTRACES:\n': - raise ObsoleteDumpVersionException(DUMP_DEEP_2) - - return (version, ln) - - def _parse_global_stats(self): - """Parses lines in self._lines as global stats.""" - (ln, _) = skip_while( - 0, len(self._lines), - lambda n: self._lines[n] != 'GLOBAL_STATS:\n') - - global_stat_names = [ - 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', - 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', - 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', - 'nonprofiled-stack', 'nonprofiled-other', - 'profiled-mmap', 'profiled-malloc'] - - for prefix in global_stat_names: - (ln, _) = skip_while( - ln, len(self._lines), - lambda n: self._lines[n].split()[0] != prefix) - words = self._lines[ln].split() - self._global_stats[prefix + '_virtual'] = int(words[-2]) - self._global_stats[prefix + '_committed'] = int(words[-1]) - - def _parse_meta_information(self): - """Parses lines in self._lines for meta information.""" - (ln, found) = skip_while( - 0, len(self._lines), - lambda n: self._lines[n] != 'META:\n') - if not found: - return - ln += 1 - - while True: - if self._lines[ln].startswith('Time:'): - matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) - matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) - if matched_format: - self._time = time.mktime(datetime.datetime.strptime( - matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) - if matched_format.group(2): - self._time += float(matched_format.group(2)[1:]) / 1000.0 - elif matched_seconds: - self._time = float(matched_seconds.group(1)) - elif self._lines[ln].startswith('Reason:'): - pass # Nothing to do for 'Reason:' - elif self._lines[ln].startswith('PageSize: '): - self._pagesize = int(self._lines[ln][10:]) - elif self._lines[ln].startswith('CommandLine:'): - pass - elif (self._lines[ln].startswith('PageFrame: ') or - self._lines[ln].startswith('PFN: ')): - if self._lines[ln].startswith('PageFrame: '): - words = self._lines[ln][11:].split(',') - else: - words = self._lines[ln][5:].split(',') - for word in words: - if word == '24': - self._pageframe_length = 24 - elif word == 'Base64': - self._pageframe_encoding = 'base64' - elif word == 'PageCount': - self._has_pagecount = True - elif self._lines[ln].startswith('RunID: '): - self._run_id = self._lines[ln][7:].strip() - elif (self._lines[ln].startswith('MMAP_LIST:') or - self._lines[ln].startswith('GLOBAL_STATS:')): - # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found. 
- break - else: - pass - ln += 1 - - def _parse_mmap_list(self): - """Parses lines in self._lines as a mmap list.""" - (ln, found) = skip_while( - 0, len(self._lines), - lambda n: self._lines[n] != 'MMAP_LIST:\n') - if not found: - return {} - - ln += 1 - self._map = {} - current_vma = {} - pageframe_list = [] - while True: - entry = proc_maps.ProcMaps.parse_line(self._lines[ln]) - if entry: - current_vma = {} - for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): - for key, value in entry.as_dict().iteritems(): - attr[key] = value - current_vma[key] = value - ln += 1 - continue - - if self._lines[ln].startswith(' PF: '): - for pageframe in self._lines[ln][5:].split(): - pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) - ln += 1 - continue - - matched = self._HOOK_PATTERN.match(self._lines[ln]) - if not matched: - break - # 2: starting address - # 5: end address - # 7: hooked or unhooked - # 8: additional information - if matched.group(7) == 'hooked': - submatched = self._HOOKED_PATTERN.match(matched.group(8)) - if not submatched: - submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) - elif matched.group(7) == 'unhooked': - submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) - if not submatched: - submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) - else: - assert matched.group(7) in ['hooked', 'unhooked'] - - submatched_dict = submatched.groupdict() - region_info = { 'vma': current_vma } - if submatched_dict.get('TYPE'): - region_info['type'] = submatched_dict['TYPE'].strip() - if submatched_dict.get('COMMITTED'): - region_info['committed'] = int(submatched_dict['COMMITTED']) - if submatched_dict.get('RESERVED'): - region_info['reserved'] = int(submatched_dict['RESERVED']) - if submatched_dict.get('BUCKETID'): - region_info['bucket_id'] = int(submatched_dict['BUCKETID']) - - if matched.group(1) == '(': - start = current_vma['begin'] - else: - start = int(matched.group(2), 16) - if matched.group(4) == '(': - end = current_vma['end'] - else: - end = int(matched.group(5), 16) - - if pageframe_list and pageframe_list[0].start_truncated: - pageframe_list[0].set_size( - pageframe_list[0].size - start % self._pagesize) - if pageframe_list and pageframe_list[-1].end_truncated: - pageframe_list[-1].set_size( - pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) - region_info['pageframe'] = pageframe_list - pageframe_list = [] - - self._map[(start, end)] = (matched.group(7), region_info) - ln += 1 - - def _extract_stacktrace_lines(self, line_number): - """Extracts the position of stacktrace lines. - - Valid stacktrace lines are stored into self._stacktrace_lines. - - Args: - line_number: A line number to start parsing in lines. - - Raises: - ParsingException for invalid dump versions. - """ - if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): - (line_number, _) = skip_while( - line_number, len(self._lines), - lambda n: not self._lines[n].split()[0].isdigit()) - stacktrace_start = line_number - (line_number, _) = skip_while( - line_number, len(self._lines), - lambda n: self._check_stacktrace_line(self._lines[n])) - self._stacktrace_lines = self._lines[stacktrace_start:line_number] - - elif self._version in DUMP_DEEP_OBSOLETE: - raise ObsoleteDumpVersionException(self._version) - - else: - raise InvalidDumpException('Invalid version: %s' % self._version) - - @staticmethod - def _check_stacktrace_line(stacktrace_line): - """Checks if a given stacktrace_line is valid as stacktrace. 
- - Args: - stacktrace_line: A string to be checked. - - Returns: - True if the given stacktrace_line is valid. - """ - words = stacktrace_line.split() - if len(words) < BUCKET_ID + 1: - return False - if words[BUCKET_ID - 1] != '@': - return False - return True - - -class DumpList(object): - """Represents a sequence of heap profile dumps.""" - - def __init__(self, dump_list): - self._dump_list = dump_list - - @staticmethod - def load(path_list): - LOGGER.info('Loading heap dump profiles.') - dump_list = [] - for path in path_list: - dump_list.append(Dump.load(path, ' ')) - return DumpList(dump_list) - - def __len__(self): - return len(self._dump_list) - - def __iter__(self): - for dump in self._dump_list: - yield dump - - def __getitem__(self, index): - return self._dump_list[index] - - -class Unit(object): - """Represents a minimum unit of memory usage categorization. - - It is supposed to be inherited for some different spaces like the entire - virtual memory and malloc arena. Such different spaces are called "worlds" - in dmprof. (For example, the "vm" world and the "malloc" world.) - """ - def __init__(self, unit_id, size): - self._unit_id = unit_id - self._size = size - - @property - def unit_id(self): - return self._unit_id - - @property - def size(self): - return self._size - - -class VMUnit(Unit): - """Represents a Unit for a memory region on virtual memory.""" - def __init__(self, unit_id, committed, reserved, mmap, region, - pageframe=None, group_pfn_counts=None): - super(VMUnit, self).__init__(unit_id, committed) - self._reserved = reserved - self._mmap = mmap - self._region = region - self._pageframe = pageframe - self._group_pfn_counts = group_pfn_counts - - @property - def committed(self): - return self._size - - @property - def reserved(self): - return self._reserved - - @property - def mmap(self): - return self._mmap - - @property - def region(self): - return self._region - - @property - def pageframe(self): - return self._pageframe - - @property - def group_pfn_counts(self): - return self._group_pfn_counts - - -class MMapUnit(VMUnit): - """Represents a Unit for a mmap'ed region.""" - def __init__(self, unit_id, committed, reserved, region, bucket_set, - pageframe=None, group_pfn_counts=None): - super(MMapUnit, self).__init__(unit_id, committed, reserved, True, - region, pageframe, group_pfn_counts) - self._bucket_set = bucket_set - - def __repr__(self): - return str(self.region) - - @property - def bucket_set(self): - return self._bucket_set - - -class UnhookedUnit(VMUnit): - """Represents a Unit for a non-mmap'ed memory region on virtual memory.""" - def __init__(self, unit_id, committed, reserved, region, - pageframe=None, group_pfn_counts=None): - super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False, - region, pageframe, group_pfn_counts) - - def __repr__(self): - return str(self.region) - - -class MallocUnit(Unit): - """Represents a Unit for a malloc'ed memory block.""" - def __init__(self, unit_id, size, alloc_count, free_count, bucket): - super(MallocUnit, self).__init__(unit_id, size) - self._bucket = bucket - self._alloc_count = alloc_count - self._free_count = free_count - - def __repr__(self): - return str(self.bucket) - - @property - def bucket(self): - return self._bucket - - @property - def alloc_count(self): - return self._alloc_count - - @property - def free_count(self): - return self._free_count - - -class UnitSet(object): - """Represents an iterable set of Units.""" - def __init__(self, world): - self._units = {} - self._world = world - 
- def __repr__(self): - return str(self._units) - - def __iter__(self): - for unit_id in sorted(self._units): - yield self._units[unit_id] - - def append(self, unit, overwrite=False): - if not overwrite and unit.unit_id in self._units: - LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id)) - self._units[unit.unit_id] = unit - - -class AbstractRule(object): - """An abstract class for rules to be matched with units.""" - def __init__(self, dct): - self._name = dct['name'] - self._hidden = dct.get('hidden', False) - self._subworlds = dct.get('subworlds', []) - - def match(self, unit): - raise NotImplementedError() - - @property - def name(self): - return self._name - - @property - def hidden(self): - return self._hidden - - def iter_subworld(self): - for subworld in self._subworlds: - yield subworld - - -class VMRule(AbstractRule): - """Represents a Rule to match with virtual memory regions.""" - def __init__(self, dct): - super(VMRule, self).__init__(dct) - self._backtrace_function = dct.get('backtrace_function', None) - if self._backtrace_function: - self._backtrace_function = re.compile(self._backtrace_function) - self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) - if self._backtrace_sourcefile: - self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) - self._mmap = dct.get('mmap', None) - self._sharedwith = dct.get('sharedwith', []) - self._mapped_pathname = dct.get('mapped_pathname', None) - if self._mapped_pathname: - self._mapped_pathname = re.compile(self._mapped_pathname) - self._mapped_permission = dct.get('mapped_permission', None) - if self._mapped_permission: - self._mapped_permission = re.compile(self._mapped_permission) - - def __repr__(self): - result = cStringIO.StringIO() - result.write('{"%s"=>' % self._name) - attributes = [] - attributes.append('mmap: %s' % self._mmap) - if self._backtrace_function: - attributes.append('backtrace_function: "%s"' % - self._backtrace_function.pattern) - if self._sharedwith: - attributes.append('sharedwith: "%s"' % self._sharedwith) - if self._mapped_pathname: - attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern) - if self._mapped_permission: - attributes.append('mapped_permission: "%s"' % - self._mapped_permission.pattern) - result.write('%s}' % ', '.join(attributes)) - return result.getvalue() - - def match(self, unit): - if unit.mmap: - assert unit.region[0] == 'hooked' - bucket = unit.bucket_set.get(unit.region[1]['bucket_id']) - assert bucket - assert bucket.allocator_type == 'mmap' - - stackfunction = bucket.symbolized_joined_stackfunction - stacksourcefile = bucket.symbolized_joined_stacksourcefile - - # TODO(dmikurube): Support shared memory. - sharedwith = None - - if self._mmap == False: # (self._mmap == None) should go through. 
- return False - if (self._backtrace_function and - not self._backtrace_function.match(stackfunction)): - return False - if (self._backtrace_sourcefile and - not self._backtrace_sourcefile.match(stacksourcefile)): - return False - if (self._mapped_pathname and - not self._mapped_pathname.match(unit.region[1]['vma']['name'])): - return False - if (self._mapped_permission and - not self._mapped_permission.match( - unit.region[1]['vma']['readable'] + - unit.region[1]['vma']['writable'] + - unit.region[1]['vma']['executable'] + - unit.region[1]['vma']['private'])): - return False - if (self._sharedwith and - unit.pageframe and sharedwith not in self._sharedwith): - return False - - return True - - else: - assert unit.region[0] == 'unhooked' - - # TODO(dmikurube): Support shared memory. - sharedwith = None - - if self._mmap == True: # (self._mmap == None) should go through. - return False - if (self._mapped_pathname and - not self._mapped_pathname.match(unit.region[1]['vma']['name'])): - return False - if (self._mapped_permission and - not self._mapped_permission.match( - unit.region[1]['vma']['readable'] + - unit.region[1]['vma']['writable'] + - unit.region[1]['vma']['executable'] + - unit.region[1]['vma']['private'])): - return False - if (self._sharedwith and - unit.pageframe and sharedwith not in self._sharedwith): - return False - - return True - - -class MallocRule(AbstractRule): - """Represents a Rule to match with malloc'ed blocks.""" - def __init__(self, dct): - super(MallocRule, self).__init__(dct) - self._backtrace_function = dct.get('backtrace_function', None) - if self._backtrace_function: - self._backtrace_function = re.compile(self._backtrace_function) - self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) - if self._backtrace_sourcefile: - self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) - self._typeinfo = dct.get('typeinfo', None) - if self._typeinfo: - self._typeinfo = re.compile(self._typeinfo) - - def __repr__(self): - result = cStringIO.StringIO() - result.write('{"%s"=>' % self._name) - attributes = [] - if self._backtrace_function: - attributes.append('backtrace_function: "%s"' % self._backtrace_function) - if self._typeinfo: - attributes.append('typeinfo: "%s"' % self._typeinfo) - result.write('%s}' % ', '.join(attributes)) - return result.getvalue() - - def match(self, unit): - assert unit.bucket.allocator_type == 'malloc' - - stackfunction = unit.bucket.symbolized_joined_stackfunction - stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile - typeinfo = unit.bucket.symbolized_typeinfo - if typeinfo.startswith('0x'): - typeinfo = unit.bucket.typeinfo_name - - return ((not self._backtrace_function or - self._backtrace_function.match(stackfunction)) and - (not self._backtrace_sourcefile or - self._backtrace_sourcefile.match(stacksourcefile)) and - (not self._typeinfo or self._typeinfo.match(typeinfo))) - - -class NoBucketMallocRule(MallocRule): - """Represents a Rule that small ignorable units match with.""" - def __init__(self): - super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'}) - self._no_bucket = True - - @property - def no_bucket(self): - return self._no_bucket - - -class AbstractSorter(object): - """An abstract class for classifying Units with a set of Rules.""" - def __init__(self, dct): - self._type = 'sorter' - self._version = dct['version'] - self._world = dct['world'] - self._name = dct['name'] - self._order = dct['order'] - - self._rules = [] - for rule in dct['rules']: - if dct['world'] == 'vm': - 
self._rules.append(VMRule(rule)) - elif dct['world'] == 'malloc': - self._rules.append(MallocRule(rule)) - else: - LOGGER.error('Unknown sorter world type') - - def __repr__(self): - result = cStringIO.StringIO() - result.write('world=%s' % self._world) - result.write('order=%s' % self._order) - result.write('rules:') - for rule in self._rules: - result.write(' %s' % rule) - return result.getvalue() - - @staticmethod - def load(filename): - with open(filename) as sorter_f: - sorter_dict = json.load(sorter_f) - if sorter_dict['world'] == 'vm': - return VMSorter(sorter_dict) - elif sorter_dict['world'] == 'malloc': - return MallocSorter(sorter_dict) - else: - LOGGER.error('Unknown sorter world type') - return None - - @property - def world(self): - return self._world - - @property - def name(self): - return self._name - - def find(self, unit): - raise NotImplementedError() - - def find_rule(self, name): - """Finds a rule whose name is |name|. """ - for rule in self._rules: - if rule.name == name: - return rule - return None - - -class VMSorter(AbstractSorter): - """Represents a Sorter for memory regions on virtual memory.""" - def __init__(self, dct): - assert dct['world'] == 'vm' - super(VMSorter, self).__init__(dct) - - def find(self, unit): - for rule in self._rules: - if rule.match(unit): - return rule - assert False - - -class MallocSorter(AbstractSorter): - """Represents a Sorter for malloc'ed blocks.""" - def __init__(self, dct): - assert dct['world'] == 'malloc' - super(MallocSorter, self).__init__(dct) - self._no_bucket_rule = NoBucketMallocRule() - - def find(self, unit): - if not unit.bucket: - return self._no_bucket_rule - assert unit.bucket.allocator_type == 'malloc' - - if unit.bucket.component_cache: - return unit.bucket.component_cache - - for rule in self._rules: - if rule.match(unit): - unit.bucket.component_cache = rule - return rule - assert False - - -class SorterSet(object): - """Represents an iterable set of Sorters.""" - def __init__(self, additional=None, default=None): - if not additional: - additional = [] - if not default: - default = DEFAULT_SORTERS - self._sorters = {} - for filename in default + additional: - sorter = AbstractSorter.load(filename) - if sorter.world not in self._sorters: - self._sorters[sorter.world] = [] - self._sorters[sorter.world].append(sorter) - - def __repr__(self): - result = cStringIO.StringIO() - result.write(self._sorters) - return result.getvalue() - - def __iter__(self): - for sorters in self._sorters.itervalues(): - for sorter in sorters: - yield sorter - - def iter_world(self, world): - for sorter in self._sorters.get(world, []): - yield sorter - - -class Command(object): - """Subclasses are a subcommand for this executable. - - See COMMANDS in main(). - """ - _DEVICE_LIB_BASEDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp'] - - def __init__(self, usage): - self._parser = optparse.OptionParser(usage) - - @staticmethod - def load_basic_files( - dump_path, multiple, no_dump=False, alternative_dirs=None): - prefix = Command._find_prefix(dump_path) - # If the target process is estimated to be working on Android, converts - # a path in the Android device to a path estimated to be corresponding in - # the host. Use --alternative-dirs to specify the conversion manually. 
- if not alternative_dirs: - alternative_dirs = Command._estimate_alternative_dirs(prefix) - if alternative_dirs: - for device, host in alternative_dirs.iteritems(): - LOGGER.info('Assuming %s on device as %s on host' % (device, host)) - symbol_data_sources = SymbolDataSources(prefix, alternative_dirs) - symbol_data_sources.prepare() - bucket_set = BucketSet() - bucket_set.load(prefix) - if not no_dump: - if multiple: - dump_list = DumpList.load(Command._find_all_dumps(dump_path)) - else: - dump = Dump.load(dump_path) - symbol_mapping_cache = SymbolMappingCache() - with open(prefix + '.cache.function', 'a+') as cache_f: - symbol_mapping_cache.update( - FUNCTION_SYMBOLS, bucket_set, - SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) - with open(prefix + '.cache.typeinfo', 'a+') as cache_f: - symbol_mapping_cache.update( - TYPEINFO_SYMBOLS, bucket_set, - SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) - with open(prefix + '.cache.sourcefile', 'a+') as cache_f: - symbol_mapping_cache.update( - SOURCEFILE_SYMBOLS, bucket_set, - SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) - bucket_set.symbolize(symbol_mapping_cache) - if no_dump: - return bucket_set - elif multiple: - return (bucket_set, dump_list) - else: - return (bucket_set, dump) - - @staticmethod - def _find_prefix(path): - return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) - - @staticmethod - def _estimate_alternative_dirs(prefix): - """Estimates a path in host from a corresponding path in target device. - - For Android, dmprof.py should find symbol information from binaries in - the host instead of the Android device because dmprof.py doesn't run on - the Android device. This method estimates a path in the host - corresponding to a path in the Android device. - - Returns: - A dict that maps a path in the Android device to a path in the host. - If a file in Command._DEVICE_LIB_BASEDIRS is found in /proc/maps, it - assumes the process was running on Android and maps the path to - "out/Debug/lib" in the Chromium directory. An empty dict is returned - unless Android. 
- """ - device_lib_path_candidates = set() - - with open(prefix + '.maps') as maps_f: - maps = proc_maps.ProcMaps.load(maps_f) - for entry in maps: - name = entry.as_dict()['name'] - if any([base_dir in name for base_dir in Command._DEVICE_LIB_BASEDIRS]): - device_lib_path_candidates.add(os.path.dirname(name)) - - if len(device_lib_path_candidates) == 1: - return {device_lib_path_candidates.pop(): os.path.join( - CHROME_SRC_PATH, 'out', 'Debug', 'lib')} - else: - return {} - - @staticmethod - def _find_all_dumps(dump_path): - prefix = Command._find_prefix(dump_path) - dump_path_list = [dump_path] - - n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) - n += 1 - skipped = 0 - while True: - p = '%s.%04d.heap' % (prefix, n) - if os.path.exists(p) and os.stat(p).st_size: - dump_path_list.append(p) - else: - if skipped > 10: - break - skipped += 1 - n += 1 - - return dump_path_list - - @staticmethod - def _find_all_buckets(dump_path): - prefix = Command._find_prefix(dump_path) - bucket_path_list = [] - - n = 0 - while True: - path = '%s.%04d.buckets' % (prefix, n) - if not os.path.exists(path): - if n > 10: - break - n += 1 - continue - bucket_path_list.append(path) - n += 1 - - return bucket_path_list - - def _parse_args(self, sys_argv, required): - options, args = self._parser.parse_args(sys_argv) - if len(args) < required + 1: - self._parser.error('needs %d argument(s).\n' % required) - return None - return (options, args) - - @staticmethod - def _parse_policy_list(options_policy): - if options_policy: - return options_policy.split(',') - else: - return None - - -class BucketsCommand(Command): - def __init__(self): - super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>') - - def do(self, sys_argv, out=sys.stdout): - _, args = self._parse_args(sys_argv, 1) - dump_path = args[1] - bucket_set = Command.load_basic_files(dump_path, True, True) - - BucketsCommand._output(bucket_set, out) - return 0 - - @staticmethod - def _output(bucket_set, out): - """Prints all buckets with resolving symbols. - - Args: - bucket_set: A BucketSet object. - out: An IO object to output. - """ - for bucket_id, bucket in sorted(bucket_set): - out.write('%d: %s\n' % (bucket_id, bucket)) - - -class StacktraceCommand(Command): - def __init__(self): - super(StacktraceCommand, self).__init__( - 'Usage: %prog stacktrace <dump>') - - def do(self, sys_argv): - _, args = self._parse_args(sys_argv, 1) - dump_path = args[1] - (bucket_set, dump) = Command.load_basic_files(dump_path, False) - - StacktraceCommand._output(dump, bucket_set, sys.stdout) - return 0 - - @staticmethod - def _output(dump, bucket_set, out): - """Outputs a given stacktrace. - - Args: - bucket_set: A BucketSet object. - out: A file object to output. 
- """ - for line in dump.iter_stacktrace: - words = line.split() - bucket = bucket_set.get(int(words[BUCKET_ID])) - if not bucket: - continue - for i in range(0, BUCKET_ID - 1): - out.write(words[i] + ' ') - for frame in bucket.symbolized_stackfunction: - out.write(frame + ' ') - out.write('\n') - - -class PolicyCommands(Command): - def __init__(self, command): - super(PolicyCommands, self).__init__( - 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' % - command) - self._parser.add_option('-p', '--policy', type='string', dest='policy', - help='profile with POLICY', metavar='POLICY') - self._parser.add_option('--alternative-dirs', dest='alternative_dirs', - metavar='/path/on/target@/path/on/host[:...]', - help='Read files in /path/on/host/ instead of ' - 'files in /path/on/target/.') - - def _set_up(self, sys_argv): - options, args = self._parse_args(sys_argv, 1) - dump_path = args[1] - shared_first_dump_paths = args[2:] - alternative_dirs_dict = {} - if options.alternative_dirs: - for alternative_dir_pair in options.alternative_dirs.split(':'): - target_path, host_path = alternative_dir_pair.split('@', 1) - alternative_dirs_dict[target_path] = host_path - (bucket_set, dumps) = Command.load_basic_files( - dump_path, True, alternative_dirs=alternative_dirs_dict) - - pfn_counts_dict = {} - for shared_first_dump_path in shared_first_dump_paths: - shared_dumps = Command._find_all_dumps(shared_first_dump_path) - for shared_dump in shared_dumps: - pfn_counts = PFNCounts.load(shared_dump) - if pfn_counts.pid not in pfn_counts_dict: - pfn_counts_dict[pfn_counts.pid] = [] - pfn_counts_dict[pfn_counts.pid].append(pfn_counts) - - policy_set = PolicySet.load(Command._parse_policy_list(options.policy)) - return policy_set, dumps, pfn_counts_dict, bucket_set - - @staticmethod - def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time): - """Aggregates the total memory size of each component. - - Iterate through all stacktraces and attribute them to one of the components - based on the policy. It is important to apply policy in right order. - - Args: - dump: A Dump object. - pfn_counts_dict: A dict mapping a pid to a list of PFNCounts. - policy: A Policy object. - bucket_set: A BucketSet object. - first_dump_time: An integer representing time when the first dump is - dumped. - - Returns: - A dict mapping components and their corresponding sizes. 
- """ - LOGGER.info(' %s' % dump.path) - all_pfn_dict = {} - if pfn_counts_dict: - LOGGER.info(' shared with...') - for pid, pfnset_list in pfn_counts_dict.iteritems(): - closest_pfnset_index = None - closest_pfnset_difference = 1024.0 - for index, pfnset in enumerate(pfnset_list): - time_difference = pfnset.time - dump.time - if time_difference >= 3.0: - break - elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or - (0.0 <= time_difference and time_difference < 3.0)): - closest_pfnset_index = index - closest_pfnset_difference = time_difference - elif time_difference < 0.0 and pfnset.reason == 'Exiting': - closest_pfnset_index = None - break - if closest_pfnset_index: - for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn: - all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count - LOGGER.info(' %s (time difference = %f)' % - (pfnset_list[closest_pfnset_index].path, - closest_pfnset_difference)) - else: - LOGGER.info(' (no match with pid:%d)' % pid) - - sizes = dict((c, 0) for c in policy.components) - - PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes) - verify_global_stats = PolicyCommands._accumulate_maps( - dump, all_pfn_dict, policy, bucket_set, sizes) - - # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed. - # http://crbug.com/245603. - for verify_key, verify_value in verify_global_stats.iteritems(): - dump_value = dump.global_stat('%s_committed' % verify_key) - if dump_value != verify_value: - LOGGER.warn('%25s: %12d != %d (%d)' % ( - verify_key, dump_value, verify_value, dump_value - verify_value)) - - sizes['mmap-no-log'] = ( - dump.global_stat('profiled-mmap_committed') - - sizes['mmap-total-log']) - sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') - sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') - - sizes['tc-no-log'] = ( - dump.global_stat('profiled-malloc_committed') - - sizes['tc-total-log']) - sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') - sizes['tc-unused'] = ( - sizes['mmap-tcmalloc'] - - dump.global_stat('profiled-malloc_committed')) - if sizes['tc-unused'] < 0: - LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' % - sizes['tc-unused']) - sizes['tc-unused'] = 0 - sizes['tc-total'] = sizes['mmap-tcmalloc'] - - # TODO(dmikurube): global_stat will be deprecated. - # See http://crbug.com/245603. 
- for key, value in { - 'total': 'total_committed', - 'filemapped': 'file_committed', - 'absent': 'absent_committed', - 'file-exec': 'file-exec_committed', - 'file-nonexec': 'file-nonexec_committed', - 'anonymous': 'anonymous_committed', - 'stack': 'stack_committed', - 'other': 'other_committed', - 'unhooked-absent': 'nonprofiled-absent_committed', - 'total-vm': 'total_virtual', - 'filemapped-vm': 'file_virtual', - 'anonymous-vm': 'anonymous_virtual', - 'other-vm': 'other_virtual' }.iteritems(): - if key in sizes: - sizes[key] = dump.global_stat(value) - - if 'mustbezero' in sizes: - removed_list = ( - 'profiled-mmap_committed', - 'nonprofiled-absent_committed', - 'nonprofiled-anonymous_committed', - 'nonprofiled-file-exec_committed', - 'nonprofiled-file-nonexec_committed', - 'nonprofiled-stack_committed', - 'nonprofiled-other_committed') - sizes['mustbezero'] = ( - dump.global_stat('total_committed') - - sum(dump.global_stat(removed) for removed in removed_list)) - if 'total-exclude-profiler' in sizes: - sizes['total-exclude-profiler'] = ( - dump.global_stat('total_committed') - - (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) - if 'hour' in sizes: - sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 - if 'minute' in sizes: - sizes['minute'] = (dump.time - first_dump_time) / 60.0 - if 'second' in sizes: - sizes['second'] = dump.time - first_dump_time - - return sizes - - @staticmethod - def _accumulate_malloc(dump, policy, bucket_set, sizes): - for line in dump.iter_stacktrace: - words = line.split() - bucket = bucket_set.get(int(words[BUCKET_ID])) - if not bucket or bucket.allocator_type == 'malloc': - component_match = policy.find_malloc(bucket) - elif bucket.allocator_type == 'mmap': - continue - else: - assert False - sizes[component_match] += int(words[COMMITTED]) - - assert not component_match.startswith('mmap-') - if component_match.startswith('tc-'): - sizes['tc-total-log'] += int(words[COMMITTED]) - else: - sizes['other-total-log'] += int(words[COMMITTED]) - - @staticmethod - def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes): - # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed. - # http://crbug.com/245603. - global_stats = { - 'total': 0, - 'file-exec': 0, - 'file-nonexec': 0, - 'anonymous': 0, - 'stack': 0, - 'other': 0, - 'nonprofiled-file-exec': 0, - 'nonprofiled-file-nonexec': 0, - 'nonprofiled-anonymous': 0, - 'nonprofiled-stack': 0, - 'nonprofiled-other': 0, - 'profiled-mmap': 0, - } - - for key, value in dump.iter_map: - # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed. - # It's temporary verification code for transition described in - # http://crbug.com/245603. 
- committed = 0 - if 'committed' in value[1]: - committed = value[1]['committed'] - global_stats['total'] += committed - key = 'other' - name = value[1]['vma']['name'] - if name.startswith('/'): - if value[1]['vma']['executable'] == 'x': - key = 'file-exec' - else: - key = 'file-nonexec' - elif name == '[stack]': - key = 'stack' - elif name == '': - key = 'anonymous' - global_stats[key] += committed - if value[0] == 'unhooked': - global_stats['nonprofiled-' + key] += committed - if value[0] == 'hooked': - global_stats['profiled-mmap'] += committed - - if value[0] == 'unhooked': - if pfn_dict and dump.pageframe_length: - for pageframe in value[1]['pageframe']: - component_match = policy.find_unhooked(value, pageframe, pfn_dict) - sizes[component_match] += pageframe.size - else: - component_match = policy.find_unhooked(value) - sizes[component_match] += int(value[1]['committed']) - elif value[0] == 'hooked': - if pfn_dict and dump.pageframe_length: - for pageframe in value[1]['pageframe']: - component_match, _ = policy.find_mmap( - value, bucket_set, pageframe, pfn_dict) - sizes[component_match] += pageframe.size - assert not component_match.startswith('tc-') - if component_match.startswith('mmap-'): - sizes['mmap-total-log'] += pageframe.size - else: - sizes['other-total-log'] += pageframe.size - else: - component_match, _ = policy.find_mmap(value, bucket_set) - sizes[component_match] += int(value[1]['committed']) - if component_match.startswith('mmap-'): - sizes['mmap-total-log'] += int(value[1]['committed']) - else: - sizes['other-total-log'] += int(value[1]['committed']) - else: - LOGGER.error('Unrecognized mapping status: %s' % value[0]) - - return global_stats - - -class CSVCommand(PolicyCommands): - def __init__(self): - super(CSVCommand, self).__init__('csv') - - def do(self, sys_argv): - policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) - return CSVCommand._output( - policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) - - @staticmethod - def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): - max_components = 0 - for label in policy_set: - max_components = max(max_components, len(policy_set[label].components)) - - for label in sorted(policy_set): - components = policy_set[label].components - if len(policy_set) > 1: - out.write('%s%s\n' % (label, ',' * (max_components - 1))) - out.write('%s%s\n' % ( - ','.join(components), ',' * (max_components - len(components)))) - - LOGGER.info('Applying a policy %s to...' 
% label) - for dump in dumps: - component_sizes = PolicyCommands._apply_policy( - dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) - s = [] - for c in components: - if c in ('hour', 'minute', 'second'): - s.append('%05.5f' % (component_sizes[c])) - else: - s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) - out.write('%s%s\n' % ( - ','.join(s), ',' * (max_components - len(components)))) - - bucket_set.clear_component_cache() - - return 0 - - -class JSONCommand(PolicyCommands): - def __init__(self): - super(JSONCommand, self).__init__('json') - - def do(self, sys_argv): - policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) - return JSONCommand._output( - policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) - - @staticmethod - def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): - json_base = { - 'version': 'JSON_DEEP_2', - 'policies': {}, - } - - for label in sorted(policy_set): - json_base['policies'][label] = { - 'legends': policy_set[label].components, - 'snapshots': [], - } - - LOGGER.info('Applying a policy %s to...' % label) - for dump in dumps: - component_sizes = PolicyCommands._apply_policy( - dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) - component_sizes['dump_path'] = dump.path - component_sizes['dump_time'] = datetime.datetime.fromtimestamp( - dump.time).strftime('%Y-%m-%d %H:%M:%S') - json_base['policies'][label]['snapshots'].append(component_sizes) - - bucket_set.clear_component_cache() - - json.dump(json_base, out, indent=2, sort_keys=True) - - return 0 - - -class ListCommand(PolicyCommands): - def __init__(self): - super(ListCommand, self).__init__('list') - - def do(self, sys_argv): - policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) - return ListCommand._output( - policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) - - @staticmethod - def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): - for label in sorted(policy_set): - LOGGER.info('Applying a policy %s to...' % label) - for dump in dumps: - component_sizes = PolicyCommands._apply_policy( - dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time) - out.write('%s for %s:\n' % (label, dump.path)) - for c in policy_set[label].components: - if c in ['hour', 'minute', 'second']: - out.write('%40s %12.3f\n' % (c, component_sizes[c])) - else: - out.write('%40s %12d\n' % (c, component_sizes[c])) - - bucket_set.clear_component_cache() - - return 0 - - -class MapCommand(Command): - def __init__(self): - super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>') - - def do(self, sys_argv, out=sys.stdout): - _, args = self._parse_args(sys_argv, 2) - dump_path = args[1] - target_policy = args[2] - (bucket_set, dumps) = Command.load_basic_files(dump_path, True) - policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) - - MapCommand._output(dumps, bucket_set, policy_set[target_policy], out) - return 0 - - @staticmethod - def _output(dumps, bucket_set, policy, out): - """Prints all stacktraces in a given component of given depth. - - Args: - dumps: A list of Dump objects. - bucket_set: A BucketSet object. - policy: A Policy object. - out: An IO object to output. 
- """ - max_dump_count = 0 - range_dict = ExclusiveRangeDict(ListAttribute) - for dump in dumps: - max_dump_count = max(max_dump_count, dump.count) - for key, value in dump.iter_map: - for begin, end, attr in range_dict.iter_range(key[0], key[1]): - attr[dump.count] = value - - max_dump_count_digit = len(str(max_dump_count)) - for begin, end, attr in range_dict.iter_range(): - out.write('%x-%x\n' % (begin, end)) - if len(attr) < max_dump_count: - attr[max_dump_count] = None - for index, value in enumerate(attr[1:]): - out.write(' #%0*d: ' % (max_dump_count_digit, index + 1)) - if not value: - out.write('None\n') - elif value[0] == 'hooked': - component_match, _ = policy.find_mmap(value, bucket_set) - out.write('%s @ %d\n' % (component_match, value[1]['bucket_id'])) - else: - component_match = policy.find_unhooked(value) - region_info = value[1] - size = region_info['committed'] - out.write('%s [%d bytes] %s%s%s%s %s\n' % ( - component_match, size, value[1]['vma']['readable'], - value[1]['vma']['writable'], value[1]['vma']['executable'], - value[1]['vma']['private'], value[1]['vma']['name'])) - - -class ExpandCommand(Command): - def __init__(self): - super(ExpandCommand, self).__init__( - 'Usage: %prog expand <dump> <policy> <component> <depth>') - - def do(self, sys_argv): - _, args = self._parse_args(sys_argv, 4) - dump_path = args[1] - target_policy = args[2] - component_name = args[3] - depth = args[4] - (bucket_set, dump) = Command.load_basic_files(dump_path, False) - policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) - - ExpandCommand._output(dump, policy_set[target_policy], bucket_set, - component_name, int(depth), sys.stdout) - return 0 - - @staticmethod - def _output(dump, policy, bucket_set, component_name, depth, out): - """Prints all stacktraces in a given component of given depth. - - Args: - dump: A Dump object. - policy: A Policy object. - bucket_set: A BucketSet object. - component_name: A name of component for filtering. - depth: An integer representing depth to be printed. - out: An IO object to output. - """ - sizes = {} - - ExpandCommand._accumulate( - dump, policy, bucket_set, component_name, depth, sizes) - - sorted_sizes_list = sorted( - sizes.iteritems(), key=(lambda x: x[1]), reverse=True) - total = 0 - # TODO(dmikurube): Better formatting. 
- for size_pair in sorted_sizes_list: - out.write('%10d %s\n' % (size_pair[1], size_pair[0])) - total += size_pair[1] - LOGGER.info('total: %d\n' % total) - - @staticmethod - def _add_size(precedence, bucket, depth, committed, sizes): - stacktrace_sequence = precedence - for function, sourcefile in zip( - bucket.symbolized_stackfunction[ - 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)], - bucket.symbolized_stacksourcefile[ - 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]): - stacktrace_sequence += '%s(@%s) ' % (function, sourcefile) - if not stacktrace_sequence in sizes: - sizes[stacktrace_sequence] = 0 - sizes[stacktrace_sequence] += committed - - @staticmethod - def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): - rule = policy.find_rule(component_name) - if not rule: - pass - elif rule.allocator_type == 'malloc': - for line in dump.iter_stacktrace: - words = line.split() - bucket = bucket_set.get(int(words[BUCKET_ID])) - if not bucket or bucket.allocator_type == 'malloc': - component_match = policy.find_malloc(bucket) - elif bucket.allocator_type == 'mmap': - continue - else: - assert False - if component_match == component_name: - precedence = '' - precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT]) - precedence += '(free=%d) ' % int(words[FREE_COUNT]) - if bucket.typeinfo: - precedence += '(type=%s) ' % bucket.symbolized_typeinfo - precedence += '(type.name=%s) ' % bucket.typeinfo_name - ExpandCommand._add_size(precedence, bucket, depth, - int(words[COMMITTED]), sizes) - elif rule.allocator_type == 'mmap': - for _, region in dump.iter_map: - if region[0] != 'hooked': - continue - component_match, bucket = policy.find_mmap(region, bucket_set) - if component_match == component_name: - ExpandCommand._add_size('', bucket, depth, - region[1]['committed'], sizes) - - -class PProfCommand(Command): - def __init__(self): - super(PProfCommand, self).__init__( - 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') - self._parser.add_option('-c', '--component', type='string', - dest='component', - help='restrict to COMPONENT', metavar='COMPONENT') - - def do(self, sys_argv): - options, args = self._parse_args(sys_argv, 2) - - dump_path = args[1] - target_policy = args[2] - component = options.component - - (bucket_set, dump) = Command.load_basic_files(dump_path, False) - policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) - - with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: - maps_lines = maps_f.readlines() - PProfCommand._output( - dump, policy_set[target_policy], bucket_set, maps_lines, component, - sys.stdout) - - return 0 - - @staticmethod - def _output(dump, policy, bucket_set, maps_lines, component_name, out): - """Converts the heap profile dump so it can be processed by pprof. - - Args: - dump: A Dump object. - policy: A Policy object. - bucket_set: A BucketSet object. - maps_lines: A list of strings containing /proc/.../maps. - component_name: A name of component for filtering. - out: An IO object to output. 
- """ - out.write('heap profile: ') - com_committed, com_allocs = PProfCommand._accumulate( - dump, policy, bucket_set, component_name) - - out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( - com_allocs, com_committed, com_allocs, com_committed)) - - PProfCommand._output_stacktrace_lines( - dump, policy, bucket_set, component_name, out) - - out.write('MAPPED_LIBRARIES:\n') - for line in maps_lines: - out.write(line) - - @staticmethod - def _accumulate(dump, policy, bucket_set, component_name): - """Accumulates size of committed chunks and the number of allocated chunks. - - Args: - dump: A Dump object. - policy: A Policy object. - bucket_set: A BucketSet object. - component_name: A name of component for filtering. - - Returns: - Two integers which are the accumulated size of committed regions and the - number of allocated chunks, respectively. - """ - com_committed = 0 - com_allocs = 0 - - for _, region in dump.iter_map: - if region[0] != 'hooked': - continue - component_match, bucket = policy.find_mmap(region, bucket_set) - - if (component_name and component_name != component_match) or ( - region[1]['committed'] == 0): - continue - - com_committed += region[1]['committed'] - com_allocs += 1 - - for line in dump.iter_stacktrace: - words = line.split() - bucket = bucket_set.get(int(words[BUCKET_ID])) - if not bucket or bucket.allocator_type == 'malloc': - component_match = policy.find_malloc(bucket) - elif bucket.allocator_type == 'mmap': - continue - else: - assert False - if (not bucket or - (component_name and component_name != component_match)): - continue - - com_committed += int(words[COMMITTED]) - com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) - - return com_committed, com_allocs - - @staticmethod - def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): - """Prints information of stacktrace lines for pprof. - - Args: - dump: A Dump object. - policy: A Policy object. - bucket_set: A BucketSet object. - component_name: A name of component for filtering. - out: An IO object to output. 
- """ - for _, region in dump.iter_map: - if region[0] != 'hooked': - continue - component_match, bucket = policy.find_mmap(region, bucket_set) - - if (component_name and component_name != component_match) or ( - region[1]['committed'] == 0): - continue - - out.write(' 1: %8s [ 1: %8s] @' % ( - region[1]['committed'], region[1]['committed'])) - for address in bucket.stacktrace: - out.write(' 0x%016x' % address) - out.write('\n') - - for line in dump.iter_stacktrace: - words = line.split() - bucket = bucket_set.get(int(words[BUCKET_ID])) - if not bucket or bucket.allocator_type == 'malloc': - component_match = policy.find_malloc(bucket) - elif bucket.allocator_type == 'mmap': - continue - else: - assert False - if (not bucket or - (component_name and component_name != component_match)): - continue - - out.write('%6d: %8s [%6d: %8s] @' % ( - int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), - words[COMMITTED], - int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), - words[COMMITTED])) - for address in bucket.stacktrace: - out.write(' 0x%016x' % address) - out.write('\n') - - -class UploadCommand(Command): - def __init__(self): - super(UploadCommand, self).__init__( - 'Usage: %prog upload [--gsutil path/to/gsutil] ' - '<first-dump> <destination-gs-path>') - self._parser.add_option('--gsutil', default='gsutil', - help='path to GSUTIL', metavar='GSUTIL') - - def do(self, sys_argv): - options, args = self._parse_args(sys_argv, 2) - dump_path = args[1] - gs_path = args[2] - - dump_files = Command._find_all_dumps(dump_path) - bucket_files = Command._find_all_buckets(dump_path) - prefix = Command._find_prefix(dump_path) - symbol_data_sources = SymbolDataSources(prefix) - symbol_data_sources.prepare() - symbol_path = symbol_data_sources.path() - - handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof') - os.close(handle_zip) - - try: - file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED) - for filename in dump_files: - file_zip.write(filename, os.path.basename(os.path.abspath(filename))) - for filename in bucket_files: - file_zip.write(filename, os.path.basename(os.path.abspath(filename))) - - symbol_basename = os.path.basename(os.path.abspath(symbol_path)) - for filename in os.listdir(symbol_path): - if not filename.startswith('.'): - file_zip.write(os.path.join(symbol_path, filename), - os.path.join(symbol_basename, os.path.basename( - os.path.abspath(filename)))) - file_zip.close() - - returncode = UploadCommand._run_gsutil( - options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path) - finally: - os.remove(filename_zip) - - return returncode - - @staticmethod - def _run_gsutil(gsutil, *args): - """Run gsutil as a subprocess. - - Args: - *args: Arguments to pass to gsutil. The first argument should be an - operation such as ls, cp or cat. - Returns: - The return code from the process. 
- """ - command = [gsutil] + list(args) - LOGGER.info("Running: %s", command) - - try: - return subprocess.call(command) - except OSError, e: - LOGGER.error('Error to run gsutil: %s', e) - - -class CatCommand(Command): - def __init__(self): - super(CatCommand, self).__init__('Usage: %prog cat <first-dump>') - self._parser.add_option('--alternative-dirs', dest='alternative_dirs', - metavar='/path/on/target@/path/on/host[:...]', - help='Read files in /path/on/host/ instead of ' - 'files in /path/on/target/.') - self._parser.add_option('--indent', dest='indent', action='store_true', - help='Indent the output.') - - def do(self, sys_argv): - options, args = self._parse_args(sys_argv, 1) - dump_path = args[1] - # TODO(dmikurube): Support shared memory. - alternative_dirs_dict = {} - if options.alternative_dirs: - for alternative_dir_pair in options.alternative_dirs.split(':'): - target_path, host_path = alternative_dir_pair.split('@', 1) - alternative_dirs_dict[target_path] = host_path - (bucket_set, dumps) = Command.load_basic_files( - dump_path, True, alternative_dirs=alternative_dirs_dict) - - json_root = OrderedDict() - json_root['version'] = 1 - json_root['run_id'] = None - for dump in dumps: - if json_root['run_id'] and json_root['run_id'] != dump.run_id: - LOGGER.error('Inconsistent heap profile dumps.') - json_root['run_id'] = '' - break - json_root['run_id'] = dump.run_id - json_root['snapshots'] = [] - - # Load all sorters. - sorters = SorterSet() - - for dump in dumps: - json_root['snapshots'].append( - self._fill_snapshot(dump, bucket_set, sorters)) - - if options.indent: - json.dump(json_root, sys.stdout, indent=2) - else: - json.dump(json_root, sys.stdout) - print '' - - @staticmethod - def _fill_snapshot(dump, bucket_set, sorters): - root = OrderedDict() - root['time'] = dump.time - root['worlds'] = OrderedDict() - root['worlds']['vm'] = CatCommand._fill_world( - dump, bucket_set, sorters, 'vm') - root['worlds']['malloc'] = CatCommand._fill_world( - dump, bucket_set, sorters, 'malloc') - return root - - @staticmethod - def _fill_world(dump, bucket_set, sorters, world): - root = OrderedDict() - - root['name'] = 'world' - if world == 'vm': - root['unit_fields'] = ['committed', 'reserved'] - elif world == 'malloc': - root['unit_fields'] = ['size', 'alloc_count', 'free_count'] - - # Make { vm | malloc } units with their sizes. - root['units'] = OrderedDict() - unit_set = UnitSet(world) - if world == 'vm': - for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set): - unit_set.append(unit) - for unit in unit_set: - root['units'][unit.unit_id] = [unit.committed, unit.reserved] - elif world == 'malloc': - for unit in CatCommand._iterate_malloc_unit(dump, bucket_set): - unit_set.append(unit) - for unit in unit_set: - root['units'][unit.unit_id] = [ - unit.size, unit.alloc_count, unit.free_count] - - # Iterate for { vm | malloc } sorters. 
- root['breakdown'] = OrderedDict() - for sorter in sorters.iter_world(world): - breakdown = OrderedDict() - for unit in unit_set: - found = sorter.find(unit) - if found.name not in breakdown: - category = OrderedDict() - category['name'] = found.name - category['color'] = 'random' - subworlds = {} - for subworld in found.iter_subworld(): - subworlds[subworld] = False - if subworlds: - category['subworlds'] = subworlds - if found.hidden: - category['hidden'] = True - category['units'] = [] - breakdown[found.name] = category - breakdown[found.name]['units'].append(unit.unit_id) - root['breakdown'][sorter.name] = breakdown - - return root - - @staticmethod - def _iterate_vm_unit(dump, pfn_dict, bucket_set): - unit_id = 0 - for _, region in dump.iter_map: - unit_id += 1 - if region[0] == 'unhooked': - if pfn_dict and dump.pageframe_length: - for pageframe in region[1]['pageframe']: - yield UnhookedUnit(unit_id, pageframe.size, pageframe.size, - region, pageframe, pfn_dict) - else: - yield UnhookedUnit(unit_id, - int(region[1]['committed']), - int(region[1]['reserved']), - region) - elif region[0] == 'hooked': - if pfn_dict and dump.pageframe_length: - for pageframe in region[1]['pageframe']: - yield MMapUnit(unit_id, - pageframe.size, - pageframe.size, - region, bucket_set, pageframe, pfn_dict) - else: - yield MMapUnit(unit_id, - int(region[1]['committed']), - int(region[1]['reserved']), - region, - bucket_set) - else: - LOGGER.error('Unrecognized mapping status: %s' % region[0]) - - @staticmethod - def _iterate_malloc_unit(dump, bucket_set): - for line in dump.iter_stacktrace: - words = line.split() - bucket = bucket_set.get(int(words[BUCKET_ID])) - if bucket and bucket.allocator_type == 'malloc': - yield MallocUnit(int(words[BUCKET_ID]), - int(words[COMMITTED]), - int(words[ALLOC_COUNT]), - int(words[FREE_COUNT]), - bucket) - elif not bucket: - # 'Not-found' buckets are all assumed as malloc buckets. - yield MallocUnit(int(words[BUCKET_ID]), - int(words[COMMITTED]), - int(words[ALLOC_COUNT]), - int(words[FREE_COUNT]), - None) def main(): COMMANDS = { - 'buckets': BucketsCommand, - 'cat': CatCommand, - 'csv': CSVCommand, - 'expand': ExpandCommand, - 'json': JSONCommand, - 'list': ListCommand, - 'map': MapCommand, - 'pprof': PProfCommand, - 'stacktrace': StacktraceCommand, - 'upload': UploadCommand, + 'buckets': subcommands.BucketsCommand, + 'cat': subcommands.CatCommand, + 'csv': subcommands.CSVCommand, + 'expand': subcommands.ExpandCommand, + 'json': subcommands.JSONCommand, + 'list': subcommands.ListCommand, + 'map': subcommands.MapCommand, + 'pprof': subcommands.PProfCommand, + 'stacktrace': subcommands.StacktraceCommand, + 'upload': subcommands.UploadCommand, } if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): diff --git a/tools/deep_memory_profiler/lib/__init__.py b/tools/deep_memory_profiler/lib/__init__.py new file mode 100644 index 0000000..9228df8 --- /dev/null +++ b/tools/deep_memory_profiler/lib/__init__.py @@ -0,0 +1,3 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. diff --git a/tools/deep_memory_profiler/lib/bucket.py b/tools/deep_memory_profiler/lib/bucket.py new file mode 100644 index 0000000..51af5b2 --- /dev/null +++ b/tools/deep_memory_profiler/lib/bucket.py @@ -0,0 +1,191 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
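The refactored main() keeps the table-driven dispatch shown above: a dict maps each subcommand name to a class from the new subcommands package, and each class exposes a do(sys_argv) method that returns an exit status. Below is a minimal standalone sketch of that pattern, assuming the dispatcher passes sys.argv[1:] so the subcommand name sits at index 0; _EchoCommand and the usage message are placeholders for illustration, not code from this patch.

# Minimal sketch of dmprof's table-driven subcommand dispatch.
# _EchoCommand is a placeholder; the real classes live in subcommands/.
import sys


class _EchoCommand(object):
  def do(self, sys_argv):
    # Real subcommands parse sys_argv with optparse and then do the work.
    print 'running %s with args %s' % (sys_argv[0], sys_argv[1:])
    return 0


COMMANDS = {
    'buckets': _EchoCommand,
    'stacktrace': _EchoCommand,
}


def main():
  if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
    sys.stderr.write('usage: dmprof <subcommand> [options]\n')
    return 1
  return COMMANDS[sys.argv[1]]().do(sys.argv[1:])


if __name__ == '__main__':
  sys.exit(main())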
+ +import logging +import os + +from lib.symbol import FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS, TYPEINFO_SYMBOLS + + +LOGGER = logging.getLogger('dmprof') + +# Indexes in dumped heap profile dumps. +VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _, BUCKET_ID = range(6) + + +class Bucket(object): + """Represents a bucket, which is a unit of memory block classification.""" + + def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name): + self._stacktrace = stacktrace + self._allocator_type = allocator_type + self._typeinfo = typeinfo + self._typeinfo_name = typeinfo_name + + self._symbolized_stackfunction = stacktrace + self._symbolized_joined_stackfunction = '' + self._symbolized_stacksourcefile = stacktrace + self._symbolized_joined_stacksourcefile = '' + self._symbolized_typeinfo = typeinfo_name + + self.component_cache = '' + + def __str__(self): + result = [] + result.append(self._allocator_type) + if self._symbolized_typeinfo == 'no typeinfo': + result.append('tno_typeinfo') + else: + result.append('t' + self._symbolized_typeinfo) + result.append('n' + self._typeinfo_name) + result.extend(['%s(@%s)' % (function, sourcefile) + for function, sourcefile + in zip(self._symbolized_stackfunction, + self._symbolized_stacksourcefile)]) + return ' '.join(result) + + def symbolize(self, symbol_mapping_cache): + """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. + + Args: + symbol_mapping_cache: A SymbolMappingCache object. + """ + # TODO(dmikurube): Fill explicitly with numbers if symbol not found. + self._symbolized_stackfunction = [ + symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) + for address in self._stacktrace] + self._symbolized_joined_stackfunction = ' '.join( + self._symbolized_stackfunction) + self._symbolized_stacksourcefile = [ + symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) + for address in self._stacktrace] + self._symbolized_joined_stacksourcefile = ' '.join( + self._symbolized_stacksourcefile) + if not self._typeinfo: + self._symbolized_typeinfo = 'no typeinfo' + else: + self._symbolized_typeinfo = symbol_mapping_cache.lookup( + TYPEINFO_SYMBOLS, self._typeinfo) + if not self._symbolized_typeinfo: + self._symbolized_typeinfo = 'no typeinfo' + + def clear_component_cache(self): + self.component_cache = '' + + @property + def stacktrace(self): + return self._stacktrace + + @property + def allocator_type(self): + return self._allocator_type + + @property + def typeinfo(self): + return self._typeinfo + + @property + def typeinfo_name(self): + return self._typeinfo_name + + @property + def symbolized_stackfunction(self): + return self._symbolized_stackfunction + + @property + def symbolized_joined_stackfunction(self): + return self._symbolized_joined_stackfunction + + @property + def symbolized_stacksourcefile(self): + return self._symbolized_stacksourcefile + + @property + def symbolized_joined_stacksourcefile(self): + return self._symbolized_joined_stacksourcefile + + @property + def symbolized_typeinfo(self): + return self._symbolized_typeinfo + + +class BucketSet(object): + """Represents a set of bucket.""" + def __init__(self): + self._buckets = {} + self._code_addresses = set() + self._typeinfo_addresses = set() + + def load(self, prefix): + """Loads all related bucket files. + + Args: + prefix: A prefix string for bucket file names. 
+ """ + LOGGER.info('Loading bucket files.') + + n = 0 + skipped = 0 + while True: + path = '%s.%04d.buckets' % (prefix, n) + if not os.path.exists(path) or not os.stat(path).st_size: + if skipped > 10: + break + n += 1 + skipped += 1 + continue + LOGGER.info(' %s' % path) + with open(path, 'r') as f: + self._load_file(f) + n += 1 + skipped = 0 + + def _load_file(self, bucket_f): + for line in bucket_f: + words = line.split() + typeinfo = None + typeinfo_name = '' + stacktrace_begin = 2 + for index, word in enumerate(words): + if index < 2: + continue + if word[0] == 't': + typeinfo = int(word[1:], 16) + self._typeinfo_addresses.add(typeinfo) + elif word[0] == 'n': + typeinfo_name = word[1:] + else: + stacktrace_begin = index + break + stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] + for frame in stacktrace: + self._code_addresses.add(frame) + self._buckets[int(words[0])] = Bucket( + stacktrace, words[1], typeinfo, typeinfo_name) + + def __iter__(self): + for bucket_id, bucket_content in self._buckets.iteritems(): + yield bucket_id, bucket_content + + def __getitem__(self, bucket_id): + return self._buckets[bucket_id] + + def get(self, bucket_id): + return self._buckets.get(bucket_id) + + def symbolize(self, symbol_mapping_cache): + for bucket_content in self._buckets.itervalues(): + bucket_content.symbolize(symbol_mapping_cache) + + def clear_component_cache(self): + for bucket_content in self._buckets.itervalues(): + bucket_content.clear_component_cache() + + def iter_addresses(self, symbol_type): + if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: + for function in self._code_addresses: + yield function + else: + for function in self._typeinfo_addresses: + yield function diff --git a/tools/deep_memory_profiler/lib/dump.py b/tools/deep_memory_profiler/lib/dump.py new file mode 100644 index 0000000..115979e --- /dev/null +++ b/tools/deep_memory_profiler/lib/dump.py @@ -0,0 +1,487 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import copy +import datetime +import logging +import os +import re +import time + +from lib.bucket import BUCKET_ID +from lib.exceptions import EmptyDumpException, InvalidDumpException +from lib.exceptions import ObsoleteDumpVersionException, ParsingException +from lib.pageframe import PageFrame +from lib.range_dict import ExclusiveRangeDict +from lib.symbol import proc_maps + + +LOGGER = logging.getLogger('dmprof') + + +# Heap Profile Dump versions + +# DUMP_DEEP_[1-4] are obsolete. +# DUMP_DEEP_2+ distinct mmap regions and malloc chunks. +# DUMP_DEEP_3+ don't include allocation functions in their stack dumps. +# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". +# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. +# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. +DUMP_DEEP_1 = 'DUMP_DEEP_1' +DUMP_DEEP_2 = 'DUMP_DEEP_2' +DUMP_DEEP_3 = 'DUMP_DEEP_3' +DUMP_DEEP_4 = 'DUMP_DEEP_4' + +DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) + +# DUMP_DEEP_5 doesn't separate sections for malloc and mmap. +# malloc and mmap are identified in bucket files. +# DUMP_DEEP_5 should be processed by POLICY_DEEP_4. +DUMP_DEEP_5 = 'DUMP_DEEP_5' + +# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5. 
+DUMP_DEEP_6 = 'DUMP_DEEP_6' + + +class Dump(object): + """Represents a heap profile dump.""" + + _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') + + _HOOK_PATTERN = re.compile( + r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' + r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) + + _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' + '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') + _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' + '(?P<RESERVED>[0-9]+)') + + _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') + _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') + + _TIME_PATTERN_FORMAT = re.compile( + r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') + _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') + + def __init__(self, path, modified_time): + self._path = path + matched = self._PATH_PATTERN.match(path) + self._pid = int(matched.group(2)) + self._count = int(matched.group(3)) + self._time = modified_time + self._map = {} + self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) + self._stacktrace_lines = [] + self._global_stats = {} # used only in apply_policy + + self._run_id = '' + self._pagesize = 4096 + self._pageframe_length = 0 + self._pageframe_encoding = '' + self._has_pagecount = False + + self._version = '' + self._lines = [] + + @property + def path(self): + return self._path + + @property + def count(self): + return self._count + + @property + def time(self): + return self._time + + @property + def iter_map(self): + for region in sorted(self._map.iteritems()): + yield region[0], region[1] + + def iter_procmaps(self): + for begin, end, attr in self._map.iter_range(): + yield begin, end, attr + + @property + def iter_stacktrace(self): + for line in self._stacktrace_lines: + yield line + + def global_stat(self, name): + return self._global_stats[name] + + @property + def run_id(self): + return self._run_id + + @property + def pagesize(self): + return self._pagesize + + @property + def pageframe_length(self): + return self._pageframe_length + + @property + def pageframe_encoding(self): + return self._pageframe_encoding + + @property + def has_pagecount(self): + return self._has_pagecount + + @staticmethod + def load(path, log_header='Loading a heap profile dump: '): + """Loads a heap profile dump. + + Args: + path: A file path string to load. + log_header: A preceding string for log messages. + + Returns: + A loaded Dump object. + + Raises: + ParsingException for invalid heap profile dumps. + """ + dump = Dump(path, os.stat(path).st_mtime) + with open(path, 'r') as f: + dump.load_file(f, log_header) + return dump + + def load_file(self, f, log_header): + self._lines = [line for line in f + if line and not line.startswith('#')] + + try: + self._version, ln = self._parse_version() + self._parse_meta_information() + if self._version == DUMP_DEEP_6: + self._parse_mmap_list() + self._parse_global_stats() + self._extract_stacktrace_lines(ln) + except EmptyDumpException: + LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) + except ParsingException, e: + LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) + raise + else: + LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) + + def _parse_version(self): + """Parses a version string in self._lines. 
+ + Returns: + A pair of (a string representing a version of the stacktrace dump, + and an integer indicating a line number next to the version string). + + Raises: + ParsingException for invalid dump versions. + """ + version = '' + + # Skip until an identifiable line. + headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') + if not self._lines: + raise EmptyDumpException('Empty heap dump file.') + (ln, found) = skip_while( + 0, len(self._lines), + lambda n: not self._lines[n].startswith(headers)) + if not found: + raise InvalidDumpException('No version header.') + + # Identify a version. + if self._lines[ln].startswith('heap profile: '): + version = self._lines[ln][13:].strip() + if version in (DUMP_DEEP_5, DUMP_DEEP_6): + (ln, _) = skip_while( + ln, len(self._lines), + lambda n: self._lines[n] != 'STACKTRACES:\n') + elif version in DUMP_DEEP_OBSOLETE: + raise ObsoleteDumpVersionException(version) + else: + raise InvalidDumpException('Invalid version: %s' % version) + elif self._lines[ln] == 'STACKTRACES:\n': + raise ObsoleteDumpVersionException(DUMP_DEEP_1) + elif self._lines[ln] == 'MMAP_STACKTRACES:\n': + raise ObsoleteDumpVersionException(DUMP_DEEP_2) + + return (version, ln) + + def _parse_global_stats(self): + """Parses lines in self._lines as global stats.""" + (ln, _) = skip_while( + 0, len(self._lines), + lambda n: self._lines[n] != 'GLOBAL_STATS:\n') + + global_stat_names = [ + 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', + 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', + 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', + 'nonprofiled-stack', 'nonprofiled-other', + 'profiled-mmap', 'profiled-malloc'] + + for prefix in global_stat_names: + (ln, _) = skip_while( + ln, len(self._lines), + lambda n: self._lines[n].split()[0] != prefix) + words = self._lines[ln].split() + self._global_stats[prefix + '_virtual'] = int(words[-2]) + self._global_stats[prefix + '_committed'] = int(words[-1]) + + def _parse_meta_information(self): + """Parses lines in self._lines for meta information.""" + (ln, found) = skip_while( + 0, len(self._lines), + lambda n: self._lines[n] != 'META:\n') + if not found: + return + ln += 1 + + while True: + if self._lines[ln].startswith('Time:'): + matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) + matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) + if matched_format: + self._time = time.mktime(datetime.datetime.strptime( + matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) + if matched_format.group(2): + self._time += float(matched_format.group(2)[1:]) / 1000.0 + elif matched_seconds: + self._time = float(matched_seconds.group(1)) + elif self._lines[ln].startswith('Reason:'): + pass # Nothing to do for 'Reason:' + elif self._lines[ln].startswith('PageSize: '): + self._pagesize = int(self._lines[ln][10:]) + elif self._lines[ln].startswith('CommandLine:'): + pass + elif (self._lines[ln].startswith('PageFrame: ') or + self._lines[ln].startswith('PFN: ')): + if self._lines[ln].startswith('PageFrame: '): + words = self._lines[ln][11:].split(',') + else: + words = self._lines[ln][5:].split(',') + for word in words: + if word == '24': + self._pageframe_length = 24 + elif word == 'Base64': + self._pageframe_encoding = 'base64' + elif word == 'PageCount': + self._has_pagecount = True + elif self._lines[ln].startswith('RunID: '): + self._run_id = self._lines[ln][7:].strip() + elif (self._lines[ln].startswith('MMAP_LIST:') or + self._lines[ln].startswith('GLOBAL_STATS:')): + # 
Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found. + break + else: + pass + ln += 1 + + def _parse_mmap_list(self): + """Parses lines in self._lines as a mmap list.""" + (ln, found) = skip_while( + 0, len(self._lines), + lambda n: self._lines[n] != 'MMAP_LIST:\n') + if not found: + return {} + + ln += 1 + self._map = {} + current_vma = {} + pageframe_list = [] + while True: + entry = proc_maps.ProcMaps.parse_line(self._lines[ln]) + if entry: + current_vma = {} + for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): + for key, value in entry.as_dict().iteritems(): + attr[key] = value + current_vma[key] = value + ln += 1 + continue + + if self._lines[ln].startswith(' PF: '): + for pageframe in self._lines[ln][5:].split(): + pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) + ln += 1 + continue + + matched = self._HOOK_PATTERN.match(self._lines[ln]) + if not matched: + break + # 2: starting address + # 5: end address + # 7: hooked or unhooked + # 8: additional information + if matched.group(7) == 'hooked': + submatched = self._HOOKED_PATTERN.match(matched.group(8)) + if not submatched: + submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) + elif matched.group(7) == 'unhooked': + submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) + if not submatched: + submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) + else: + assert matched.group(7) in ['hooked', 'unhooked'] + + submatched_dict = submatched.groupdict() + region_info = { 'vma': current_vma } + if submatched_dict.get('TYPE'): + region_info['type'] = submatched_dict['TYPE'].strip() + if submatched_dict.get('COMMITTED'): + region_info['committed'] = int(submatched_dict['COMMITTED']) + if submatched_dict.get('RESERVED'): + region_info['reserved'] = int(submatched_dict['RESERVED']) + if submatched_dict.get('BUCKETID'): + region_info['bucket_id'] = int(submatched_dict['BUCKETID']) + + if matched.group(1) == '(': + start = current_vma['begin'] + else: + start = int(matched.group(2), 16) + if matched.group(4) == '(': + end = current_vma['end'] + else: + end = int(matched.group(5), 16) + + if pageframe_list and pageframe_list[0].start_truncated: + pageframe_list[0].set_size( + pageframe_list[0].size - start % self._pagesize) + if pageframe_list and pageframe_list[-1].end_truncated: + pageframe_list[-1].set_size( + pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) + region_info['pageframe'] = pageframe_list + pageframe_list = [] + + self._map[(start, end)] = (matched.group(7), region_info) + ln += 1 + + def _extract_stacktrace_lines(self, line_number): + """Extracts the position of stacktrace lines. + + Valid stacktrace lines are stored into self._stacktrace_lines. + + Args: + line_number: A line number to start parsing in lines. + + Raises: + ParsingException for invalid dump versions. 
+ """ + if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): + (line_number, _) = skip_while( + line_number, len(self._lines), + lambda n: not self._lines[n].split()[0].isdigit()) + stacktrace_start = line_number + (line_number, _) = skip_while( + line_number, len(self._lines), + lambda n: self._check_stacktrace_line(self._lines[n])) + self._stacktrace_lines = self._lines[stacktrace_start:line_number] + + elif self._version in DUMP_DEEP_OBSOLETE: + raise ObsoleteDumpVersionException(self._version) + + else: + raise InvalidDumpException('Invalid version: %s' % self._version) + + @staticmethod + def _check_stacktrace_line(stacktrace_line): + """Checks if a given stacktrace_line is valid as stacktrace. + + Args: + stacktrace_line: A string to be checked. + + Returns: + True if the given stacktrace_line is valid. + """ + words = stacktrace_line.split() + if len(words) < BUCKET_ID + 1: + return False + if words[BUCKET_ID - 1] != '@': + return False + return True + + +class DumpList(object): + """Represents a sequence of heap profile dumps.""" + + def __init__(self, dump_list): + self._dump_list = dump_list + + @staticmethod + def load(path_list): + LOGGER.info('Loading heap dump profiles.') + dump_list = [] + for path in path_list: + dump_list.append(Dump.load(path, ' ')) + return DumpList(dump_list) + + def __len__(self): + return len(self._dump_list) + + def __iter__(self): + for dump in self._dump_list: + yield dump + + def __getitem__(self, index): + return self._dump_list[index] + + +class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): + """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" + _DUMMY_ENTRY = proc_maps.ProcMapsEntry( + 0, # begin + 0, # end + '-', # readable + '-', # writable + '-', # executable + '-', # private + 0, # offset + '00', # major + '00', # minor + 0, # inode + '' # name + ) + + def __init__(self): + super(ProcMapsEntryAttribute, self).__init__() + self._entry = self._DUMMY_ENTRY.as_dict() + + def __str__(self): + return str(self._entry) + + def __repr__(self): + return 'ProcMapsEntryAttribute' + str(self._entry) + + def __getitem__(self, key): + return self._entry[key] + + def __setitem__(self, key, value): + if key not in self._entry: + raise KeyError(key) + self._entry[key] = value + + def copy(self): + new_entry = ProcMapsEntryAttribute() + for key, value in self._entry.iteritems(): + new_entry[key] = copy.deepcopy(value) + return new_entry + + +def skip_while(index, max_index, skipping_condition): + """Increments |index| until |skipping_condition|(|index|) is False. + + Returns: + A pair of an integer indicating a line number after skipped, and a + boolean value which is True if found a line which skipping_condition + is False for. + """ + while skipping_condition(index): + index += 1 + if index >= max_index: + return index, False + return index, True diff --git a/tools/deep_memory_profiler/lib/exceptions.py b/tools/deep_memory_profiler/lib/exceptions.py new file mode 100644 index 0000000..2c68af7 --- /dev/null +++ b/tools/deep_memory_profiler/lib/exceptions.py @@ -0,0 +1,22 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +class EmptyDumpException(Exception): + def __str__(self): + return repr(self.args[0]) + + +class ParsingException(Exception): + def __str__(self): + return repr(self.args[0]) + + +class InvalidDumpException(ParsingException): + def __str__(self): + return "invalid heap profile dump: %s" % repr(self.args[0]) + + +class ObsoleteDumpVersionException(ParsingException): + def __str__(self): + return "obsolete heap profile dump version: %s" % repr(self.args[0]) diff --git a/tools/deep_memory_profiler/lib/ordered_dict.py b/tools/deep_memory_profiler/lib/ordered_dict.py new file mode 100644 index 0000000..f7d053b --- /dev/null +++ b/tools/deep_memory_profiler/lib/ordered_dict.py @@ -0,0 +1,19 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# TODO(dmikurube): Remove this file once Python 2.7 is required. + +import os +import sys + +try: + from collections import OrderedDict # pylint: disable=E0611,W0611 +except ImportError: + _BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + _SIMPLEJSON_PATH = os.path.join(_BASE_PATH, + os.pardir, + os.pardir, + 'third_party') + sys.path.insert(0, _SIMPLEJSON_PATH) + from simplejson import OrderedDict # pylint: disable=W0611 diff --git a/tools/deep_memory_profiler/lib/pageframe.py b/tools/deep_memory_profiler/lib/pageframe.py new file mode 100644 index 0000000..8722243 --- /dev/null +++ b/tools/deep_memory_profiler/lib/pageframe.py @@ -0,0 +1,163 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import logging +import os +import re +import struct + + +LOGGER = logging.getLogger('dmprof') + + +class PageFrame(object): + """Represents a pageframe and maybe its shared count.""" + def __init__(self, pfn, size, pagecount, start_truncated, end_truncated): + self._pfn = pfn + self._size = size + self._pagecount = pagecount + self._start_truncated = start_truncated + self._end_truncated = end_truncated + + def __str__(self): + result = str() + if self._start_truncated: + result += '<' + result += '%06x#%d' % (self._pfn, self._pagecount) + if self._end_truncated: + result += '>' + return result + + def __repr__(self): + return str(self) + + @staticmethod + def parse(encoded_pfn, size): + start = 0 + end = len(encoded_pfn) + end_truncated = False + if encoded_pfn.endswith('>'): + end = len(encoded_pfn) - 1 + end_truncated = True + pagecount_found = encoded_pfn.find('#') + pagecount = None + if pagecount_found >= 0: + encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end] + pagecount = struct.unpack( + '>I', '\x00' + encoded_pagecount.decode('base64'))[0] + end = pagecount_found + start_truncated = False + if encoded_pfn.startswith('<'): + start = 1 + start_truncated = True + + pfn = struct.unpack( + '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0] + + return PageFrame(pfn, size, pagecount, start_truncated, end_truncated) + + @property + def pfn(self): + return self._pfn + + @property + def size(self): + return self._size + + def set_size(self, size): + self._size = size + + @property + def pagecount(self): + return self._pagecount + + @property + def start_truncated(self): + return self._start_truncated + + @property + def end_truncated(self): + return self._end_truncated + + +class PFNCounts(object): + """Represents counts of PFNs in a process.""" + + _PATH_PATTERN = 
re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') + + def __init__(self, path, modified_time): + matched = self._PATH_PATTERN.match(path) + if matched: + self._pid = int(matched.group(2)) + else: + self._pid = 0 + self._command_line = '' + self._pagesize = 4096 + self._path = path + self._pfn_meta = '' + self._pfnset = {} + self._reason = '' + self._time = modified_time + + @staticmethod + def load(path, log_header='Loading PFNs from a heap profile dump: '): + pfnset = PFNCounts(path, float(os.stat(path).st_mtime)) + LOGGER.info('%s%s' % (log_header, path)) + + with open(path, 'r') as pfnset_f: + pfnset.load_file(pfnset_f) + + return pfnset + + @property + def path(self): + return self._path + + @property + def pid(self): + return self._pid + + @property + def time(self): + return self._time + + @property + def reason(self): + return self._reason + + @property + def iter_pfn(self): + for pfn, count in self._pfnset.iteritems(): + yield pfn, count + + def load_file(self, pfnset_f): + prev_pfn_end_truncated = None + for line in pfnset_f: + line = line.strip() + if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'): + break + elif line.startswith('PF: '): + for encoded_pfn in line[3:].split(): + page_frame = PageFrame.parse(encoded_pfn, self._pagesize) + if page_frame.start_truncated and ( + not prev_pfn_end_truncated or + prev_pfn_end_truncated != page_frame.pfn): + LOGGER.error('Broken page frame number: %s.' % encoded_pfn) + self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1 + if page_frame.end_truncated: + prev_pfn_end_truncated = page_frame.pfn + else: + prev_pfn_end_truncated = None + elif line.startswith('PageSize: '): + self._pagesize = int(line[10:]) + elif line.startswith('PFN: '): + self._pfn_meta = line[5:] + elif line.startswith('PageFrame: '): + self._pfn_meta = line[11:] + elif line.startswith('Time: '): + self._time = float(line[6:]) + elif line.startswith('CommandLine: '): + self._command_line = line[13:] + elif line.startswith('Reason: '): + self._reason = line[8:] diff --git a/tools/deep_memory_profiler/lib/policy.py b/tools/deep_memory_profiler/lib/policy.py new file mode 100644 index 0000000..d7a3897 --- /dev/null +++ b/tools/deep_memory_profiler/lib/policy.py @@ -0,0 +1,404 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import json +import logging +import os +import re + + +LOGGER = logging.getLogger('dmprof') + +BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') + +# Heap Profile Policy versions + +# POLICY_DEEP_1 DOES NOT include allocation_type columns. +# mmap regions are distincted w/ mmap frames in the pattern column. +POLICY_DEEP_1 = 'POLICY_DEEP_1' + +# POLICY_DEEP_2 DOES include allocation_type columns. +# mmap regions are distincted w/ the allocation_type column. +POLICY_DEEP_2 = 'POLICY_DEEP_2' + +# POLICY_DEEP_3 is in JSON format. +POLICY_DEEP_3 = 'POLICY_DEEP_3' + +# POLICY_DEEP_3 contains typeinfo. 
+POLICY_DEEP_4 = 'POLICY_DEEP_4' + + +class Rule(object): + """Represents one matching rule in a policy file.""" + + def __init__(self, + name, + allocator_type, + stackfunction_pattern=None, + stacksourcefile_pattern=None, + typeinfo_pattern=None, + mappedpathname_pattern=None, + mappedpermission_pattern=None, + sharedwith=None): + self._name = name + self._allocator_type = allocator_type + + self._stackfunction_pattern = None + if stackfunction_pattern: + self._stackfunction_pattern = re.compile( + stackfunction_pattern + r'\Z') + + self._stacksourcefile_pattern = None + if stacksourcefile_pattern: + self._stacksourcefile_pattern = re.compile( + stacksourcefile_pattern + r'\Z') + + self._typeinfo_pattern = None + if typeinfo_pattern: + self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') + + self._mappedpathname_pattern = None + if mappedpathname_pattern: + self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z') + + self._mappedpermission_pattern = None + if mappedpermission_pattern: + self._mappedpermission_pattern = re.compile( + mappedpermission_pattern + r'\Z') + + self._sharedwith = [] + if sharedwith: + self._sharedwith = sharedwith + + @property + def name(self): + return self._name + + @property + def allocator_type(self): + return self._allocator_type + + @property + def stackfunction_pattern(self): + return self._stackfunction_pattern + + @property + def stacksourcefile_pattern(self): + return self._stacksourcefile_pattern + + @property + def typeinfo_pattern(self): + return self._typeinfo_pattern + + @property + def mappedpathname_pattern(self): + return self._mappedpathname_pattern + + @property + def mappedpermission_pattern(self): + return self._mappedpermission_pattern + + @property + def sharedwith(self): + return self._sharedwith + + +class Policy(object): + """Represents a policy, a content of a policy file.""" + + def __init__(self, rules, version, components): + self._rules = rules + self._version = version + self._components = components + + @property + def rules(self): + return self._rules + + @property + def version(self): + return self._version + + @property + def components(self): + return self._components + + def find_rule(self, component_name): + """Finds a rule whose name is |component_name|. """ + for rule in self._rules: + if rule.name == component_name: + return rule + return None + + def find_malloc(self, bucket): + """Finds a matching component name which a given |bucket| belongs to. + + Args: + bucket: A Bucket object to be searched for. + + Returns: + A string representing a component name. + """ + assert not bucket or bucket.allocator_type == 'malloc' + + if not bucket: + return 'no-bucket' + if bucket.component_cache: + return bucket.component_cache + + stackfunction = bucket.symbolized_joined_stackfunction + stacksourcefile = bucket.symbolized_joined_stacksourcefile + typeinfo = bucket.symbolized_typeinfo + if typeinfo.startswith('0x'): + typeinfo = bucket.typeinfo_name + + for rule in self._rules: + if (rule.allocator_type == 'malloc' and + (not rule.stackfunction_pattern or + rule.stackfunction_pattern.match(stackfunction)) and + (not rule.stacksourcefile_pattern or + rule.stacksourcefile_pattern.match(stacksourcefile)) and + (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): + bucket.component_cache = rule.name + return rule.name + + assert False + + def find_mmap(self, region, bucket_set, + pageframe=None, group_pfn_counts=None): + """Finds a matching component which a given mmap |region| belongs to. 
+
+    It uses |bucket_set| to match with backtraces. If |pageframe| is given,
+    it considers memory sharing among processes.
+
+    NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
+    classified not only with bucket information (mappedpathname for example).
+
+    Args:
+      region: A tuple representing a memory region.
+      bucket_set: A BucketSet object to look up backtraces.
+      pageframe: A PageFrame object representing a pageframe maybe including
+          a pagecount.
+      group_pfn_counts: A dict mapping a PFN to the number of times the
+          pageframe is mapped by the known "group (Chrome)" processes.
+
+    Returns:
+      A string representing a component name.
+    """
+    assert region[0] == 'hooked'
+    bucket = bucket_set.get(region[1]['bucket_id'])
+    assert not bucket or bucket.allocator_type == 'mmap'
+
+    if not bucket:
+      return 'no-bucket', None
+
+    stackfunction = bucket.symbolized_joined_stackfunction
+    stacksourcefile = bucket.symbolized_joined_stacksourcefile
+    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
+
+    for rule in self._rules:
+      if (rule.allocator_type == 'mmap' and
+          (not rule.stackfunction_pattern or
+           rule.stackfunction_pattern.match(stackfunction)) and
+          (not rule.stacksourcefile_pattern or
+           rule.stacksourcefile_pattern.match(stacksourcefile)) and
+          (not rule.mappedpathname_pattern or
+           rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
+          (not rule.mappedpermission_pattern or
+           rule.mappedpermission_pattern.match(
+               region[1]['vma']['readable'] +
+               region[1]['vma']['writable'] +
+               region[1]['vma']['executable'] +
+               region[1]['vma']['private'])) and
+          (not rule.sharedwith or
+           not pageframe or sharedwith in rule.sharedwith)):
+        return rule.name, bucket
+
+    assert False
+
+  def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
+    """Finds a matching component which a given unhooked |region| belongs to.
+
+    If |pageframe| is given, it considers memory sharing among processes.
+
+    Args:
+      region: A tuple representing a memory region.
+      pageframe: A PageFrame object representing a pageframe maybe including
+          a pagecount.
+      group_pfn_counts: A dict mapping a PFN to the number of times the
+          pageframe is mapped by the known "group (Chrome)" processes.
+
+    Returns:
+      A string representing a component name.
+    """
+    assert region[0] == 'unhooked'
+    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
+
+    for rule in self._rules:
+      if (rule.allocator_type == 'unhooked' and
+          (not rule.mappedpathname_pattern or
+           rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
+          (not rule.mappedpermission_pattern or
+           rule.mappedpermission_pattern.match(
+               region[1]['vma']['readable'] +
+               region[1]['vma']['writable'] +
+               region[1]['vma']['executable'] +
+               region[1]['vma']['private'])) and
+          (not rule.sharedwith or
+           not pageframe or sharedwith in rule.sharedwith)):
+        return rule.name
+
+    assert False
+
+  @staticmethod
+  def load(filename, filetype):
+    """Loads a policy file of |filename| in a format specified by |filetype|.
+
+    Args:
+      filename: A filename to be loaded.
+      filetype: A string to specify a type of the file. Only 'json' is
+          supported for now.
+
+    Returns:
+      A loaded Policy object.
+    """
+    with open(os.path.join(BASE_PATH, filename)) as policy_f:
+      return Policy.parse(policy_f, filetype)
+
+  @staticmethod
+  def parse(policy_f, filetype):
+    """Parses a policy file content in a format specified by |filetype|.
+
+    Args:
+      policy_f: An IO object to be loaded.
+      filetype: A string to specify a type of the file. Only 'json' is
+          supported for now.
+ + Returns: + A loaded Policy object. + """ + if filetype == 'json': + return Policy._parse_json(policy_f) + else: + return None + + @staticmethod + def _parse_json(policy_f): + """Parses policy file in json format. + + A policy file contains component's names and their stacktrace pattern + written in regular expression. Those patterns are matched against each + symbols of each stacktraces in the order written in the policy file + + Args: + policy_f: A File/IO object to read. + + Returns: + A loaded policy object. + """ + policy = json.load(policy_f) + + rules = [] + for rule in policy['rules']: + stackfunction = rule.get('stackfunction') or rule.get('stacktrace') + stacksourcefile = rule.get('stacksourcefile') + rules.append(Rule( + rule['name'], + rule['allocator'], # allocator_type + stackfunction, + stacksourcefile, + rule['typeinfo'] if 'typeinfo' in rule else None, + rule.get('mappedpathname'), + rule.get('mappedpermission'), + rule.get('sharedwith'))) + + return Policy(rules, policy['version'], policy['components']) + + @staticmethod + def _categorize_pageframe(pageframe, group_pfn_counts): + """Categorizes a pageframe based on its sharing status. + + Returns: + 'private' if |pageframe| is not shared with other processes. 'group' + if |pageframe| is shared only with group (Chrome-related) processes. + 'others' if |pageframe| is shared with non-group processes. + """ + if not pageframe: + return 'private' + + if pageframe.pagecount: + if pageframe.pagecount == 1: + return 'private' + elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1: + return 'group' + else: + return 'others' + else: + if pageframe.pfn in group_pfn_counts: + return 'group' + else: + return 'private' + + +class PolicySet(object): + """Represents a set of policies.""" + + def __init__(self, policy_directory): + self._policy_directory = policy_directory + + @staticmethod + def load(labels=None): + """Loads a set of policies via the "default policy directory". + + The "default policy directory" contains pairs of policies and their labels. + For example, a policy "policy.l0.json" is labeled "l0" in the default + policy directory "policies.json". + + All policies in the directory are loaded by default. Policies can be + limited by |labels|. + + Args: + labels: An array that contains policy labels to be loaded. + + Returns: + A PolicySet object. + """ + default_policy_directory = PolicySet._load_default_policy_directory() + if labels: + specified_policy_directory = {} + for label in labels: + if label in default_policy_directory: + specified_policy_directory[label] = default_policy_directory[label] + # TODO(dmikurube): Load an un-labeled policy file. 
+ return PolicySet._load_policies(specified_policy_directory) + else: + return PolicySet._load_policies(default_policy_directory) + + def __len__(self): + return len(self._policy_directory) + + def __iter__(self): + for label in self._policy_directory: + yield label + + def __getitem__(self, label): + return self._policy_directory[label] + + @staticmethod + def _load_default_policy_directory(): + with open(POLICIES_JSON_PATH, mode='r') as policies_f: + default_policy_directory = json.load(policies_f) + return default_policy_directory + + @staticmethod + def _load_policies(directory): + LOGGER.info('Loading policy files.') + policies = {} + for label in directory: + LOGGER.info(' %s: %s' % (label, directory[label]['file'])) + loaded = Policy.load(directory[label]['file'], directory[label]['format']) + if loaded: + policies[label] = loaded + return PolicySet(policies) diff --git a/tools/deep_memory_profiler/range_dict.py b/tools/deep_memory_profiler/lib/range_dict.py index 9acf8a6..565789d 100644 --- a/tools/deep_memory_profiler/range_dict.py +++ b/tools/deep_memory_profiler/lib/range_dict.py @@ -1,14 +1,14 @@ -# Copyright (c) 2013 The Chromium Authors. All rights reserved. +# Copyright 2013 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. import os import sys -BASE_PATH = os.path.dirname(os.path.abspath(__file__)) -BINTREES_PATH = os.path.join( - BASE_PATH, os.pardir, os.pardir, 'third_party', 'bintrees') -sys.path.insert(0, BINTREES_PATH) +_BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +_BINTREES_PATH = os.path.join( + _BASE_PATH, os.pardir, os.pardir, 'third_party', 'bintrees') +sys.path.insert(0, _BINTREES_PATH) from bintrees import FastRBTree # pylint: disable=F0401 diff --git a/tools/deep_memory_profiler/lib/sorter.py b/tools/deep_memory_profiler/lib/sorter.py new file mode 100644 index 0000000..db50c70 --- /dev/null +++ b/tools/deep_memory_profiler/lib/sorter.py @@ -0,0 +1,443 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import cStringIO +import json +import logging +import os +import re + + +LOGGER = logging.getLogger('dmprof') + +BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +DEFAULT_SORTERS = [ + os.path.join(BASE_PATH, 'sorter.malloc-component.json'), + os.path.join(BASE_PATH, 'sorter.malloc-type.json'), + os.path.join(BASE_PATH, 'sorter.vm-map.json'), + os.path.join(BASE_PATH, 'sorter.vm-sharing.json'), + ] + + +class Unit(object): + """Represents a minimum unit of memory usage categorization. + + It is supposed to be inherited for some different spaces like the entire + virtual memory and malloc arena. Such different spaces are called "worlds" + in dmprof. (For example, the "vm" world and the "malloc" world.) 
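+
+  Each unit carries a |unit_id| and a |size| in bytes; subclasses add
+  world-specific attributes such as committed/reserved sizes or allocation
+  counts.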
+ """ + def __init__(self, unit_id, size): + self._unit_id = unit_id + self._size = size + + @property + def unit_id(self): + return self._unit_id + + @property + def size(self): + return self._size + + +class VMUnit(Unit): + """Represents a Unit for a memory region on virtual memory.""" + def __init__(self, unit_id, committed, reserved, mmap, region, + pageframe=None, group_pfn_counts=None): + super(VMUnit, self).__init__(unit_id, committed) + self._reserved = reserved + self._mmap = mmap + self._region = region + self._pageframe = pageframe + self._group_pfn_counts = group_pfn_counts + + @property + def committed(self): + return self._size + + @property + def reserved(self): + return self._reserved + + @property + def mmap(self): + return self._mmap + + @property + def region(self): + return self._region + + @property + def pageframe(self): + return self._pageframe + + @property + def group_pfn_counts(self): + return self._group_pfn_counts + + +class MMapUnit(VMUnit): + """Represents a Unit for a mmap'ed region.""" + def __init__(self, unit_id, committed, reserved, region, bucket_set, + pageframe=None, group_pfn_counts=None): + super(MMapUnit, self).__init__(unit_id, committed, reserved, True, + region, pageframe, group_pfn_counts) + self._bucket_set = bucket_set + + def __repr__(self): + return str(self.region) + + @property + def bucket_set(self): + return self._bucket_set + + +class UnhookedUnit(VMUnit): + """Represents a Unit for a non-mmap'ed memory region on virtual memory.""" + def __init__(self, unit_id, committed, reserved, region, + pageframe=None, group_pfn_counts=None): + super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False, + region, pageframe, group_pfn_counts) + + def __repr__(self): + return str(self.region) + + +class MallocUnit(Unit): + """Represents a Unit for a malloc'ed memory block.""" + def __init__(self, unit_id, size, alloc_count, free_count, bucket): + super(MallocUnit, self).__init__(unit_id, size) + self._bucket = bucket + self._alloc_count = alloc_count + self._free_count = free_count + + def __repr__(self): + return str(self.bucket) + + @property + def bucket(self): + return self._bucket + + @property + def alloc_count(self): + return self._alloc_count + + @property + def free_count(self): + return self._free_count + + +class UnitSet(object): + """Represents an iterable set of Units.""" + def __init__(self, world): + self._units = {} + self._world = world + + def __repr__(self): + return str(self._units) + + def __iter__(self): + for unit_id in sorted(self._units): + yield self._units[unit_id] + + def append(self, unit, overwrite=False): + if not overwrite and unit.unit_id in self._units: + LOGGER.error('The unit id=%s already exists.' 
% str(unit.unit_id)) + self._units[unit.unit_id] = unit + + +class AbstractRule(object): + """An abstract class for rules to be matched with units.""" + def __init__(self, dct): + self._name = dct['name'] + self._hidden = dct.get('hidden', False) + self._subworlds = dct.get('subworlds', []) + + def match(self, unit): + raise NotImplementedError() + + @property + def name(self): + return self._name + + @property + def hidden(self): + return self._hidden + + def iter_subworld(self): + for subworld in self._subworlds: + yield subworld + + +class VMRule(AbstractRule): + """Represents a Rule to match with virtual memory regions.""" + def __init__(self, dct): + super(VMRule, self).__init__(dct) + self._backtrace_function = dct.get('backtrace_function', None) + if self._backtrace_function: + self._backtrace_function = re.compile(self._backtrace_function) + self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) + if self._backtrace_sourcefile: + self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) + self._mmap = dct.get('mmap', None) + self._sharedwith = dct.get('sharedwith', []) + self._mapped_pathname = dct.get('mapped_pathname', None) + if self._mapped_pathname: + self._mapped_pathname = re.compile(self._mapped_pathname) + self._mapped_permission = dct.get('mapped_permission', None) + if self._mapped_permission: + self._mapped_permission = re.compile(self._mapped_permission) + + def __repr__(self): + result = cStringIO.StringIO() + result.write('{"%s"=>' % self._name) + attributes = [] + attributes.append('mmap: %s' % self._mmap) + if self._backtrace_function: + attributes.append('backtrace_function: "%s"' % + self._backtrace_function.pattern) + if self._sharedwith: + attributes.append('sharedwith: "%s"' % self._sharedwith) + if self._mapped_pathname: + attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern) + if self._mapped_permission: + attributes.append('mapped_permission: "%s"' % + self._mapped_permission.pattern) + result.write('%s}' % ', '.join(attributes)) + return result.getvalue() + + def match(self, unit): + if unit.mmap: + assert unit.region[0] == 'hooked' + bucket = unit.bucket_set.get(unit.region[1]['bucket_id']) + assert bucket + assert bucket.allocator_type == 'mmap' + + stackfunction = bucket.symbolized_joined_stackfunction + stacksourcefile = bucket.symbolized_joined_stacksourcefile + + # TODO(dmikurube): Support shared memory. + sharedwith = None + + if self._mmap == False: # (self._mmap == None) should go through. + return False + if (self._backtrace_function and + not self._backtrace_function.match(stackfunction)): + return False + if (self._backtrace_sourcefile and + not self._backtrace_sourcefile.match(stacksourcefile)): + return False + if (self._mapped_pathname and + not self._mapped_pathname.match(unit.region[1]['vma']['name'])): + return False + if (self._mapped_permission and + not self._mapped_permission.match( + unit.region[1]['vma']['readable'] + + unit.region[1]['vma']['writable'] + + unit.region[1]['vma']['executable'] + + unit.region[1]['vma']['private'])): + return False + if (self._sharedwith and + unit.pageframe and sharedwith not in self._sharedwith): + return False + + return True + + else: + assert unit.region[0] == 'unhooked' + + # TODO(dmikurube): Support shared memory. + sharedwith = None + + if self._mmap == True: # (self._mmap == None) should go through. 
+ return False + if (self._mapped_pathname and + not self._mapped_pathname.match(unit.region[1]['vma']['name'])): + return False + if (self._mapped_permission and + not self._mapped_permission.match( + unit.region[1]['vma']['readable'] + + unit.region[1]['vma']['writable'] + + unit.region[1]['vma']['executable'] + + unit.region[1]['vma']['private'])): + return False + if (self._sharedwith and + unit.pageframe and sharedwith not in self._sharedwith): + return False + + return True + + +class MallocRule(AbstractRule): + """Represents a Rule to match with malloc'ed blocks.""" + def __init__(self, dct): + super(MallocRule, self).__init__(dct) + self._backtrace_function = dct.get('backtrace_function', None) + if self._backtrace_function: + self._backtrace_function = re.compile(self._backtrace_function) + self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) + if self._backtrace_sourcefile: + self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) + self._typeinfo = dct.get('typeinfo', None) + if self._typeinfo: + self._typeinfo = re.compile(self._typeinfo) + + def __repr__(self): + result = cStringIO.StringIO() + result.write('{"%s"=>' % self._name) + attributes = [] + if self._backtrace_function: + attributes.append('backtrace_function: "%s"' % self._backtrace_function) + if self._typeinfo: + attributes.append('typeinfo: "%s"' % self._typeinfo) + result.write('%s}' % ', '.join(attributes)) + return result.getvalue() + + def match(self, unit): + assert unit.bucket.allocator_type == 'malloc' + + stackfunction = unit.bucket.symbolized_joined_stackfunction + stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile + typeinfo = unit.bucket.symbolized_typeinfo + if typeinfo.startswith('0x'): + typeinfo = unit.bucket.typeinfo_name + + return ((not self._backtrace_function or + self._backtrace_function.match(stackfunction)) and + (not self._backtrace_sourcefile or + self._backtrace_sourcefile.match(stacksourcefile)) and + (not self._typeinfo or self._typeinfo.match(typeinfo))) + + +class NoBucketMallocRule(MallocRule): + """Represents a Rule that small ignorable units match with.""" + def __init__(self): + super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'}) + self._no_bucket = True + + @property + def no_bucket(self): + return self._no_bucket + + +class AbstractSorter(object): + """An abstract class for classifying Units with a set of Rules.""" + def __init__(self, dct): + self._type = 'sorter' + self._version = dct['version'] + self._world = dct['world'] + self._name = dct['name'] + self._order = dct['order'] + + self._rules = [] + for rule in dct['rules']: + if dct['world'] == 'vm': + self._rules.append(VMRule(rule)) + elif dct['world'] == 'malloc': + self._rules.append(MallocRule(rule)) + else: + LOGGER.error('Unknown sorter world type') + + def __repr__(self): + result = cStringIO.StringIO() + result.write('world=%s' % self._world) + result.write('order=%s' % self._order) + result.write('rules:') + for rule in self._rules: + result.write(' %s' % rule) + return result.getvalue() + + @staticmethod + def load(filename): + with open(filename) as sorter_f: + sorter_dict = json.load(sorter_f) + if sorter_dict['world'] == 'vm': + return VMSorter(sorter_dict) + elif sorter_dict['world'] == 'malloc': + return MallocSorter(sorter_dict) + else: + LOGGER.error('Unknown sorter world type') + return None + + @property + def world(self): + return self._world + + @property + def name(self): + return self._name + + def find(self, unit): + raise NotImplementedError() + 
+ def find_rule(self, name): + """Finds a rule whose name is |name|. """ + for rule in self._rules: + if rule.name == name: + return rule + return None + + +class VMSorter(AbstractSorter): + """Represents a Sorter for memory regions on virtual memory.""" + def __init__(self, dct): + assert dct['world'] == 'vm' + super(VMSorter, self).__init__(dct) + + def find(self, unit): + for rule in self._rules: + if rule.match(unit): + return rule + assert False + + +class MallocSorter(AbstractSorter): + """Represents a Sorter for malloc'ed blocks.""" + def __init__(self, dct): + assert dct['world'] == 'malloc' + super(MallocSorter, self).__init__(dct) + self._no_bucket_rule = NoBucketMallocRule() + + def find(self, unit): + if not unit.bucket: + return self._no_bucket_rule + assert unit.bucket.allocator_type == 'malloc' + + if unit.bucket.component_cache: + return unit.bucket.component_cache + + for rule in self._rules: + if rule.match(unit): + unit.bucket.component_cache = rule + return rule + assert False + + +class SorterSet(object): + """Represents an iterable set of Sorters.""" + def __init__(self, additional=None, default=None): + if not additional: + additional = [] + if not default: + default = DEFAULT_SORTERS + self._sorters = {} + for filename in default + additional: + sorter = AbstractSorter.load(filename) + if sorter.world not in self._sorters: + self._sorters[sorter.world] = [] + self._sorters[sorter.world].append(sorter) + + def __repr__(self): + result = cStringIO.StringIO() + result.write(self._sorters) + return result.getvalue() + + def __iter__(self): + for sorters in self._sorters.itervalues(): + for sorter in sorters: + yield sorter + + def iter_world(self, world): + for sorter in self._sorters.get(world, []): + yield sorter diff --git a/tools/deep_memory_profiler/lib/subcommand.py b/tools/deep_memory_profiler/lib/subcommand.py new file mode 100644 index 0000000..25416f6 --- /dev/null +++ b/tools/deep_memory_profiler/lib/subcommand.py @@ -0,0 +1,160 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import logging +import optparse +import os +import re + +from lib.bucket import BucketSet +from lib.dump import Dump, DumpList +from lib.symbol import SymbolDataSources, SymbolMappingCache, SymbolFinder +from lib.symbol import proc_maps +from lib.symbol import FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS, TYPEINFO_SYMBOLS + + +LOGGER = logging.getLogger('dmprof') + +BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir) + + +class SubCommand(object): + """Subclasses are a subcommand for this executable. + + See COMMANDS in main() in dmprof.py. + """ + _DEVICE_BINDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp'] + + def __init__(self, usage): + self._parser = optparse.OptionParser(usage) + + @staticmethod + def load_basic_files( + dump_path, multiple, no_dump=False, alternative_dirs=None): + prefix = SubCommand._find_prefix(dump_path) + # If the target process is estimated to be working on Android, converts + # a path in the Android device to a path estimated to be corresponding in + # the host. Use --alternative-dirs to specify the conversion manually. 
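+    # The estimated mapping looks like, for example,
+    #   {'/data/app-lib/<app directory>': '<chromium src>/out/Debug/lib'},
+    # where the concrete paths depend on the profiled process.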
+ if not alternative_dirs: + alternative_dirs = SubCommand._estimate_alternative_dirs(prefix) + if alternative_dirs: + for device, host in alternative_dirs.iteritems(): + LOGGER.info('Assuming %s on device as %s on host' % (device, host)) + symbol_data_sources = SymbolDataSources(prefix, alternative_dirs) + symbol_data_sources.prepare() + bucket_set = BucketSet() + bucket_set.load(prefix) + if not no_dump: + if multiple: + dump_list = DumpList.load(SubCommand._find_all_dumps(dump_path)) + else: + dump = Dump.load(dump_path) + symbol_mapping_cache = SymbolMappingCache() + with open(prefix + '.cache.function', 'a+') as cache_f: + symbol_mapping_cache.update( + FUNCTION_SYMBOLS, bucket_set, + SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) + with open(prefix + '.cache.typeinfo', 'a+') as cache_f: + symbol_mapping_cache.update( + TYPEINFO_SYMBOLS, bucket_set, + SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) + with open(prefix + '.cache.sourcefile', 'a+') as cache_f: + symbol_mapping_cache.update( + SOURCEFILE_SYMBOLS, bucket_set, + SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) + bucket_set.symbolize(symbol_mapping_cache) + if no_dump: + return bucket_set + elif multiple: + return (bucket_set, dump_list) + else: + return (bucket_set, dump) + + @staticmethod + def _find_prefix(path): + return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) + + @staticmethod + def _estimate_alternative_dirs(prefix): + """Estimates a path in host from a corresponding path in target device. + + For Android, dmprof.py should find symbol information from binaries in + the host instead of the Android device because dmprof.py doesn't run on + the Android device. This method estimates a path in the host + corresponding to a path in the Android device. + + Returns: + A dict that maps a path in the Android device to a path in the host. + If a file in SubCommand._DEVICE_BINDIRS is found in /proc/maps, it + assumes the process was running on Android and maps the path to + "out/Debug/lib" in the Chromium directory. An empty dict is returned + unless Android. 
+ """ + device_lib_path_candidates = set() + + with open(prefix + '.maps') as maps_f: + maps = proc_maps.ProcMaps.load(maps_f) + for entry in maps: + name = entry.as_dict()['name'] + if any([base_dir in name for base_dir in SubCommand._DEVICE_BINDIRS]): + device_lib_path_candidates.add(os.path.dirname(name)) + + if len(device_lib_path_candidates) == 1: + return {device_lib_path_candidates.pop(): os.path.join( + CHROME_SRC_PATH, 'out', 'Debug', 'lib')} + else: + return {} + + @staticmethod + def _find_all_dumps(dump_path): + prefix = SubCommand._find_prefix(dump_path) + dump_path_list = [dump_path] + + n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) + n += 1 + skipped = 0 + while True: + p = '%s.%04d.heap' % (prefix, n) + if os.path.exists(p) and os.stat(p).st_size: + dump_path_list.append(p) + else: + if skipped > 10: + break + skipped += 1 + n += 1 + + return dump_path_list + + @staticmethod + def _find_all_buckets(dump_path): + prefix = SubCommand._find_prefix(dump_path) + bucket_path_list = [] + + n = 0 + while True: + path = '%s.%04d.buckets' % (prefix, n) + if not os.path.exists(path): + if n > 10: + break + n += 1 + continue + bucket_path_list.append(path) + n += 1 + + return bucket_path_list + + def _parse_args(self, sys_argv, required): + options, args = self._parser.parse_args(sys_argv) + if len(args) < required + 1: + self._parser.error('needs %d argument(s).\n' % required) + return None + return (options, args) + + @staticmethod + def _parse_policy_list(options_policy): + if options_policy: + return options_policy.split(',') + else: + return None diff --git a/tools/deep_memory_profiler/lib/symbol.py b/tools/deep_memory_profiler/lib/symbol.py new file mode 100644 index 0000000..897d409 --- /dev/null +++ b/tools/deep_memory_profiler/lib/symbol.py @@ -0,0 +1,189 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import logging +import os +import sys + +_BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +_FIND_RUNTIME_SYMBOLS_PATH = os.path.join(_BASE_PATH, + os.pardir, + 'find_runtime_symbols') +sys.path.append(_FIND_RUNTIME_SYMBOLS_PATH) + +import find_runtime_symbols +import prepare_symbol_info +import proc_maps # pylint: disable=W0611 + +LOGGER = logging.getLogger('dmprof') + +FUNCTION_SYMBOLS = find_runtime_symbols.FUNCTION_SYMBOLS +SOURCEFILE_SYMBOLS = find_runtime_symbols.SOURCEFILE_SYMBOLS +TYPEINFO_SYMBOLS = find_runtime_symbols.TYPEINFO_SYMBOLS + + +class SymbolDataSources(object): + """Manages symbol data sources in a process. + + The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and + so on. They are collected into a directory '|prefix|.symmap' from the binary + files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. + + Binaries are not mandatory to profile. The prepared data sources work in + place of the binary even if the binary has been overwritten with another + binary. + + Note that loading the symbol data sources takes a long time. They are often + very big. So, the 'dmprof' profiler is designed to use 'SymbolMappingCache' + which caches actually used symbols. 
+ """ + def __init__(self, prefix, alternative_dirs=None): + self._prefix = prefix + self._prepared_symbol_data_sources_path = None + self._loaded_symbol_data_sources = None + self._alternative_dirs = alternative_dirs or {} + + def prepare(self): + """Prepares symbol data sources by extracting mapping from a binary. + + The prepared symbol data sources are stored in a directory. The directory + name is stored in |self._prepared_symbol_data_sources_path|. + + Returns: + True if succeeded. + """ + LOGGER.info('Preparing symbol mapping...') + self._prepared_symbol_data_sources_path, used_tempdir = ( + prepare_symbol_info.prepare_symbol_info( + self._prefix + '.maps', + output_dir_path=self._prefix + '.symmap', + alternative_dirs=self._alternative_dirs, + use_tempdir=True, + use_source_file_name=True)) + if self._prepared_symbol_data_sources_path: + LOGGER.info(' Prepared symbol mapping.') + if used_tempdir: + LOGGER.warn(' Using a temporary directory for symbol mapping.') + LOGGER.warn(' Delete it by yourself.') + LOGGER.warn(' Or, move the directory by yourself to use it later.') + return True + else: + LOGGER.warn(' Failed to prepare symbol mapping.') + return False + + def get(self): + """Returns the prepared symbol data sources. + + Returns: + The prepared symbol data sources. None if failed. + """ + if not self._prepared_symbol_data_sources_path and not self.prepare(): + return None + if not self._loaded_symbol_data_sources: + LOGGER.info('Loading symbol mapping...') + self._loaded_symbol_data_sources = ( + find_runtime_symbols.RuntimeSymbolsInProcess.load( + self._prepared_symbol_data_sources_path)) + return self._loaded_symbol_data_sources + + def path(self): + """Returns the path of the prepared symbol data sources if possible.""" + if not self._prepared_symbol_data_sources_path and not self.prepare(): + return None + return self._prepared_symbol_data_sources_path + + +class SymbolFinder(object): + """Finds corresponding symbols from addresses. + + This class does only 'find()' symbols from a specified |address_list|. + It is introduced to make a finder mockable. + """ + def __init__(self, symbol_type, symbol_data_sources): + self._symbol_type = symbol_type + self._symbol_data_sources = symbol_data_sources + + def find(self, address_list): + return find_runtime_symbols.find_runtime_symbols( + self._symbol_type, self._symbol_data_sources.get(), address_list) + + +class SymbolMappingCache(object): + """Caches mapping from actually used addresses to symbols. + + 'update()' updates the cache from the original symbol data sources via + 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. + """ + def __init__(self): + self._symbol_mapping_caches = { + FUNCTION_SYMBOLS: {}, + SOURCEFILE_SYMBOLS: {}, + TYPEINFO_SYMBOLS: {}, + } + + def update(self, symbol_type, bucket_set, symbol_finder, cache_f): + """Updates symbol mapping cache on memory and in a symbol cache file. + + It reads cached symbol mapping from a symbol cache file |cache_f| if it + exists. Unresolved addresses are then resolved and added to the cache + both on memory and in the symbol cache file with using 'SymbolFinder'. + + A cache file is formatted as follows: + <Address> <Symbol> + <Address> <Symbol> + <Address> <Symbol> + ... + + Args: + symbol_type: A type of symbols to update. It should be one of + FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. + bucket_set: A BucketSet object. + symbol_finder: A SymbolFinder object to find symbols. 
+ cache_f: A readable and writable IO object of the symbol cache file. + """ + cache_f.seek(0, os.SEEK_SET) + self._load(cache_f, symbol_type) + + unresolved_addresses = sorted( + address for address in bucket_set.iter_addresses(symbol_type) + if address not in self._symbol_mapping_caches[symbol_type]) + + if not unresolved_addresses: + LOGGER.info('No need to resolve any more addresses.') + return + + cache_f.seek(0, os.SEEK_END) + LOGGER.info('Loading %d unresolved addresses.' % + len(unresolved_addresses)) + symbol_dict = symbol_finder.find(unresolved_addresses) + + for address, symbol in symbol_dict.iteritems(): + stripped_symbol = symbol.strip() or '?' + self._symbol_mapping_caches[symbol_type][address] = stripped_symbol + cache_f.write('%x %s\n' % (address, stripped_symbol)) + + def lookup(self, symbol_type, address): + """Looks up a symbol for a given |address|. + + Args: + symbol_type: A type of symbols to update. It should be one of + FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. + address: An integer that represents an address. + + Returns: + A string that represents a symbol. + """ + return self._symbol_mapping_caches[symbol_type].get(address) + + def _load(self, cache_f, symbol_type): + try: + for line in cache_f: + items = line.rstrip().split(None, 1) + if len(items) == 1: + items.append('??') + self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] + LOGGER.info('Loaded %d entries from symbol cache.' % + len(self._symbol_mapping_caches[symbol_type])) + except IOError as e: + LOGGER.info('The symbol cache file is invalid: %s' % e) diff --git a/tools/deep_memory_profiler/subcommands/__init__.py b/tools/deep_memory_profiler/subcommands/__init__.py new file mode 100644 index 0000000..4fb29d0 --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from subcommands.buckets import BucketsCommand +from subcommands.cat import CatCommand +from subcommands.expand import ExpandCommand +from subcommands.map import MapCommand +from subcommands.policies import CSVCommand +from subcommands.policies import JSONCommand +from subcommands.policies import ListCommand +from subcommands.pprof import PProfCommand +from subcommands.stacktrace import StacktraceCommand +from subcommands.upload import UploadCommand diff --git a/tools/deep_memory_profiler/subcommands/buckets.py b/tools/deep_memory_profiler/subcommands/buckets.py new file mode 100644 index 0000000..4ea8640 --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/buckets.py @@ -0,0 +1,35 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import logging +import sys + +from lib.subcommand import SubCommand + + +LOGGER = logging.getLogger('dmprof') + + +class BucketsCommand(SubCommand): + def __init__(self): + super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>') + + def do(self, sys_argv, out=sys.stdout): + _, args = self._parse_args(sys_argv, 1) + dump_path = args[1] + bucket_set = SubCommand.load_basic_files(dump_path, True, True) + + BucketsCommand._output(bucket_set, out) + return 0 + + @staticmethod + def _output(bucket_set, out): + """Prints all buckets with resolving symbols. + + Args: + bucket_set: A BucketSet object. + out: An IO object to output. 
+ """ + for bucket_id, bucket in sorted(bucket_set): + out.write('%d: %s\n' % (bucket_id, bucket)) diff --git a/tools/deep_memory_profiler/subcommands/cat.py b/tools/deep_memory_profiler/subcommands/cat.py new file mode 100644 index 0000000..b7783b9 --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/cat.py @@ -0,0 +1,172 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import json +import logging +import sys + +from lib.bucket import BUCKET_ID, COMMITTED, ALLOC_COUNT, FREE_COUNT +from lib.ordered_dict import OrderedDict +from lib.subcommand import SubCommand +from lib.sorter import MallocUnit, MMapUnit, SorterSet, UnhookedUnit, UnitSet + + +LOGGER = logging.getLogger('dmprof') + + +class CatCommand(SubCommand): + def __init__(self): + super(CatCommand, self).__init__('Usage: %prog cat <first-dump>') + self._parser.add_option('--alternative-dirs', dest='alternative_dirs', + metavar='/path/on/target@/path/on/host[:...]', + help='Read files in /path/on/host/ instead of ' + 'files in /path/on/target/.') + self._parser.add_option('--indent', dest='indent', action='store_true', + help='Indent the output.') + + def do(self, sys_argv): + options, args = self._parse_args(sys_argv, 1) + dump_path = args[1] + # TODO(dmikurube): Support shared memory. + alternative_dirs_dict = {} + if options.alternative_dirs: + for alternative_dir_pair in options.alternative_dirs.split(':'): + target_path, host_path = alternative_dir_pair.split('@', 1) + alternative_dirs_dict[target_path] = host_path + (bucket_set, dumps) = SubCommand.load_basic_files( + dump_path, True, alternative_dirs=alternative_dirs_dict) + + json_root = OrderedDict() + json_root['version'] = 1 + json_root['run_id'] = None + for dump in dumps: + if json_root['run_id'] and json_root['run_id'] != dump.run_id: + LOGGER.error('Inconsistent heap profile dumps.') + json_root['run_id'] = '' + break + json_root['run_id'] = dump.run_id + json_root['snapshots'] = [] + + # Load all sorters. + sorters = SorterSet() + + for dump in dumps: + json_root['snapshots'].append( + self._fill_snapshot(dump, bucket_set, sorters)) + + if options.indent: + json.dump(json_root, sys.stdout, indent=2) + else: + json.dump(json_root, sys.stdout) + print '' + + @staticmethod + def _fill_snapshot(dump, bucket_set, sorters): + root = OrderedDict() + root['time'] = dump.time + root['worlds'] = OrderedDict() + root['worlds']['vm'] = CatCommand._fill_world( + dump, bucket_set, sorters, 'vm') + root['worlds']['malloc'] = CatCommand._fill_world( + dump, bucket_set, sorters, 'malloc') + return root + + @staticmethod + def _fill_world(dump, bucket_set, sorters, world): + root = OrderedDict() + + root['name'] = 'world' + if world == 'vm': + root['unit_fields'] = ['committed', 'reserved'] + elif world == 'malloc': + root['unit_fields'] = ['size', 'alloc_count', 'free_count'] + + # Make { vm | malloc } units with their sizes. + root['units'] = OrderedDict() + unit_set = UnitSet(world) + if world == 'vm': + for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set): + unit_set.append(unit) + for unit in unit_set: + root['units'][unit.unit_id] = [unit.committed, unit.reserved] + elif world == 'malloc': + for unit in CatCommand._iterate_malloc_unit(dump, bucket_set): + unit_set.append(unit) + for unit in unit_set: + root['units'][unit.unit_id] = [ + unit.size, unit.alloc_count, unit.free_count] + + # Iterate for { vm | malloc } sorters. 
+ root['breakdown'] = OrderedDict() + for sorter in sorters.iter_world(world): + breakdown = OrderedDict() + for unit in unit_set: + found = sorter.find(unit) + if found.name not in breakdown: + category = OrderedDict() + category['name'] = found.name + category['color'] = 'random' + subworlds = {} + for subworld in found.iter_subworld(): + subworlds[subworld] = False + if subworlds: + category['subworlds'] = subworlds + if found.hidden: + category['hidden'] = True + category['units'] = [] + breakdown[found.name] = category + breakdown[found.name]['units'].append(unit.unit_id) + root['breakdown'][sorter.name] = breakdown + + return root + + @staticmethod + def _iterate_vm_unit(dump, pfn_dict, bucket_set): + unit_id = 0 + for _, region in dump.iter_map: + unit_id += 1 + if region[0] == 'unhooked': + if pfn_dict and dump.pageframe_length: + for pageframe in region[1]['pageframe']: + yield UnhookedUnit(unit_id, pageframe.size, pageframe.size, + region, pageframe, pfn_dict) + else: + yield UnhookedUnit(unit_id, + int(region[1]['committed']), + int(region[1]['reserved']), + region) + elif region[0] == 'hooked': + if pfn_dict and dump.pageframe_length: + for pageframe in region[1]['pageframe']: + yield MMapUnit(unit_id, + pageframe.size, + pageframe.size, + region, bucket_set, pageframe, pfn_dict) + else: + yield MMapUnit(unit_id, + int(region[1]['committed']), + int(region[1]['reserved']), + region, + bucket_set) + else: + LOGGER.error('Unrecognized mapping status: %s' % region[0]) + + @staticmethod + def _iterate_malloc_unit(dump, bucket_set): + for line in dump.iter_stacktrace: + words = line.split() + bucket = bucket_set.get(int(words[BUCKET_ID])) + if bucket and bucket.allocator_type == 'malloc': + yield MallocUnit(int(words[BUCKET_ID]), + int(words[COMMITTED]), + int(words[ALLOC_COUNT]), + int(words[FREE_COUNT]), + bucket) + elif not bucket: + # 'Not-found' buckets are all assumed as malloc buckets. + yield MallocUnit(int(words[BUCKET_ID]), + int(words[COMMITTED]), + int(words[ALLOC_COUNT]), + int(words[FREE_COUNT]), + None) diff --git a/tools/deep_memory_profiler/subcommands/expand.py b/tools/deep_memory_profiler/subcommands/expand.py new file mode 100644 index 0000000..4058a00 --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/expand.py @@ -0,0 +1,104 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import logging +import sys + +from lib.bucket import BUCKET_ID, COMMITTED, ALLOC_COUNT, FREE_COUNT +from lib.policy import PolicySet +from lib.subcommand import SubCommand + + +LOGGER = logging.getLogger('dmprof') + + +class ExpandCommand(SubCommand): + def __init__(self): + super(ExpandCommand, self).__init__( + 'Usage: %prog expand <dump> <policy> <component> <depth>') + + def do(self, sys_argv): + _, args = self._parse_args(sys_argv, 4) + dump_path = args[1] + target_policy = args[2] + component_name = args[3] + depth = args[4] + (bucket_set, dump) = SubCommand.load_basic_files(dump_path, False) + policy_set = PolicySet.load(SubCommand._parse_policy_list(target_policy)) + + ExpandCommand._output(dump, policy_set[target_policy], bucket_set, + component_name, int(depth), sys.stdout) + return 0 + + @staticmethod + def _output(dump, policy, bucket_set, component_name, depth, out): + """Prints all stacktraces in a given component of given depth. + + Args: + dump: A Dump object. + policy: A Policy object. + bucket_set: A BucketSet object. 
+      component_name: A name of component for filtering.
+      depth: An integer representing depth to be printed.
+      out: An IO object to output.
+    """
+    sizes = {}
+
+    ExpandCommand._accumulate(
+        dump, policy, bucket_set, component_name, depth, sizes)
+
+    sorted_sizes_list = sorted(
+        sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
+    total = 0
+    # TODO(dmikurube): Better formatting.
+    for size_pair in sorted_sizes_list:
+      out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
+      total += size_pair[1]
+    LOGGER.info('total: %d\n' % total)
+
+  @staticmethod
+  def _add_size(precedence, bucket, depth, committed, sizes):
+    stacktrace_sequence = precedence
+    for function, sourcefile in zip(
+        bucket.symbolized_stackfunction[
+            0 : min(len(bucket.symbolized_stackfunction), 1 + depth)],
+        bucket.symbolized_stacksourcefile[
+            0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]):
+      stacktrace_sequence += '%s(@%s) ' % (function, sourcefile)
+    if not stacktrace_sequence in sizes:
+      sizes[stacktrace_sequence] = 0
+    sizes[stacktrace_sequence] += committed
+
+  @staticmethod
+  def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
+    rule = policy.find_rule(component_name)
+    if not rule:
+      pass
+    elif rule.allocator_type == 'malloc':
+      for line in dump.iter_stacktrace:
+        words = line.split()
+        bucket = bucket_set.get(int(words[BUCKET_ID]))
+        if not bucket or bucket.allocator_type == 'malloc':
+          component_match = policy.find_malloc(bucket)
+        elif bucket.allocator_type == 'mmap':
+          continue
+        else:
+          assert False
+        if component_match == component_name:
+          precedence = ''
+          precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT])
+          precedence += '(free=%d) ' % int(words[FREE_COUNT])
+          if bucket.typeinfo:
+            precedence += '(type=%s) ' % bucket.symbolized_typeinfo
+            precedence += '(type.name=%s) ' % bucket.typeinfo_name
+          ExpandCommand._add_size(precedence, bucket, depth,
+                                  int(words[COMMITTED]), sizes)
+    elif rule.allocator_type == 'mmap':
+      for _, region in dump.iter_map:
+        if region[0] != 'hooked':
+          continue
+        component_match, bucket = policy.find_mmap(region, bucket_set)
+        if component_match == component_name:
+          ExpandCommand._add_size('', bucket, depth,
+                                  region[1]['committed'], sizes)
diff --git a/tools/deep_memory_profiler/subcommands/map.py b/tools/deep_memory_profiler/subcommands/map.py
new file mode 100644
index 0000000..2237d6f
--- /dev/null
+++ b/tools/deep_memory_profiler/subcommands/map.py
@@ -0,0 +1,102 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import copy
+import logging
+import sys
+
+from lib.range_dict import ExclusiveRangeDict
+from lib.policy import PolicySet
+from lib.subcommand import SubCommand
+
+
+LOGGER = logging.getLogger('dmprof')
+
+
+class MapCommand(SubCommand):
+  def __init__(self):
+    super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>')
+
+  def do(self, sys_argv, out=sys.stdout):
+    _, args = self._parse_args(sys_argv, 2)
+    dump_path = args[1]
+    target_policy = args[2]
+    (bucket_set, dumps) = SubCommand.load_basic_files(dump_path, True)
+    policy_set = PolicySet.load(SubCommand._parse_policy_list(target_policy))
+
+    MapCommand._output(dumps, bucket_set, policy_set[target_policy], out)
+    return 0
+
+  @staticmethod
+  def _output(dumps, bucket_set, policy, out):
+    """Prints memory regions and the component each dump classifies them into.
+
+    Args:
+      dumps: A list of Dump objects.
+      bucket_set: A BucketSet object.
+ policy: A Policy object. + out: An IO object to output. + """ + max_dump_count = 0 + range_dict = ExclusiveRangeDict(ListAttribute) + for dump in dumps: + max_dump_count = max(max_dump_count, dump.count) + for key, value in dump.iter_map: + for begin, end, attr in range_dict.iter_range(key[0], key[1]): + attr[dump.count] = value + + max_dump_count_digit = len(str(max_dump_count)) + for begin, end, attr in range_dict.iter_range(): + out.write('%x-%x\n' % (begin, end)) + if len(attr) < max_dump_count: + attr[max_dump_count] = None + for index, value in enumerate(attr[1:]): + out.write(' #%0*d: ' % (max_dump_count_digit, index + 1)) + if not value: + out.write('None\n') + elif value[0] == 'hooked': + component_match, _ = policy.find_mmap(value, bucket_set) + out.write('%s @ %d\n' % (component_match, value[1]['bucket_id'])) + else: + component_match = policy.find_unhooked(value) + region_info = value[1] + size = region_info['committed'] + out.write('%s [%d bytes] %s%s%s%s %s\n' % ( + component_match, size, value[1]['vma']['readable'], + value[1]['vma']['writable'], value[1]['vma']['executable'], + value[1]['vma']['private'], value[1]['vma']['name'])) + + +class ListAttribute(ExclusiveRangeDict.RangeAttribute): + """Represents a list for an attribute in range_dict.ExclusiveRangeDict.""" + def __init__(self): + super(ListAttribute, self).__init__() + self._list = [] + + def __str__(self): + return str(self._list) + + def __repr__(self): + return 'ListAttribute' + str(self._list) + + def __len__(self): + return len(self._list) + + def __iter__(self): + for x in self._list: + yield x + + def __getitem__(self, index): + return self._list[index] + + def __setitem__(self, index, value): + if index >= len(self._list): + self._list.extend([None] * (index + 1 - len(self._list))) + self._list[index] = value + + def copy(self): + new_list = ListAttribute() + for index, item in enumerate(self._list): + new_list[index] = copy.deepcopy(item) + return new_list diff --git a/tools/deep_memory_profiler/subcommands/policies.py b/tools/deep_memory_profiler/subcommands/policies.py new file mode 100644 index 0000000..182959b --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/policies.py @@ -0,0 +1,375 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
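+
+# This module implements the 'csv', 'json' and 'list' subcommands, which apply
+# memory policies to heap profile dumps and report per-component sizes.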
+ +import datetime +import json +import logging +import sys + +from lib.bucket import BUCKET_ID, COMMITTED +from lib.pageframe import PFNCounts +from lib.policy import PolicySet +from lib.subcommand import SubCommand + + +LOGGER = logging.getLogger('dmprof') + + +class PolicyCommands(SubCommand): + def __init__(self, command): + super(PolicyCommands, self).__init__( + 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' % + command) + self._parser.add_option('-p', '--policy', type='string', dest='policy', + help='profile with POLICY', metavar='POLICY') + self._parser.add_option('--alternative-dirs', dest='alternative_dirs', + metavar='/path/on/target@/path/on/host[:...]', + help='Read files in /path/on/host/ instead of ' + 'files in /path/on/target/.') + + def _set_up(self, sys_argv): + options, args = self._parse_args(sys_argv, 1) + dump_path = args[1] + shared_first_dump_paths = args[2:] + alternative_dirs_dict = {} + if options.alternative_dirs: + for alternative_dir_pair in options.alternative_dirs.split(':'): + target_path, host_path = alternative_dir_pair.split('@', 1) + alternative_dirs_dict[target_path] = host_path + (bucket_set, dumps) = SubCommand.load_basic_files( + dump_path, True, alternative_dirs=alternative_dirs_dict) + + pfn_counts_dict = {} + for shared_first_dump_path in shared_first_dump_paths: + shared_dumps = SubCommand._find_all_dumps(shared_first_dump_path) + for shared_dump in shared_dumps: + pfn_counts = PFNCounts.load(shared_dump) + if pfn_counts.pid not in pfn_counts_dict: + pfn_counts_dict[pfn_counts.pid] = [] + pfn_counts_dict[pfn_counts.pid].append(pfn_counts) + + policy_set = PolicySet.load(SubCommand._parse_policy_list(options.policy)) + return policy_set, dumps, pfn_counts_dict, bucket_set + + @staticmethod + def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time): + """Aggregates the total memory size of each component. + + Iterate through all stacktraces and attribute them to one of the components + based on the policy. It is important to apply policy in right order. + + Args: + dump: A Dump object. + pfn_counts_dict: A dict mapping a pid to a list of PFNCounts. + policy: A Policy object. + bucket_set: A BucketSet object. + first_dump_time: An integer representing time when the first dump is + dumped. + + Returns: + A dict mapping components and their corresponding sizes. 
+ """ + LOGGER.info(' %s' % dump.path) + all_pfn_dict = {} + if pfn_counts_dict: + LOGGER.info(' shared with...') + for pid, pfnset_list in pfn_counts_dict.iteritems(): + closest_pfnset_index = None + closest_pfnset_difference = 1024.0 + for index, pfnset in enumerate(pfnset_list): + time_difference = pfnset.time - dump.time + if time_difference >= 3.0: + break + elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or + (0.0 <= time_difference and time_difference < 3.0)): + closest_pfnset_index = index + closest_pfnset_difference = time_difference + elif time_difference < 0.0 and pfnset.reason == 'Exiting': + closest_pfnset_index = None + break + if closest_pfnset_index: + for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn: + all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count + LOGGER.info(' %s (time difference = %f)' % + (pfnset_list[closest_pfnset_index].path, + closest_pfnset_difference)) + else: + LOGGER.info(' (no match with pid:%d)' % pid) + + sizes = dict((c, 0) for c in policy.components) + + PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes) + verify_global_stats = PolicyCommands._accumulate_maps( + dump, all_pfn_dict, policy, bucket_set, sizes) + + # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed. + # http://crbug.com/245603. + for verify_key, verify_value in verify_global_stats.iteritems(): + dump_value = dump.global_stat('%s_committed' % verify_key) + if dump_value != verify_value: + LOGGER.warn('%25s: %12d != %d (%d)' % ( + verify_key, dump_value, verify_value, dump_value - verify_value)) + + sizes['mmap-no-log'] = ( + dump.global_stat('profiled-mmap_committed') - + sizes['mmap-total-log']) + sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') + sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') + + sizes['tc-no-log'] = ( + dump.global_stat('profiled-malloc_committed') - + sizes['tc-total-log']) + sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') + sizes['tc-unused'] = ( + sizes['mmap-tcmalloc'] - + dump.global_stat('profiled-malloc_committed')) + if sizes['tc-unused'] < 0: + LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' % + sizes['tc-unused']) + sizes['tc-unused'] = 0 + sizes['tc-total'] = sizes['mmap-tcmalloc'] + + # TODO(dmikurube): global_stat will be deprecated. + # See http://crbug.com/245603. 
+ for key, value in { + 'total': 'total_committed', + 'filemapped': 'file_committed', + 'absent': 'absent_committed', + 'file-exec': 'file-exec_committed', + 'file-nonexec': 'file-nonexec_committed', + 'anonymous': 'anonymous_committed', + 'stack': 'stack_committed', + 'other': 'other_committed', + 'unhooked-absent': 'nonprofiled-absent_committed', + 'total-vm': 'total_virtual', + 'filemapped-vm': 'file_virtual', + 'anonymous-vm': 'anonymous_virtual', + 'other-vm': 'other_virtual' }.iteritems(): + if key in sizes: + sizes[key] = dump.global_stat(value) + + if 'mustbezero' in sizes: + removed_list = ( + 'profiled-mmap_committed', + 'nonprofiled-absent_committed', + 'nonprofiled-anonymous_committed', + 'nonprofiled-file-exec_committed', + 'nonprofiled-file-nonexec_committed', + 'nonprofiled-stack_committed', + 'nonprofiled-other_committed') + sizes['mustbezero'] = ( + dump.global_stat('total_committed') - + sum(dump.global_stat(removed) for removed in removed_list)) + if 'total-exclude-profiler' in sizes: + sizes['total-exclude-profiler'] = ( + dump.global_stat('total_committed') - + (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) + if 'hour' in sizes: + sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 + if 'minute' in sizes: + sizes['minute'] = (dump.time - first_dump_time) / 60.0 + if 'second' in sizes: + sizes['second'] = dump.time - first_dump_time + + return sizes + + @staticmethod + def _accumulate_malloc(dump, policy, bucket_set, sizes): + for line in dump.iter_stacktrace: + words = line.split() + bucket = bucket_set.get(int(words[BUCKET_ID])) + if not bucket or bucket.allocator_type == 'malloc': + component_match = policy.find_malloc(bucket) + elif bucket.allocator_type == 'mmap': + continue + else: + assert False + sizes[component_match] += int(words[COMMITTED]) + + assert not component_match.startswith('mmap-') + if component_match.startswith('tc-'): + sizes['tc-total-log'] += int(words[COMMITTED]) + else: + sizes['other-total-log'] += int(words[COMMITTED]) + + @staticmethod + def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes): + # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed. + # http://crbug.com/245603. + global_stats = { + 'total': 0, + 'file-exec': 0, + 'file-nonexec': 0, + 'anonymous': 0, + 'stack': 0, + 'other': 0, + 'nonprofiled-file-exec': 0, + 'nonprofiled-file-nonexec': 0, + 'nonprofiled-anonymous': 0, + 'nonprofiled-stack': 0, + 'nonprofiled-other': 0, + 'profiled-mmap': 0, + } + + for key, value in dump.iter_map: + # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed. + # It's temporary verification code for transition described in + # http://crbug.com/245603. 
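+      # Classify each mapping as file-exec, file-nonexec, stack, anonymous or
+      # other from its pathname, and accumulate committed sizes per category
+      # for the cross-check against GLOBAL_STATS in the caller.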
+ committed = 0 + if 'committed' in value[1]: + committed = value[1]['committed'] + global_stats['total'] += committed + key = 'other' + name = value[1]['vma']['name'] + if name.startswith('/'): + if value[1]['vma']['executable'] == 'x': + key = 'file-exec' + else: + key = 'file-nonexec' + elif name == '[stack]': + key = 'stack' + elif name == '': + key = 'anonymous' + global_stats[key] += committed + if value[0] == 'unhooked': + global_stats['nonprofiled-' + key] += committed + if value[0] == 'hooked': + global_stats['profiled-mmap'] += committed + + if value[0] == 'unhooked': + if pfn_dict and dump.pageframe_length: + for pageframe in value[1]['pageframe']: + component_match = policy.find_unhooked(value, pageframe, pfn_dict) + sizes[component_match] += pageframe.size + else: + component_match = policy.find_unhooked(value) + sizes[component_match] += int(value[1]['committed']) + elif value[0] == 'hooked': + if pfn_dict and dump.pageframe_length: + for pageframe in value[1]['pageframe']: + component_match, _ = policy.find_mmap( + value, bucket_set, pageframe, pfn_dict) + sizes[component_match] += pageframe.size + assert not component_match.startswith('tc-') + if component_match.startswith('mmap-'): + sizes['mmap-total-log'] += pageframe.size + else: + sizes['other-total-log'] += pageframe.size + else: + component_match, _ = policy.find_mmap(value, bucket_set) + sizes[component_match] += int(value[1]['committed']) + if component_match.startswith('mmap-'): + sizes['mmap-total-log'] += int(value[1]['committed']) + else: + sizes['other-total-log'] += int(value[1]['committed']) + else: + LOGGER.error('Unrecognized mapping status: %s' % value[0]) + + return global_stats + + +class CSVCommand(PolicyCommands): + def __init__(self): + super(CSVCommand, self).__init__('csv') + + def do(self, sys_argv): + policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) + return CSVCommand._output( + policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) + + @staticmethod + def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): + max_components = 0 + for label in policy_set: + max_components = max(max_components, len(policy_set[label].components)) + + for label in sorted(policy_set): + components = policy_set[label].components + if len(policy_set) > 1: + out.write('%s%s\n' % (label, ',' * (max_components - 1))) + out.write('%s%s\n' % ( + ','.join(components), ',' * (max_components - len(components)))) + + LOGGER.info('Applying a policy %s to...' 
% label) + for dump in dumps: + component_sizes = PolicyCommands._apply_policy( + dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) + s = [] + for c in components: + if c in ('hour', 'minute', 'second'): + s.append('%05.5f' % (component_sizes[c])) + else: + s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) + out.write('%s%s\n' % ( + ','.join(s), ',' * (max_components - len(components)))) + + bucket_set.clear_component_cache() + + return 0 + + +class JSONCommand(PolicyCommands): + def __init__(self): + super(JSONCommand, self).__init__('json') + + def do(self, sys_argv): + policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) + return JSONCommand._output( + policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) + + @staticmethod + def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): + json_base = { + 'version': 'JSON_DEEP_2', + 'policies': {}, + } + + for label in sorted(policy_set): + json_base['policies'][label] = { + 'legends': policy_set[label].components, + 'snapshots': [], + } + + LOGGER.info('Applying a policy %s to...' % label) + for dump in dumps: + component_sizes = PolicyCommands._apply_policy( + dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) + component_sizes['dump_path'] = dump.path + component_sizes['dump_time'] = datetime.datetime.fromtimestamp( + dump.time).strftime('%Y-%m-%d %H:%M:%S') + json_base['policies'][label]['snapshots'].append(component_sizes) + + bucket_set.clear_component_cache() + + json.dump(json_base, out, indent=2, sort_keys=True) + + return 0 + + +class ListCommand(PolicyCommands): + def __init__(self): + super(ListCommand, self).__init__('list') + + def do(self, sys_argv): + policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) + return ListCommand._output( + policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) + + @staticmethod + def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): + for label in sorted(policy_set): + LOGGER.info('Applying a policy %s to...' % label) + for dump in dumps: + component_sizes = PolicyCommands._apply_policy( + dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time) + out.write('%s for %s:\n' % (label, dump.path)) + for c in policy_set[label].components: + if c in ['hour', 'minute', 'second']: + out.write('%40s %12.3f\n' % (c, component_sizes[c])) + else: + out.write('%40s %12d\n' % (c, component_sizes[c])) + + bucket_set.clear_component_cache() + + return 0 diff --git a/tools/deep_memory_profiler/subcommands/pprof.py b/tools/deep_memory_profiler/subcommands/pprof.py new file mode 100644 index 0000000..506d811 --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/pprof.py @@ -0,0 +1,161 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
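+#
+# Converts a Deep Memory Profiler dump into the pprof text format so that
+# the profile can be examined with the standard pprof tool.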
+ +import logging +import sys + +from lib.bucket import BUCKET_ID, COMMITTED, ALLOC_COUNT, FREE_COUNT +from lib.policy import PolicySet +from lib.subcommand import SubCommand + + +LOGGER = logging.getLogger('dmprof') + + +class PProfCommand(SubCommand): + def __init__(self): + super(PProfCommand, self).__init__( + 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') + self._parser.add_option('-c', '--component', type='string', + dest='component', + help='restrict to COMPONENT', metavar='COMPONENT') + + def do(self, sys_argv): + options, args = self._parse_args(sys_argv, 2) + + dump_path = args[1] + target_policy = args[2] + component = options.component + + (bucket_set, dump) = SubCommand.load_basic_files(dump_path, False) + policy_set = PolicySet.load(SubCommand._parse_policy_list(target_policy)) + + with open(SubCommand._find_prefix(dump_path) + '.maps', 'r') as maps_f: + maps_lines = maps_f.readlines() + PProfCommand._output( + dump, policy_set[target_policy], bucket_set, maps_lines, component, + sys.stdout) + + return 0 + + @staticmethod + def _output(dump, policy, bucket_set, maps_lines, component_name, out): + """Converts the heap profile dump so it can be processed by pprof. + + Args: + dump: A Dump object. + policy: A Policy object. + bucket_set: A BucketSet object. + maps_lines: A list of strings containing /proc/.../maps. + component_name: A name of component for filtering. + out: An IO object to output. + """ + out.write('heap profile: ') + com_committed, com_allocs = PProfCommand._accumulate( + dump, policy, bucket_set, component_name) + + out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( + com_allocs, com_committed, com_allocs, com_committed)) + + PProfCommand._output_stacktrace_lines( + dump, policy, bucket_set, component_name, out) + + out.write('MAPPED_LIBRARIES:\n') + for line in maps_lines: + out.write(line) + + @staticmethod + def _accumulate(dump, policy, bucket_set, component_name): + """Accumulates size of committed chunks and the number of allocated chunks. + + Args: + dump: A Dump object. + policy: A Policy object. + bucket_set: A BucketSet object. + component_name: A name of component for filtering. + + Returns: + Two integers which are the accumulated size of committed regions and the + number of allocated chunks, respectively. + """ + com_committed = 0 + com_allocs = 0 + + for _, region in dump.iter_map: + if region[0] != 'hooked': + continue + component_match, bucket = policy.find_mmap(region, bucket_set) + + if (component_name and component_name != component_match) or ( + region[1]['committed'] == 0): + continue + + com_committed += region[1]['committed'] + com_allocs += 1 + + for line in dump.iter_stacktrace: + words = line.split() + bucket = bucket_set.get(int(words[BUCKET_ID])) + if not bucket or bucket.allocator_type == 'malloc': + component_match = policy.find_malloc(bucket) + elif bucket.allocator_type == 'mmap': + continue + else: + assert False + if (not bucket or + (component_name and component_name != component_match)): + continue + + com_committed += int(words[COMMITTED]) + com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) + + return com_committed, com_allocs + + @staticmethod + def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): + """Prints information of stacktrace lines for pprof. + + Args: + dump: A Dump object. + policy: A Policy object. + bucket_set: A BucketSet object. + component_name: A name of component for filtering. + out: An IO object to output. 
+ """ + for _, region in dump.iter_map: + if region[0] != 'hooked': + continue + component_match, bucket = policy.find_mmap(region, bucket_set) + + if (component_name and component_name != component_match) or ( + region[1]['committed'] == 0): + continue + + out.write(' 1: %8s [ 1: %8s] @' % ( + region[1]['committed'], region[1]['committed'])) + for address in bucket.stacktrace: + out.write(' 0x%016x' % address) + out.write('\n') + + for line in dump.iter_stacktrace: + words = line.split() + bucket = bucket_set.get(int(words[BUCKET_ID])) + if not bucket or bucket.allocator_type == 'malloc': + component_match = policy.find_malloc(bucket) + elif bucket.allocator_type == 'mmap': + continue + else: + assert False + if (not bucket or + (component_name and component_name != component_match)): + continue + + out.write('%6d: %8s [%6d: %8s] @' % ( + int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), + words[COMMITTED], + int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), + words[COMMITTED])) + for address in bucket.stacktrace: + out.write(' 0x%016x' % address) + out.write('\n') diff --git a/tools/deep_memory_profiler/subcommands/stacktrace.py b/tools/deep_memory_profiler/subcommands/stacktrace.py new file mode 100644 index 0000000..72b8509 --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/stacktrace.py @@ -0,0 +1,41 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import sys + +from lib.bucket import BUCKET_ID +from lib.subcommand import SubCommand + + +class StacktraceCommand(SubCommand): + def __init__(self): + super(StacktraceCommand, self).__init__( + 'Usage: %prog stacktrace <dump>') + + def do(self, sys_argv): + _, args = self._parse_args(sys_argv, 1) + dump_path = args[1] + (bucket_set, dump) = SubCommand.load_basic_files(dump_path, False) + + StacktraceCommand._output(dump, bucket_set, sys.stdout) + return 0 + + @staticmethod + def _output(dump, bucket_set, out): + """Outputs a given stacktrace. + + Args: + bucket_set: A BucketSet object. + out: A file object to output. + """ + for line in dump.iter_stacktrace: + words = line.split() + bucket = bucket_set.get(int(words[BUCKET_ID])) + if not bucket: + continue + for i in range(0, BUCKET_ID - 1): + out.write(words[i] + ' ') + for frame in bucket.symbolized_stackfunction: + out.write(frame + ' ') + out.write('\n') diff --git a/tools/deep_memory_profiler/subcommands/upload.py b/tools/deep_memory_profiler/subcommands/upload.py new file mode 100644 index 0000000..de34a8a --- /dev/null +++ b/tools/deep_memory_profiler/subcommands/upload.py @@ -0,0 +1,79 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +import logging +import os +import subprocess +import tempfile +import zipfile + +from lib.subcommand import SubCommand +from lib.symbol import SymbolDataSources + + +LOGGER = logging.getLogger('dmprof') + + +class UploadCommand(SubCommand): + def __init__(self): + super(UploadCommand, self).__init__( + 'Usage: %prog upload [--gsutil path/to/gsutil] ' + '<first-dump> <destination-gs-path>') + self._parser.add_option('--gsutil', default='gsutil', + help='path to GSUTIL', metavar='GSUTIL') + + def do(self, sys_argv): + options, args = self._parse_args(sys_argv, 2) + dump_path = args[1] + gs_path = args[2] + + dump_files = SubCommand._find_all_dumps(dump_path) + bucket_files = SubCommand._find_all_buckets(dump_path) + prefix = SubCommand._find_prefix(dump_path) + symbol_data_sources = SymbolDataSources(prefix) + symbol_data_sources.prepare() + symbol_path = symbol_data_sources.path() + + handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof') + os.close(handle_zip) + + try: + file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED) + for filename in dump_files: + file_zip.write(filename, os.path.basename(os.path.abspath(filename))) + for filename in bucket_files: + file_zip.write(filename, os.path.basename(os.path.abspath(filename))) + + symbol_basename = os.path.basename(os.path.abspath(symbol_path)) + for filename in os.listdir(symbol_path): + if not filename.startswith('.'): + file_zip.write(os.path.join(symbol_path, filename), + os.path.join(symbol_basename, os.path.basename( + os.path.abspath(filename)))) + file_zip.close() + + returncode = UploadCommand._run_gsutil( + options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path) + finally: + os.remove(filename_zip) + + return returncode + + @staticmethod + def _run_gsutil(gsutil, *args): + """Run gsutil as a subprocess. + + Args: + *args: Arguments to pass to gsutil. The first argument should be an + operation such as ls, cp or cat. + Returns: + The return code from the process. 
+ """ + command = [gsutil] + list(args) + LOGGER.info("Running: %s", command) + + try: + return subprocess.call(command) + except OSError, e: + LOGGER.error('Error to run gsutil: %s', e) diff --git a/tools/deep_memory_profiler/tests/dmprof_test.py b/tools/deep_memory_profiler/tests/dmprof_test.py index 8a216fc..68d1925 100755 --- a/tools/deep_memory_profiler/tests/dmprof_test.py +++ b/tools/deep_memory_profiler/tests/dmprof_test.py @@ -10,20 +10,16 @@ import sys import textwrap import unittest -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, ROOT_DIR) +BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(BASE_PATH) -try: - from collections import OrderedDict # pylint: disable=E0611 -except ImportError: - SIMPLEJSON_PATH = os.path.join(ROOT_DIR, os.pardir, os.pardir, 'third_party') - sys.path.insert(0, SIMPLEJSON_PATH) - from simplejson import OrderedDict +from lib.bucket import Bucket +from lib.ordered_dict import OrderedDict +from lib.policy import Policy +from lib.symbol import SymbolMappingCache +from lib.symbol import FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS, TYPEINFO_SYMBOLS -import dmprof -from find_runtime_symbols import FUNCTION_SYMBOLS -from find_runtime_symbols import SOURCEFILE_SYMBOLS -from find_runtime_symbols import TYPEINFO_SYMBOLS +import subcommands class SymbolMappingCacheTest(unittest.TestCase): @@ -76,7 +72,7 @@ class SymbolMappingCacheTest(unittest.TestCase): } def test_update(self): - symbol_mapping_cache = dmprof.SymbolMappingCache() + symbol_mapping_cache = SymbolMappingCache() cache_f = cStringIO.StringIO() cache_f.write(self._TEST_FUNCTION_CACHE) @@ -164,23 +160,23 @@ class PolicyTest(unittest.TestCase): """) def test_load(self): - policy = dmprof.Policy.parse(cStringIO.StringIO(self._TEST_POLICY), 'json') + policy = Policy.parse(cStringIO.StringIO(self._TEST_POLICY), 'json') self.assertTrue(policy) self.assertEqual('POLICY_DEEP_3', policy.version) def test_find(self): - policy = dmprof.Policy.parse(cStringIO.StringIO(self._TEST_POLICY), 'json') + policy = Policy.parse(cStringIO.StringIO(self._TEST_POLICY), 'json') self.assertTrue(policy) symbol_mapping_cache = self.MockSymbolMappingCache() symbol_mapping_cache.add(FUNCTION_SYMBOLS, 0x1212, 'v8::create') symbol_mapping_cache.add(FUNCTION_SYMBOLS, 0x1381, 'WebKit::create') - bucket1 = dmprof.Bucket([0x1212, 0x013], 'malloc', 0x29492, '_Z') + bucket1 = Bucket([0x1212, 0x013], 'malloc', 0x29492, '_Z') bucket1.symbolize(symbol_mapping_cache) - bucket2 = dmprof.Bucket([0x18242, 0x1381], 'malloc', 0x9492, '_Z') + bucket2 = Bucket([0x18242, 0x1381], 'malloc', 0x9492, '_Z') bucket2.symbolize(symbol_mapping_cache) - bucket3 = dmprof.Bucket([0x18242, 0x181], 'malloc', 0x949, '_Z') + bucket3 = Bucket([0x18242, 0x181], 'malloc', 0x949, '_Z') bucket3.symbolize(symbol_mapping_cache) self.assertEqual('malloc-v8', policy.find_malloc(bucket1)) @@ -190,27 +186,29 @@ class PolicyTest(unittest.TestCase): class BucketsCommandTest(unittest.TestCase): def test(self): - with open(os.path.join(ROOT_DIR, 'tests', 'output', 'buckets')) as output_f: + BUCKETS_PATH = os.path.join(BASE_PATH, 'tests', 'output', 'buckets') + with open(BUCKETS_PATH) as output_f: expected = output_f.read() out = cStringIO.StringIO() - command = dmprof.BucketsCommand() - returncode = command.do([ - 'buckets', - os.path.join(ROOT_DIR, 'tests', 'data', 'heap.01234.0001.heap')], out) + HEAP_PATH = os.path.join(BASE_PATH, 'tests', 'data', 'heap.01234.0001.heap') + subcommand = 
subcommands.BucketsCommand() + returncode = subcommand.do(['buckets', HEAP_PATH], out) self.assertEqual(0, returncode) self.assertEqual(expected, out.getvalue()) class UploadCommandTest(unittest.TestCase): def test(self): - command = dmprof.UploadCommand() - returncode = command.do([ + MOCK_GSUTIL_PATH = os.path.join(BASE_PATH, 'tests', 'mock_gsutil.py') + HEAP_PATH = os.path.join(BASE_PATH, 'tests', 'data', 'heap.01234.0001.heap') + subcommand = subcommands.UploadCommand() + returncode = subcommand.do([ 'upload', '--gsutil', - os.path.join(ROOT_DIR, 'tests', 'mock_gsutil.py'), - os.path.join(ROOT_DIR, 'tests', 'data', 'heap.01234.0001.heap'), + MOCK_GSUTIL_PATH, + HEAP_PATH, 'gs://test-storage/']) self.assertEqual(0, returncode) diff --git a/tools/deep_memory_profiler/tests/range_dict_tests.py b/tools/deep_memory_profiler/tests/range_dict_tests.py index 4c7e50b..3bc2c13 100755 --- a/tools/deep_memory_profiler/tests/range_dict_tests.py +++ b/tools/deep_memory_profiler/tests/range_dict_tests.py @@ -8,14 +8,14 @@ import os import sys import unittest -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, ROOT_DIR) +BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(BASE_PATH) -import range_dict +from lib.range_dict import ExclusiveRangeDict class ExclusiveRangeDictTest(unittest.TestCase): - class TestAttribute(range_dict.ExclusiveRangeDict.RangeAttribute): + class TestAttribute(ExclusiveRangeDict.RangeAttribute): def __init__(self): super(ExclusiveRangeDictTest.TestAttribute, self).__init__() self._value = 0 @@ -38,7 +38,7 @@ class ExclusiveRangeDictTest(unittest.TestCase): return new_attr def test_init(self): - ranges = range_dict.ExclusiveRangeDict(self.TestAttribute) + ranges = ExclusiveRangeDict(self.TestAttribute) result = [] for begin, end, attr in ranges.iter_range(20, 40): @@ -49,7 +49,7 @@ class ExclusiveRangeDictTest(unittest.TestCase): self.assertEqual(expected, result) def test_norange(self): - ranges = range_dict.ExclusiveRangeDict(self.TestAttribute) + ranges = ExclusiveRangeDict(self.TestAttribute) result = [] for begin, end, attr in ranges.iter_range(20, 20): @@ -58,7 +58,7 @@ class ExclusiveRangeDictTest(unittest.TestCase): self.assertEqual(expected, result) def test_set(self): - ranges = range_dict.ExclusiveRangeDict(self.TestAttribute) + ranges = ExclusiveRangeDict(self.TestAttribute) for begin, end, attr in ranges.iter_range(20, 30): attr.set(12) for begin, end, attr in ranges.iter_range(30, 40): @@ -74,7 +74,7 @@ class ExclusiveRangeDictTest(unittest.TestCase): self.assertEqual(expected, result) def test_split(self): - ranges = range_dict.ExclusiveRangeDict(self.TestAttribute) + ranges = ExclusiveRangeDict(self.TestAttribute) for begin, end, attr in ranges.iter_range(20, 30): attr.set(1000) for begin, end, attr in ranges.iter_range(30, 40): @@ -105,7 +105,7 @@ class ExclusiveRangeDictTest(unittest.TestCase): self.assertEqual(expected2, result2) def test_fill(self): - ranges = range_dict.ExclusiveRangeDict(self.TestAttribute) + ranges = ExclusiveRangeDict(self.TestAttribute) for begin, end, attr in ranges.iter_range(30, 35): attr.set(12345) for begin, end, attr in ranges.iter_range(40, 45): |
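With the split into lib/ and subcommands/, adding a new analysis mode amounts to dropping one module into subcommands/. The sketch below is illustrative only and not part of this change: it assumes the SubCommand interface visible in the diff above (_parse_args, load_basic_files, iter_stacktrace, BUCKET_ID, ALLOC_COUNT, FREE_COUNT), and the module name "alloccount" and the command itself are hypothetical.

# tools/deep_memory_profiler/subcommands/alloccount.py (hypothetical sketch)
import sys

from lib.bucket import BUCKET_ID, ALLOC_COUNT, FREE_COUNT
from lib.subcommand import SubCommand


class AllocCountCommand(SubCommand):
  """Prints the net number of live allocated chunks in a dump (example only)."""
  def __init__(self):
    super(AllocCountCommand, self).__init__('Usage: %prog alloccount <dump>')

  def do(self, sys_argv):
    _, args = self._parse_args(sys_argv, 1)
    dump_path = args[1]
    bucket_set, dump = SubCommand.load_basic_files(dump_path, False)

    net_allocs = 0
    for line in dump.iter_stacktrace:
      words = line.split()
      # Skip lines whose bucket is unknown, as the other subcommands do.
      if not bucket_set.get(int(words[BUCKET_ID])):
        continue
      net_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
    sys.stdout.write('%s: %d\n' % (dump.path, net_allocs))
    return 0

Wiring such a class up would presumably also require exporting it from subcommands/__init__.py (the tests above reference subcommands.UploadCommand, which suggests that is where commands are exposed) and adding a dispatch entry in dmprof.py; neither step is shown in this section. Keeping each command self-contained behind do(sys_argv) is what lets dmprof.py shrink to a thin dispatcher.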