author     dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-07-24 18:04:24 +0000
committer  dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2012-07-24 18:04:24 +0000
commit     559c26ae75b6e3facb10ff9dbdcd075a21c07246 (patch)
tree       a6467e17887c989ee8e739ae164ad72a65d8c218 /tools/deep_memory_profiler
parent     e1751bef42578a975639fb9066c3458aa2a6f3cd (diff)
Change dmprof commandline format, and clean up start-up routines.
BUG=123749
TEST=check dmprof shows the same result.
Review URL: https://chromiumcodereview.appspot.com/10802049

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@148138 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/deep_memory_profiler')
-rwxr-xr-x  tools/deep_memory_profiler/dmprof         | 747
-rw-r--r--  tools/deep_memory_profiler/policies.json  |  11
2 files changed, 514 insertions, 244 deletions
diff --git a/tools/deep_memory_profiler/dmprof b/tools/deep_memory_profiler/dmprof
index 8a6f93b..1b2729f 100755
--- a/tools/deep_memory_profiler/dmprof
+++ b/tools/deep_memory_profiler/dmprof
@@ -7,6 +7,7 @@
from datetime import datetime
import json
+import optparse
import os
import re
import shutil
@@ -30,6 +31,10 @@ ALLOC_COUNT = 2
FREE_COUNT = 3
NULL_REGEX = re.compile('')
+POLICIES_JSON_PATH = os.path.join(
+ os.path.dirname(os.path.abspath(__file__)),
+ 'policies.json')
+
# Heap Profile Dump versions
# DUMP_DEEP_1 is OBSOLETE.
@@ -72,11 +77,6 @@ POLICY_DEEP_1 = 'POLICY_DEEP_1'
# mmap regions are distinguished w/ the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'
-# TODO(dmikurube): Avoid global variables.
-address_symbol_dict = {}
-appeared_addresses = set()
-components = []
-
class EmptyDumpException(Exception):
def __init__(self, value):
@@ -106,7 +106,8 @@ class ObsoleteDumpVersionException(ParsingException):
return "obsolete heap profile dump version: %s" % repr(self.value)
-class Policy(object):
+class Rule(object):
+ """Represents one matching rule in a policy file."""
def __init__(self, name, mmap, pattern):
self.name = name
@@ -114,60 +115,74 @@ class Policy(object):
self.condition = re.compile(pattern + r'\Z')
-def get_component(policy_list, bucket):
+class Policy(object):
+ """Represents a policy, a content of a policy file."""
+
+ def __init__(self, rules, version, components):
+ self.rules = rules
+ self.version = version
+ self.components = components
+
+ def append_rule(self, rule):
+ self.rules.append(rule)
+
+
+def get_component(rule_list, bucket, symbols):
"""Returns a component name which a given bucket belongs to.
Args:
- policy_list: A list containing Policy objects. (Parsed policy data by
- parse_policy.)
+ rule_list: A list of Rule objects.
bucket: A Bucket object to be searched for.
+ symbols: A dict mapping runtime addresses to symbol names.
Returns:
A string representing a component name.
"""
if not bucket:
return 'no-bucket'
- if bucket.component:
- return bucket.component
+ if bucket.component_cache:
+ return bucket.component_cache
- stacktrace = ''.join(
- address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()
+ stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip()
- for policy in policy_list:
- if bucket.mmap == policy.mmap and policy.condition.match(stacktrace):
- bucket.component = policy.name
- return policy.name
+ for rule in rule_list:
+ if bucket.mmap == rule.mmap and rule.condition.match(stacktrace):
+ bucket.component_cache = rule.name
+ return rule.name
assert False
class Bucket(object):
+ """Represents a bucket, which is a unit of memory classification."""
def __init__(self, stacktrace, mmap):
self.stacktrace = stacktrace
self.mmap = mmap
- self.component = ''
+ self.component_cache = ''
+
+ def clear_component_cache(self):
+ self.component_cache = ''
-class Log(object):
+class Dump(object):
+ """Represents one heap profile dump."""
- """A class representing one dumped log data."""
- def __init__(self, log_path):
- self.log_path = log_path
- self.log_lines = [
- l for l in open(self.log_path, 'r') if l and not l.startswith('#')]
- self.log_version = ''
- sys.stderr.write('Loading a dump: %s\n' % log_path)
+ def __init__(self, dump_path):
+ self.dump_path = dump_path
+ self.dump_lines = [
+ l for l in open(self.dump_path, 'r') if l and not l.startswith('#')]
+ self.dump_version = ''
self.stacktrace_lines = []
self.counters = {}
- self.log_time = os.stat(self.log_path).st_mtime
+ self.dump_time = os.stat(self.dump_path).st_mtime
- def dump_stacktrace(buckets):
+ def print_stacktrace(self, buckets, symbols):
"""Prints a given stacktrace.
Args:
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
+ buckets: A dict mapping bucket ids to Bucket objects.
+ symbols: A dict mapping runtime addresses to symbol names.
"""
for line in self.stacktrace_lines:
words = line.split()
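[Note] A minimal sketch of the rule matching that get_component performs above, using hypothetical symbol names and patterns (real rules come from a policy file parsed by parse_policy):

  import re

  class Rule(object):
    def __init__(self, name, mmap, pattern):
      self.name = name
      self.mmap = mmap
      self.condition = re.compile(pattern + r'\Z')

  # Hypothetical symbolized stacktrace and rules.  Rules are tried in
  # file order and the first match wins, so broad catch-all patterns
  # like '.*' must come last.
  stacktrace = 'WebCore::Node::create malloc'
  rules = [Rule('tc-webkit', False, '.*WebCore::.*'),
           Rule('unknown', False, '.*')]
  for rule in rules:
    if rule.condition.match(stacktrace):
      print(rule.name)  # -> tc-webkit
      break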
@@ -177,21 +192,20 @@ class Log(object):
for i in range(0, BUCKET_ID - 1):
sys.stdout.write(words[i] + ' ')
for address in bucket.stacktrace:
- sys.stdout.write((address_symbol_dict.get(address) or address) + ' ')
+ sys.stdout.write((symbols.get(address) or address) + ' ')
sys.stdout.write('\n')
@staticmethod
- def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets,
- component_name):
+ def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets,
+ component_name, symbols):
"""Accumulates size of committed chunks and the number of allocated chunks.
Args:
stacktrace_lines: A list of strings which are valid as stacktraces.
- policy_list: A list containing Policy objects. (Parsed policy data by
- parse_policy.)
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
+ rule_list: A list of Rule objects.
+ buckets: A dict mapping bucket ids to Bucket objects.
component_name: A name of component for filtering.
+ symbols: A dict mapping runtime addresses to symbol names.
Returns:
Two integers which are the accumulated size of committed regions and the
@@ -204,7 +218,7 @@ class Log(object):
bucket = buckets.get(int(words[BUCKET_ID]))
if (not bucket or
(component_name and
- component_name != get_component(policy_list, bucket))):
+ component_name != get_component(rule_list, bucket, symbols))):
continue
com_committed += int(words[COMMITTED])
@@ -213,24 +227,23 @@ class Log(object):
return com_committed, com_allocs
@staticmethod
- def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list,
- buckets, component_name):
+ def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list,
+ buckets, component_name, symbols):
"""Prints information of stacktrace lines for pprof.
Args:
stacktrace_lines: A list of strings which are valid as stacktraces.
- policy_list: A list containing Policy objects. (Parsed policy data by
- parse_policy.)
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
+ rule_list: A list of Rule objects.
+ buckets: A dict mapping bucket ids to Bucket objects.
component_name: A name of component for filtering.
+ symbols: A dict mapping runtime addresses to symbol names.
"""
for line in stacktrace_lines:
words = line.split()
bucket = buckets.get(int(words[BUCKET_ID]))
if (not bucket or
(component_name and
- component_name != get_component(policy_list, bucket))):
+ component_name != get_component(rule_list, bucket, symbols))):
continue
sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
@@ -242,39 +255,39 @@ class Log(object):
sys.stdout.write(' ' + address)
sys.stdout.write('\n')
- def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name):
- """Converts the log file so it can be processed by pprof.
+ def print_for_pprof(
+ self, rule_list, buckets, maps_lines, component_name, symbols):
+ """Converts the heap profile dump so it can be processed by pprof.
Args:
- policy_list: A list containing Policy objects. (Parsed policy data by
- parse_policy.)
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
- mapping_lines: A list of strings containing /proc/.../maps.
+ rule_list: A list of Rule objects.
+ buckets: A dict mapping bucket ids to Bucket objects.
+ maps_lines: A list of strings containing /proc/.../maps.
component_name: A name of component for filtering.
+ symbols: A dict mapping runtime addresses to symbol names.
"""
sys.stdout.write('heap profile: ')
com_committed, com_allocs = self.accumulate_size_for_pprof(
- self.stacktrace_lines, policy_list, buckets, component_name)
+ self.stacktrace_lines, rule_list, buckets, component_name, symbols)
sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
com_allocs, com_committed, com_allocs, com_committed))
- self.dump_stacktrace_lines_for_pprof(
- self.stacktrace_lines, policy_list, buckets, component_name)
+ self.print_stacktrace_lines_for_pprof(
+ self.stacktrace_lines, rule_list, buckets, component_name, symbols)
sys.stdout.write('MAPPED_LIBRARIES:\n')
- for line in mapping_lines:
+ for line in maps_lines:
sys.stdout.write(line)
@staticmethod
- def check_stacktrace_line(stacktrace_line, buckets):
+ def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses):
"""Checks if a given stacktrace_line is valid as stacktrace.
Args:
stacktrace_line: A string to be checked.
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
+ buckets: A dict mapping bucket ids to Bucket objects.
+ appeared_addresses: A list where appeared addresses will be stored.
Returns:
True if the given stacktrace_line is valid.
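[Note] For reference, print_for_pprof above emits the classic tcmalloc text heap-profile format; a hypothetical fragment (all numbers and addresses invented) would look like:

  heap profile:     12:   409600 [    12:   409600] @ heapprofile
     10:   401408 [   10:   401408] @ 0x7f3a00b2c100 0x7f3a00b2c200
      2:     8192 [    2:     8192] @ 0x7f3a00b2c300
  MAPPED_LIBRARIES:
  00400000-04a00000 r-xp 00000000 08:01 1234567 /out/Debug/chrome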
@@ -305,61 +318,59 @@ class Log(object):
return line_number, False
return line_number, True
- def parse_stacktraces_while_valid(self, buckets, log_lines, line_number):
+ def parse_stacktraces_while_valid(
+ self, buckets, dump_lines, line_number, appeared_addresses):
"""Parses stacktrace lines while the lines are valid.
Args:
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
- log_lines: A list of lines to be parsed.
- line_number: An integer representing the starting line number in
- log_lines.
+ buckets: A dict mapping bucket ids to Bucket objects.
+ dump_lines: A list of lines to be parsed.
+ line_number: A line number to start parsing in dump_lines.
+ appeared_addresses: A list where appeared addresses will be stored.
Returns:
A pair of a list of valid lines and an integer representing the last
- line number in log_lines.
+ line number in dump_lines.
"""
(line_number, _) = self.skip_lines_while(
- line_number, len(log_lines),
- lambda n: not log_lines[n].split()[0].isdigit())
+ line_number, len(dump_lines),
+ lambda n: not dump_lines[n].split()[0].isdigit())
stacktrace_lines_start = line_number
(line_number, _) = self.skip_lines_while(
- line_number, len(log_lines),
- lambda n: self.check_stacktrace_line(log_lines[n], buckets))
- return (log_lines[stacktrace_lines_start:line_number], line_number)
+ line_number, len(dump_lines),
+ lambda n: self.check_stacktrace_line(
+ dump_lines[n], buckets, appeared_addresses))
+ return (dump_lines[stacktrace_lines_start:line_number], line_number)
- def parse_stacktraces(self, buckets, line_number):
- """Parses lines in self.log_lines as stacktrace.
+ def parse_stacktraces(self, buckets, line_number, appeared_addresses):
+ """Parses lines in self.dump_lines as stacktrace.
Valid stacktrace lines are stored into self.stacktrace_lines.
Args:
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
- line_number: An integer representing the starting line number in
- log_lines.
+ buckets: A dict mapping bucket ids to Bucket objects.
+ line_number: A line number to start parsing in dump_lines.
+ appeared_addresses: A list where appeared addresses will be stored.
Raises:
ParsingException for invalid dump versions.
"""
- sys.stderr.write(' Version: %s\n' % self.log_version)
-
- if self.log_version == DUMP_DEEP_5:
+ if self.dump_version == DUMP_DEEP_5:
(self.stacktrace_lines, line_number) = (
self.parse_stacktraces_while_valid(
- buckets, self.log_lines, line_number))
+ buckets, self.dump_lines, line_number, appeared_addresses))
- elif self.log_version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(self.log_version)
+ elif self.dump_version in DUMP_DEEP_OBSOLETE:
+ raise ObsoleteDumpVersionException(self.dump_version)
else:
- raise InvalidDumpException('Invalid version: %s' % self.log_version)
+ raise InvalidDumpException('Invalid version: %s' % self.dump_version)
def parse_global_stats(self):
- """Parses lines in self.log_lines as global stats."""
+ """Parses lines in self.dump_lines as global stats."""
(ln, _) = self.skip_lines_while(
- 0, len(self.log_lines),
- lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n')
+ 0, len(self.dump_lines),
+ lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n')
global_stat_names = [
'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
@@ -370,14 +381,14 @@ class Log(object):
for prefix in global_stat_names:
(ln, _) = self.skip_lines_while(
- ln, len(self.log_lines),
- lambda n: self.log_lines[n].split()[0] != prefix)
- words = self.log_lines[ln].split()
+ ln, len(self.dump_lines),
+ lambda n: self.dump_lines[n].split()[0] != prefix)
+ words = self.dump_lines[ln].split()
self.counters[prefix + '_virtual'] = int(words[-2])
self.counters[prefix + '_committed'] = int(words[-1])
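[Note] parse_global_stats expects a GLOBAL_STATS section in which each named row ends with two integers, virtual then committed; a hypothetical fragment that would satisfy this parser (the list of row names continues beyond what this hunk shows):

  GLOBAL_STATS:
  total        904380416  188416000
  file-exec    130048000   91017216
  file-nonexec  65536000   12288000
  anonymous    524288000   73728000
  stack          8388608    1048576
  other        176119808   10334208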
def parse_version(self):
- """Parses a version string in self.log_lines.
+ """Parses a version string in self.dump_lines.
Returns:
A pair of (a string representing a version of the stacktrace dump,
@@ -390,44 +401,44 @@ class Log(object):
# Skip until an identifiable line.
headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
- if not self.log_lines:
+ if not self.dump_lines:
raise EmptyDumpException('Empty heap dump file.')
(ln, found) = self.skip_lines_while(
- 0, len(self.log_lines),
- lambda n: not self.log_lines[n].startswith(headers))
+ 0, len(self.dump_lines),
+ lambda n: not self.dump_lines[n].startswith(headers))
if not found:
raise InvalidDumpException('No version header.')
# Identify a version.
- if self.log_lines[ln].startswith('heap profile: '):
- version = self.log_lines[ln][13:].strip()
+ if self.dump_lines[ln].startswith('heap profile: '):
+ version = self.dump_lines[ln][13:].strip()
if version == DUMP_DEEP_5:
(ln, _) = self.skip_lines_while(
- ln, len(self.log_lines),
- lambda n: self.log_lines[n] != 'STACKTRACES:\n')
+ ln, len(self.dump_lines),
+ lambda n: self.dump_lines[n] != 'STACKTRACES:\n')
elif version in DUMP_DEEP_OBSOLETE:
raise ObsoleteDumpVersionException(version)
else:
raise InvalidDumpException('Invalid version: %s' % version)
- elif self.log_lines[ln] == 'STACKTRACES:\n':
+ elif self.dump_lines[ln] == 'STACKTRACES:\n':
raise ObsoleteDumpVersionException(DUMP_DEEP_1)
- elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n':
+ elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n':
raise ObsoleteDumpVersionException(DUMP_DEEP_2)
return (version, ln)
- def parse_log(self, buckets):
- self.log_version, ln = self.parse_version()
+ def parse_dump(self, buckets, appeared_addresses):
+ self.dump_version, ln = self.parse_version()
self.parse_global_stats()
- self.parse_stacktraces(buckets, ln)
+ self.parse_stacktraces(buckets, ln, appeared_addresses)
@staticmethod
def accumulate_size_for_policy(stacktrace_lines,
- policy_list, buckets, sizes):
+ rule_list, buckets, sizes, symbols):
for line in stacktrace_lines:
words = line.split()
bucket = buckets.get(int(words[BUCKET_ID]))
- component_match = get_component(policy_list, bucket)
+ component_match = get_component(rule_list, bucket, symbols)
sizes[component_match] += int(words[COMMITTED])
if component_match.startswith('tc-'):
@@ -437,29 +448,30 @@ class Log(object):
else:
sizes['other-total-log'] += int(words[COMMITTED])
- def apply_policy(self, policy_list, buckets, first_log_time):
+ def apply_policy(
+ self, rule_list, buckets, first_dump_time, components, symbols):
"""Aggregates the total memory size of each component.
Iterate through all stacktraces and attribute them to one of the components
based on the policy. It is important to apply the policy in the right order.
Args:
- policy_list: A list containing Policy objects. (Parsed policy data by
- parse_policy.)
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
- first_log_time: An integer representing time when the first log is
+ rule_list: A list of Rule objects.
+ buckets: A dict mapping bucket ids to Bucket objects.
+ first_dump_time: An integer representing time when the first dump is
dumped.
+ components: A list of strings of component names.
+ symbols: A dict mapping runtime addresses to symbol names.
Returns:
A dict mapping components and their corresponding sizes.
"""
- sys.stderr.write('apply policy:%s\n' % (self.log_path))
+ sys.stderr.write('Applying policy: "%s".\n' % self.dump_path)
sizes = dict((c, 0) for c in components)
self.accumulate_size_for_policy(self.stacktrace_lines,
- policy_list, buckets, sizes)
+ rule_list, buckets, sizes, symbols)
mmap_prefix = 'profiled-mmap'
malloc_prefix = 'profiled-malloc'
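[Note] apply_policy ultimately returns a dict from component name to accumulated committed size in bytes, with the synthetic time columns filled in near the end of the method; a hedged sketch of the result shape (values invented, names taken from the code where visible):

  sizes = {
      'mmap-profiler': 4194304,          # filled from profiled-mmap counters
      'tc-webkit': 52428800,
      'total-exclude-profiler': 880803840,
      'second': 12.0,                    # dump_time - first_dump_time
  }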
@@ -513,46 +525,45 @@ class Log(object):
sizes['total-exclude-profiler'] = (
self.counters['total_committed'] - sizes['mmap-profiler'])
if 'hour' in sizes:
- sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0
+ sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0
if 'minute' in sizes:
- sizes['minute'] = (self.log_time - first_log_time) / 60.0
+ sizes['minute'] = (self.dump_time - first_dump_time) / 60.0
if 'second' in sizes:
- sizes['second'] = self.log_time - first_log_time
+ sizes['second'] = self.dump_time - first_dump_time
return sizes
@staticmethod
- def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets,
- component_name, depth, sizes):
+ def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets,
+ component_name, depth, sizes, symbols):
for line in stacktrace_lines:
words = line.split()
bucket = buckets.get(int(words[BUCKET_ID]))
- component_match = get_component(policy_list, bucket)
+ component_match = get_component(rule_list, bucket, symbols)
if component_match == component_name:
stacktrace_sequence = ''
for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
1 + depth)]:
- stacktrace_sequence += address_symbol_dict[address] + ' '
+ stacktrace_sequence += symbols[address] + ' '
if not stacktrace_sequence in sizes:
sizes[stacktrace_sequence] = 0
sizes[stacktrace_sequence] += int(words[COMMITTED])
- def expand(self, policy_list, buckets, component_name, depth):
+ def expand(self, rule_list, buckets, component_name, depth, symbols):
"""Prints all stacktraces in a given component of given depth.
Args:
- policy_list: A list containing Policy objects. (Parsed policy data by
- parse_policy.)
- buckets: A dict mapping bucket ids and their corresponding Bucket
- objects.
+ rule_list: A list of Rule objects.
+ buckets: A dict mapping bucket ids to Bucket objects.
component_name: A name of component for filtering.
depth: An integer representing depth to be printed.
+ symbols: A dict mapping runtime addresses to symbol names.
"""
sizes = {}
self.accumulate_size_for_expand(
- self.stacktrace_lines, policy_list, buckets, component_name,
- depth, sizes)
+ self.stacktrace_lines, rule_list, buckets, component_name,
+ depth, sizes, symbols)
sorted_sizes_list = sorted(
sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
@@ -563,7 +574,8 @@ class Log(object):
sys.stderr.write('total: %d\n' % (total))
-def update_symbols(symbol_path, mapping_lines, maps_path):
+def update_symbols(
+ symbol_path, maps_path, appeared_addresses, symbols):
"""Updates address/symbol mapping on memory and in a .symbol cache file.
It reads cached address/symbol mapping from a .symbol file if it exists.
@@ -578,30 +590,43 @@ def update_symbols(symbol_path, mapping_lines, maps_path):
Args:
symbol_path: A string representing a path for a .symbol file.
- mapping_lines: A list of strings containing /proc/.../maps.
maps_path: A string of the path of /proc/.../maps.
+ appeared_addresses: A list of known addresses.
+ symbols: A dict mapping runtime addresses to symbol names.
"""
with open(symbol_path, mode='a+') as symbol_f:
symbol_lines = symbol_f.readlines()
if symbol_lines:
for line in symbol_lines:
items = line.split(None, 1)
- address_symbol_dict[items[0]] = items[1].rstrip()
+ if len(items) == 1:
+ items.append('??')
+ symbols[items[0]] = items[1].rstrip()
+ if symbols:
+ sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols))
+ else:
+ sys.stderr.write(' No symbols found in cache.\n')
unresolved_addresses = sorted(
- a for a in appeared_addresses if a not in address_symbol_dict)
+ a for a in appeared_addresses if a not in symbols)
- if unresolved_addresses:
+ if not unresolved_addresses:
+ sys.stderr.write(' No need to resolve any more addresses.\n')
+ else:
+ sys.stderr.write(' %d addresses are unresolved.\n' %
+ len(unresolved_addresses))
prepared_data_dir = tempfile.mkdtemp()
try:
prepare_symbol_info(maps_path, prepared_data_dir)
- symbols = find_runtime_symbols_list(
+ symbol_list = find_runtime_symbols_list(
prepared_data_dir, unresolved_addresses)
- for address, symbol in zip(unresolved_addresses, symbols):
+ for address, symbol in zip(unresolved_addresses, symbol_list):
+ if not symbol:
+ symbol = '??'
stripped_symbol = symbol.strip()
- address_symbol_dict[address] = stripped_symbol
+ symbols[address] = stripped_symbol
symbol_f.write('%s %s\n' % (address, stripped_symbol))
finally:
shutil.rmtree(prepared_data_dir)
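[Note] The .symbols cache that update_symbols maintains is a plain text file of one "address symbol" pair per line, with '??' recorded for addresses that could not be resolved; hypothetical contents:

  0x7f3a00b2c100 WebCore::Node::create
  0x7f3a00b2c200 v8::internal::Heap::AllocateRaw
  0x7f3a00b2c300 ??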
@@ -627,10 +652,10 @@ def parse_policy(policy_path):
if policy_lines[0].startswith('heap profile policy: '):
policy_version = policy_lines[0][21:].strip()
policy_lines.pop(0)
- policy_list = []
+ rule_list = []
+ components = []
if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:
- sys.stderr.write(' heap profile policy version: %s\n' % policy_version)
for line in policy_lines:
if line[0] == '#':
continue
@@ -646,7 +671,7 @@ def parse_policy(policy_path):
mmap = False
if pattern != 'default':
- policy_list.append(Policy(name, mmap, pattern))
+ rule_list.append(Rule(name, mmap, pattern))
if components.count(name) == 0:
components.append(name)
@@ -654,57 +679,16 @@ def parse_policy(policy_path):
sys.stderr.write(' invalid heap profile policy version: %s\n' % (
policy_version))
- return policy_list
+ return rule_list, policy_version, components
-def main():
- if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv',
- '--json',
- '--expand',
- '--list',
- '--stacktrace',
- '--pprof'])):
- sys.stderr.write("""Usage:
-%s [options] <chrome-binary> <policy> <profile> [component-name] [depth]
-
-Options:
- --csv Output result in csv format
- --json Output result in json format
- --stacktrace Convert raw address to symbol names
- --list Lists components and their sizes
- --expand Show all stacktraces in the specified component
- of given depth with their sizes
- --pprof Format the profile file so it can be processed
- by pprof
-
-Examples:
- dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
- dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
- dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
- dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
- dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
-""" % (sys.argv[0]))
- sys.exit(1)
-
- action = sys.argv[1]
- chrome_path = sys.argv[2]
- policy_path = sys.argv[3]
- log_path = sys.argv[4]
-
- sys.stderr.write('parsing a policy file\n')
- policy_list = parse_policy(policy_path)
-
- p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')
- prefix = p.sub('', log_path)
- symbol_path = prefix + '.symbols'
+def find_prefix(path):
+ return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
- sys.stderr.write('parsing the maps file\n')
- maps_path = prefix + '.maps'
- with open(maps_path, 'r') as maps_f:
- maps_lines = maps_f.readlines()
+def load_buckets(prefix):
# Reading buckets
- sys.stderr.write('parsing the bucket file\n')
+ sys.stderr.write('Loading bucket files.\n')
buckets = {}
bucket_count = 0
n = 0
@@ -715,80 +699,259 @@ Examples:
break
n += 1
continue
- sys.stderr.write('reading buckets from %s\n' % (buckets_path))
+ sys.stderr.write(' %s\n' % buckets_path)
with open(buckets_path, 'r') as buckets_f:
for line in buckets_f:
words = line.split()
buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap')
n += 1
- log_path_list = [log_path]
+ return buckets
- if action in ('--csv', '--json'):
- # search for the sequence of files
- n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
- n += 1 # skip current file
- while True:
- p = '%s.%04d.heap' % (prefix, n)
- if os.path.exists(p):
- log_path_list.append(p)
- else:
- break
- n += 1
- logs = []
- for path in log_path_list:
- new_log = Log(path)
- sys.stderr.write('Parsing a dump: %s\n' % path)
- try:
- new_log.parse_log(buckets)
- except EmptyDumpException:
- sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path)
- except ParsingException, e:
- sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e)
- sys.exit(1)
+def determine_dump_path_list(dump_path, prefix):
+ dump_path_list = [dump_path]
+
+ # search for the sequence of files
+ n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
+ n += 1 # skip current file
+ while True:
+ p = '%s.%04d.heap' % (prefix, n)
+ if os.path.exists(p):
+ dump_path_list.append(p)
else:
- logs.append(new_log)
+ break
+ n += 1
+
+ return dump_path_list
+
+
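[Note] find_prefix and determine_dump_path_list together encode the dump file naming convention; a small sketch mirroring the code above, with hypothetical paths:

  import os
  import re

  def find_prefix(path):
    return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)

  # 'hprof.12345.0001.heap' -> prefix 'hprof.12345'; sibling dumps are then
  # found by probing 'hprof.12345.0002.heap', '...0003.heap', and so on,
  # while the same prefix locates 'hprof.12345.maps' and
  # 'hprof.12345.symbols'.
  prefix = find_prefix('hprof.12345.0001.heap')
  print(prefix)                                 # -> hprof.12345
  print(os.path.exists(prefix + '.0002.heap'))  # probe for the next dump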
+def load_single_dump(dump_path, buckets, appeared_addresses):
+ new_dump = Dump(dump_path)
+ try:
+ new_dump.parse_dump(buckets, appeared_addresses)
+ except EmptyDumpException:
+ sys.stderr.write('... ignored an empty dump')
+ except ParsingException, e:
+ sys.stderr.write('... error in parsing: %s' % e)
+ sys.exit(1)
+ else:
+ sys.stderr.write(' (version: %s)' % new_dump.dump_version)
+
+ return new_dump
+
+
+def load_dump(dump_path, buckets):
+ sys.stderr.write('Loading a heap dump file: "%s"' % dump_path)
+ appeared_addresses = set()
+ dump = load_single_dump(dump_path, buckets, appeared_addresses)
+ sys.stderr.write('.\n')
+ return dump, appeared_addresses
+
+
+def load_dumps(dump_path_list, buckets):
+ sys.stderr.write('Loading heap dump files.\n')
+ appeared_addresses = set()
+ dumps = []
+ for path in dump_path_list:
+ sys.stderr.write(' %s' % path)
+ dumps.append(load_single_dump(path, buckets, appeared_addresses))
+ sys.stderr.write('\n')
+ return dumps, appeared_addresses
+
+
+def load_and_update_symbol_cache(prefix, appeared_addresses):
+ maps_path = prefix + '.maps'
+ symbol_path = prefix + '.symbols'
+ sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path)
+ symbols = {}
+ update_symbols(symbol_path, maps_path, appeared_addresses, symbols)
+ return symbols
+
+
+def load_default_policies():
+ with open(POLICIES_JSON_PATH, mode='r') as policies_f:
+ default_policies = json.load(policies_f)
+ return default_policies
+
+
+def load_policy(policies_dict, policy_label):
+ policy_file = policies_dict[policy_label]['file']
+ policy_path = os.path.join(os.path.dirname(__file__), policy_file)
+ rule_list, policy_version, components = parse_policy(policy_path)
+ sys.stderr.write(' %s: %s (version: %s)\n' %
+ (policy_label, policy_path, policy_version))
+ return Policy(rule_list, policy_version, components)
+
+
+def load_policies_dict(policies_dict):
+ sys.stderr.write('Loading policy files.\n')
+ policies = {}
+ for policy_label in policies_dict:
+ policies[policy_label] = load_policy(policies_dict, policy_label)
+ return policies
+
+
+def load_policies(options_policy):
+ default_policies = load_default_policies()
+ if options_policy:
+ policy_labels = options_policy.split(',')
+ specified_policies = {}
+ for specified_policy in policy_labels:
+ if specified_policy in default_policies:
+ specified_policies[specified_policy] = (
+ default_policies[specified_policy])
+ policies = load_policies_dict(specified_policies)
+ else:
+ policies = load_policies_dict(default_policies)
+ return policies
+
+
+def do_stacktrace(sys_argv):
+ parser = optparse.OptionParser(usage='Usage: %prog stacktrace <dump>')
+ options, args = parser.parse_args(sys_argv)
+
+ if len(args) != 2:
+ parser.error('needs 1 argument.')
+ return 1
+
+ dump_path = args[1]
+
+ prefix = find_prefix(dump_path)
+ buckets = load_buckets(prefix)
+ dump, appeared_addresses = load_dump(dump_path, buckets)
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
+
+ dump.print_stacktrace(buckets, symbols)
+
+ return 0
- sys.stderr.write('getting symbols\n')
- update_symbols(symbol_path, maps_lines, maps_path)
- # TODO(dmikurube): Many modes now. Split them into separete functions.
- if action == '--stacktrace':
- logs[0].dump_stacktrace(buckets)
+def do_csv(sys_argv):
+ parser = optparse.OptionParser('Usage: %prog csv [-p POLICY] <first-dump>')
+ parser.add_option('-p', '--policy', type='string', dest='policy',
+ help='profile with POLICY', metavar='POLICY')
+ options, args = parser.parse_args(sys_argv)
- elif action == '--csv':
- sys.stdout.write(','.join(components))
- sys.stdout.write('\n')
+ if len(args) != 2:
+ parser.error('needs 1 argument.')
+ return 1
- for log in logs:
- component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
+ dump_path = args[1]
+
+ prefix = find_prefix(dump_path)
+ buckets = load_buckets(prefix)
+ dumps, appeared_addresses = load_dumps(
+ determine_dump_path_list(dump_path, prefix), buckets)
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
+ policies = load_policies(options.policy)
+
+ max_components = 0
+ for policy in policies:
+ max_components = max(max_components, len(policies[policy].components))
+
+ for policy in sorted(policies):
+ rule_list = policies[policy].rules
+ components = policies[policy].components
+
+ if len(policies) > 1:
+ sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1)))
+ sys.stdout.write('%s%s\n' % (
+ ','.join(components), ',' * (max_components - len(components))))
+
+ for dump in dumps:
+ component_sizes = dump.apply_policy(
+ rule_list, buckets, dumps[0].dump_time, components, symbols)
s = []
for c in components:
if c in ('hour', 'minute', 'second'):
s.append('%05.5f' % (component_sizes[c]))
else:
s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
- sys.stdout.write(','.join(s))
- sys.stdout.write('\n')
+ sys.stdout.write('%s%s\n' % (
+ ','.join(s), ',' * (max_components - len(components))))
+
+ for bucket in buckets.itervalues():
+ bucket.clear_component_cache()
+
+ return 0
+
+
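[Note] When more than one policy is selected, do_csv pads every row with trailing commas out to the widest policy's component count so the CSV stays rectangular; a hypothetical two-policy output fragment (policy labels from policies.json, component names and sizes invented):

  l0,,,
  tc-webkit,mmap-profiler,second,
  50.00000,4.00000,0.00000,
  l1,,,
  tc-webkit,tc-skia,mmap-profiler,second
  50.00000,2.00000,4.00000,0.00000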
+def do_json(sys_argv):
+ parser = optparse.OptionParser('Usage: %prog json [-p POLICY] <first-dump>')
+ parser.add_option('-p', '--policy', type='string', dest='policy',
+ help='profile with POLICY', metavar='POLICY')
+ options, args = parser.parse_args(sys_argv)
+
+ if len(args) != 2:
+ parser.error('needs 1 argument.')
+ return 1
+
+ dump_path = args[1]
+
+ prefix = find_prefix(dump_path)
+ buckets = load_buckets(prefix)
+ dumps, appeared_addresses = load_dumps(
+ determine_dump_path_list(dump_path, prefix), buckets)
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
+ policies = load_policies(options.policy)
- elif action == '--json':
- json_base = {
- 'version': 'JSON_DEEP_1',
+ json_base = {
+ 'version': 'JSON_DEEP_2',
+ 'policies': {},
+ }
+
+ for policy in sorted(policies):
+ rule_list = policies[policy].rules
+ components = policies[policy].components
+
+ json_base['policies'][policy] = {
'legends': components,
'snapshots': [],
}
- for log in logs:
- component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
- component_sizes['log_path'] = log.log_path
- component_sizes['log_time'] = datetime.fromtimestamp(
- log.log_time).strftime('%Y-%m-%d %H:%M:%S')
- json_base['snapshots'].append(component_sizes)
- json.dump(json_base, sys.stdout, indent=2, sort_keys=True)
-
- elif action == '--list':
- component_sizes = logs[0].apply_policy(
- policy_list, buckets, logs[0].log_time)
+
+ for dump in dumps:
+ component_sizes = dump.apply_policy(
+ rule_list, buckets, dumps[0].dump_time, components, symbols)
+ component_sizes['dump_path'] = dump.dump_path
+ component_sizes['dump_time'] = datetime.fromtimestamp(
+ dump.dump_time).strftime('%Y-%m-%d %H:%M:%S')
+ json_base['policies'][policy]['snapshots'].append(component_sizes)
+
+ for bucket in buckets.itervalues():
+ bucket.clear_component_cache()
+
+ json.dump(json_base, sys.stdout, indent=2, sort_keys=True)
+
+ return 0
+
+
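[Note] The JSON output gains a per-policy level in this change (version JSON_DEEP_2, replacing JSON_DEEP_1); a hypothetical skeleton of the new structure (sizes in bytes, all values invented):

  {
    "policies": {
      "l0": {
        "legends": ["tc-webkit", "second"],
        "snapshots": [
          {
            "dump_path": "hprof.12345.0001.heap",
            "dump_time": "2012-07-24 18:04:24",
            "second": 0.0,
            "tc-webkit": 52428800
          }
        ]
      }
    },
    "version": "JSON_DEEP_2"
  }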
+def do_list(sys_argv):
+ parser = optparse.OptionParser('Usage: %prog [-p POLICY] list <first-dump>')
+ parser.add_option('-p', '--policy', type='string', dest='policy',
+ help='profile with POLICY', metavar='POLICY')
+ options, args = parser.parse_args(sys_argv)
+
+ if len(args) != 2:
+ parser.error('needs 1 argument.')
+ return 1
+
+ dump_path = args[1]
+
+ prefix = find_prefix(dump_path)
+ buckets = load_buckets(prefix)
+ dumps, appeared_addresses = load_dumps(
+ determine_dump_path_list(dump_path, prefix), buckets)
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
+ policies = load_policies(options.policy)
+
+ for policy in sorted(policies):
+ rule_list = policies[policy].rules
+ components = policies[policy].components
+
+ component_sizes = dumps[0].apply_policy(
+ rule_list, buckets, dumps[0].dump_time, components, symbols)
+ sys.stdout.write('%s:\n' % policy)
for c in components:
if c in ['hour', 'minute', 'second']:
sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
@@ -796,16 +959,112 @@ Examples:
sys.stdout.write('%30s %10.3f\n' % (
c, component_sizes[c] / 1024.0 / 1024.0))
- elif action == '--expand':
- component_name = sys.argv[5]
- depth = sys.argv[6]
- logs[0].expand(policy_list, buckets, component_name, int(depth))
+ for bucket in buckets.itervalues():
+ bucket.clear_component_cache()
- elif action == '--pprof':
- if len(sys.argv) > 5:
- logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5])
- else:
- logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None)
+ return 0
+
+
+def do_expand(sys_argv):
+ parser = optparse.OptionParser(
+ 'Usage: %prog expand <dump> <policy> <component> <depth>')
+ options, args = parser.parse_args(sys_argv)
+
+ if len(args) != 5:
+ parser.error('needs 4 arguments.')
+ return 1
+
+ dump_path = args[1]
+ target_policy = args[2]
+ component_name = args[3]
+ depth = args[4]
+
+ prefix = find_prefix(dump_path)
+ buckets = load_buckets(prefix)
+ dump, appeared_addresses = load_dump(dump_path, buckets)
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
+ policies = load_policies(target_policy)
+
+ rule_list = policies[target_policy].rules
+
+ dump.expand(rule_list, buckets, component_name, int(depth), symbols)
+
+ return 0
+
+
+def do_pprof(sys_argv):
+ parser = optparse.OptionParser(
+ 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
+ parser.add_option('-c', '--component', type='string', dest='component',
+ help='restrict to COMPONENT', metavar='COMPONENT')
+ options, args = parser.parse_args(sys_argv)
+
+ if len(args) != 3:
+ parser.error('needs 2 arguments.')
+ return 1
+
+ dump_path = args[1]
+ target_policy = args[2]
+ component = options.component
+
+ prefix = find_prefix(dump_path)
+ buckets = load_buckets(prefix)
+ dump, appeared_addresses = load_dump(dump_path, buckets)
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
+ policies = load_policies(target_policy)
+
+ rule_list = policies[target_policy].rules
+
+ with open(prefix + '.maps', 'r') as maps_f:
+ maps_lines = maps_f.readlines()
+ dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols)
+
+ return 0
+
+
+def main():
+ COMMANDS = {
+ 'csv': do_csv,
+ 'expand': do_expand,
+ 'json': do_json,
+ 'list': do_list,
+ 'pprof': do_pprof,
+ 'stacktrace': do_stacktrace,
+ }
+
+ # TODO(dmikurube): Remove this message after a while.
+ if len(sys.argv) >= 2 and sys.argv[1].startswith('--'):
+ sys.stderr.write("""
+**************** NOTICE!! ****************
+ The command line format has changed.
+ Please look at the description below.
+******************************************
+
+""")
+
+ if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
+ sys.stderr.write("""Usage: %s <command> [options] [<args>]
+
+Commands:
+ csv Classify memory usage in CSV
+ expand Show all stacktraces contained in the specified component
+ json Classify memory usage in JSON
+ list Classify memory usage in simple listing format
+ pprof Format the profile dump so that it can be processed by pprof
+ stacktrace Convert runtime addresses to symbol names
+
+Quick Reference:
+ dmprof csv [-p POLICY] <first-dump>
+ dmprof expand <dump> <policy> <component> <depth>
+ dmprof json [-p POLICY] <first-dump>
+ dmprof list [-p POLICY] <first-dump>
+ dmprof pprof [-c COMPONENT] <dump> <policy>
+ dmprof stacktrace <dump>
+""" % (sys.argv[0]))
+ sys.exit(1)
+ action = sys.argv.pop(1)
+
+ return COMMANDS[action](sys.argv)
if __name__ == '__main__':
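[Note] To summarize the command-line change for existing users, the old flag-style invocations map onto the new subcommands roughly as follows (policy labels such as l0 come from policies.json; the chrome binary argument is no longer taken):

  # Old (removed):  dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap
  dmprof csv -p l0 hprof.12345.0001.heap > result.csv
  dmprof json -p l0 hprof.12345.0001.heap > result.json
  dmprof list -p l0 hprof.12345.0012.heap
  dmprof expand hprof.12345.0012.heap l0 tc-webkit 4
  dmprof pprof -c tc-webkit hprof.12345.0012.heap l0 > for_pprof.txt
  dmprof stacktrace hprof.12345.0001.heap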
diff --git a/tools/deep_memory_profiler/policies.json b/tools/deep_memory_profiler/policies.json
new file mode 100644
index 0000000..2d1b34e
--- /dev/null
+++ b/tools/deep_memory_profiler/policies.json
@@ -0,0 +1,11 @@
+{
+ "l0": {
+ "file": "policy.l0.txt"
+ },
+ "l1": {
+ "file": "policy.l1.txt"
+ },
+ "l2": {
+ "file": "policy.l2.txt"
+ }
+}
\ No newline at end of file
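[Note] A minimal sketch of how this new policies.json is consumed, mirroring POLICIES_JSON_PATH, load_default_policies, and load_policy in the dmprof diff above (the printed mapping assumes the three entries in this file):

  import json
  import os

  POLICIES_JSON_PATH = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'policies.json')

  with open(POLICIES_JSON_PATH, mode='r') as policies_f:
    policies = json.load(policies_f)

  # Each label names a policy file resolved relative to the script.
  for label in sorted(policies):
    print('%s -> %s' % (label, policies[label]['file']))
  # l0 -> policy.l0.txt
  # l1 -> policy.l1.txt
  # l2 -> policy.l2.txt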