author     dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-07-09 06:26:32 +0000
committer  dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-07-09 06:26:32 +0000
commit  e755f1a1cb3b4a193c95716a6ce5d0987dec2697 (patch)
tree    171e6ad58777ac36bbb10d4b0feae23b4a873c6a /tools/deep_memory_profiler
parent  b3a97b552e3e46a0172c6b92645a2f8319f5397d (diff)
Refactor dmprof: split lib.Dump into lib.Dump and lib.DeepDump.
It's the second step to accept Android's heap profiler discussed in
http://crbug.com/382489.

BUG=391212
NOTRY=True

Review URL: https://codereview.chromium.org/371303002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@281979 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/deep_memory_profiler')
-rw-r--r--  tools/deep_memory_profiler/lib/deep_dump.py  465
-rw-r--r--  tools/deep_memory_profiler/lib/dump.py       415
2 files changed, 480 insertions, 400 deletions
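
For orientation before the diff: lib.dump now keeps Dump as a thin interface whose
load() constructs the concrete parser, while lib.deep_dump carries all of the
DUMP_DEEP_* parsing as DeepDump. A condensed, illustrative sketch of the two modules
(not the actual source; the real classes define many more properties):

    # --- lib/dump.py (condensed) ---
    import os

    class Dump(object):
        """Interface for heap profile dumps; format parsers subclass it."""

        @property
        def path(self):
            raise NotImplementedError

        @staticmethod
        def load(path):
            # Imported locally so lib.dump and lib.deep_dump can refer to
            # each other without a circular import at module load time.
            from lib.deep_dump import DeepDump
            return DeepDump(path, os.stat(path).st_mtime)

    # --- lib/deep_dump.py (condensed) ---
    class DeepDump(Dump):
        """Parses the DUMP_DEEP_* formats (parsing elided in this sketch)."""

        def __init__(self, path, modified_time):
            super(DeepDump, self).__init__()
            self._path = path
            self._time = modified_time

        @property
        def path(self):
            return self._path
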
diff --git a/tools/deep_memory_profiler/lib/deep_dump.py b/tools/deep_memory_profiler/lib/deep_dump.py
new file mode 100644
index 0000000..dc37ea0
--- /dev/null
+++ b/tools/deep_memory_profiler/lib/deep_dump.py
@@ -0,0 +1,465 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import copy
+import datetime
+import logging
+import os
+import re
+import time
+
+from lib.dump import Dump
+from lib.exceptions import EmptyDumpException, InvalidDumpException
+from lib.exceptions import ObsoleteDumpVersionException, ParsingException
+from lib.pageframe import PageFrame
+from lib.range_dict import ExclusiveRangeDict
+from lib.symbol import procfs
+
+
+LOGGER = logging.getLogger('dmprof')
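+# Indexes into a whitespace-split stacktrace line, which begins with the
+# columns "<virtual> <committed> <alloc_count> <free_count> @ <bucket_id>".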
+VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
+
+
+# Heap Profile Dump versions
+
+# DUMP_DEEP_[1-4] are obsolete.
+# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
+# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
+# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
+# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
+# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
+DUMP_DEEP_1 = 'DUMP_DEEP_1'
+DUMP_DEEP_2 = 'DUMP_DEEP_2'
+DUMP_DEEP_3 = 'DUMP_DEEP_3'
+DUMP_DEEP_4 = 'DUMP_DEEP_4'
+
+DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
+
+# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
+# malloc and mmap are identified in bucket files.
+# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
+DUMP_DEEP_5 = 'DUMP_DEEP_5'
+
+# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
+DUMP_DEEP_6 = 'DUMP_DEEP_6'
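+
+# A dump file declares its version on its header line, for example:
+#   heap profile: DUMP_DEEP_6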
+
+
+class DeepDump(Dump):
+ """Represents a heap profile dump."""
+
+ _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
+
+ _HOOK_PATTERN = re.compile(
+ r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
+ r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
+
+ _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
+ '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
+ _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
+ '(?P<RESERVED>[0-9]+)')
+
+ _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
+ _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
+
+ _TIME_PATTERN_FORMAT = re.compile(
+ r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
+ _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
+
+ def __init__(self, path, modified_time):
+ super(DeepDump, self).__init__()
+ self._path = path
+ matched = self._PATH_PATTERN.match(path)
+ self._pid = int(matched.group(2))
+ self._count = int(matched.group(3))
+ self._time = modified_time
+ self._map = {}
+ self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
+ self._stacktrace_lines = []
+ self._global_stats = {} # used only in apply_policy
+
+ self._run_id = ''
+ self._pagesize = 4096
+ self._pageframe_length = 0
+ self._pageframe_encoding = ''
+ self._has_pagecount = False
+
+ self._version = ''
+ self._lines = []
+
+ @property
+ def path(self):
+ return self._path
+
+ @property
+ def count(self):
+ return self._count
+
+ @property
+ def time(self):
+ return self._time
+
+ @property
+ def iter_map(self):
+ for region in sorted(self._map.iteritems()):
+ yield region[0], region[1]
+
+ @property
+ def iter_stacktrace(self):
+ for line in self._stacktrace_lines:
+ words = line.split()
+ yield (int(words[BUCKET_ID]),
+ int(words[VIRTUAL]),
+ int(words[COMMITTED]),
+ int(words[ALLOC_COUNT]),
+ int(words[FREE_COUNT]))
+
+ def global_stat(self, name):
+ return self._global_stats[name]
+
+ @property
+ def run_id(self):
+ return self._run_id
+
+ @property
+ def pagesize(self):
+ return self._pagesize
+
+ @property
+ def pageframe_length(self):
+ return self._pageframe_length
+
+ @property
+ def pageframe_encoding(self):
+ return self._pageframe_encoding
+
+ @property
+ def has_pagecount(self):
+ return self._has_pagecount
+
+ @staticmethod
+ def load(path, log_header='Loading a heap profile dump: '):
+ """Loads a heap profile dump.
+
+ Args:
+ path: A file path string to load.
+ log_header: A preceding string for log messages.
+
+ Returns:
+ A loaded Dump object.
+
+ Raises:
+ ParsingException for invalid heap profile dumps.
+ """
+    dump = DeepDump(path, os.stat(path).st_mtime)
+ with open(path, 'r') as f:
+ dump.load_file(f, log_header)
+ return dump
+
+ def load_file(self, f, log_header):
+ self._lines = [line for line in f
+ if line and not line.startswith('#')]
+
+ try:
+ self._version, ln = self._parse_version()
+ self._parse_meta_information()
+ if self._version == DUMP_DEEP_6:
+ self._parse_mmap_list()
+ self._parse_global_stats()
+ self._extract_stacktrace_lines(ln)
+ except EmptyDumpException:
+ LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
+    except ParsingException as e:
+ LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
+ raise
+ else:
+ LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
+
+ def _parse_version(self):
+ """Parses a version string in self._lines.
+
+ Returns:
+ A pair of (a string representing a version of the stacktrace dump,
+ and an integer indicating a line number next to the version string).
+
+ Raises:
+ ParsingException for invalid dump versions.
+ """
+ version = ''
+
+ # Skip until an identifiable line.
+ headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
+ if not self._lines:
+ raise EmptyDumpException('Empty heap dump file.')
+ (ln, found) = skip_while(
+ 0, len(self._lines),
+ lambda n: not self._lines[n].startswith(headers))
+ if not found:
+ raise InvalidDumpException('No version header.')
+
+ # Identify a version.
+ if self._lines[ln].startswith('heap profile: '):
+ version = self._lines[ln][13:].strip()
+ if version in (DUMP_DEEP_5, DUMP_DEEP_6):
+ (ln, _) = skip_while(
+ ln, len(self._lines),
+ lambda n: self._lines[n] != 'STACKTRACES:\n')
+ elif version in DUMP_DEEP_OBSOLETE:
+ raise ObsoleteDumpVersionException(version)
+ else:
+ raise InvalidDumpException('Invalid version: %s' % version)
+ elif self._lines[ln] == 'STACKTRACES:\n':
+ raise ObsoleteDumpVersionException(DUMP_DEEP_1)
+ elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
+ raise ObsoleteDumpVersionException(DUMP_DEEP_2)
+
+ return (version, ln)
+
+ def _parse_global_stats(self):
+ """Parses lines in self._lines as global stats."""
+ (ln, _) = skip_while(
+ 0, len(self._lines),
+ lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
+
+ global_stat_names = [
+ 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
+ 'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
+ 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
+ 'nonprofiled-stack', 'nonprofiled-other',
+ 'profiled-mmap', 'profiled-malloc']
+
+ for prefix in global_stat_names:
+ (ln, _) = skip_while(
+ ln, len(self._lines),
+ lambda n: self._lines[n].split()[0] != prefix)
+ words = self._lines[ln].split()
+ self._global_stats[prefix + '_virtual'] = int(words[-2])
+ self._global_stats[prefix + '_committed'] = int(words[-1])
+
+ def _parse_meta_information(self):
+ """Parses lines in self._lines for meta information."""
+ (ln, found) = skip_while(
+ 0, len(self._lines),
+ lambda n: self._lines[n] != 'META:\n')
+ if not found:
+ return
+ ln += 1
+
+ while True:
+ if self._lines[ln].startswith('Time:'):
+ matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
+ matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
+ if matched_format:
+ self._time = time.mktime(datetime.datetime.strptime(
+ matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
+ if matched_format.group(2):
+ self._time += float(matched_format.group(2)[1:]) / 1000.0
+ elif matched_seconds:
+ self._time = float(matched_seconds.group(1))
+ elif self._lines[ln].startswith('Reason:'):
+ pass # Nothing to do for 'Reason:'
+ elif self._lines[ln].startswith('PageSize: '):
+ self._pagesize = int(self._lines[ln][10:])
+ elif self._lines[ln].startswith('CommandLine:'):
+ pass
+ elif (self._lines[ln].startswith('PageFrame: ') or
+ self._lines[ln].startswith('PFN: ')):
+ if self._lines[ln].startswith('PageFrame: '):
+ words = self._lines[ln][11:].split(',')
+ else:
+ words = self._lines[ln][5:].split(',')
+ for word in words:
+ if word == '24':
+ self._pageframe_length = 24
+ elif word == 'Base64':
+ self._pageframe_encoding = 'base64'
+ elif word == 'PageCount':
+ self._has_pagecount = True
+ elif self._lines[ln].startswith('RunID: '):
+ self._run_id = self._lines[ln][7:].strip()
+ elif (self._lines[ln].startswith('MMAP_LIST:') or
+ self._lines[ln].startswith('GLOBAL_STATS:')):
+        # Stop: the meta section ends at "MMAP_LIST:" or "GLOBAL_STATS:".
+ break
+ else:
+ pass
+ ln += 1
+
+ def _parse_mmap_list(self):
+ """Parses lines in self._lines as a mmap list."""
+ (ln, found) = skip_while(
+ 0, len(self._lines),
+ lambda n: self._lines[n] != 'MMAP_LIST:\n')
+ if not found:
+ return {}
+
+ ln += 1
+ self._map = {}
+ current_vma = {}
+ pageframe_list = []
+ while True:
+ entry = procfs.ProcMaps.parse_line(self._lines[ln])
+ if entry:
+ current_vma = {}
+ for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
+ for key, value in entry.as_dict().iteritems():
+ attr[key] = value
+ current_vma[key] = value
+ ln += 1
+ continue
+
+ if self._lines[ln].startswith(' PF: '):
+ for pageframe in self._lines[ln][5:].split():
+ pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
+ ln += 1
+ continue
+
+ matched = self._HOOK_PATTERN.match(self._lines[ln])
+ if not matched:
+ break
+ # 2: starting address
+ # 5: end address
+ # 7: hooked or unhooked
+ # 8: additional information
+ if matched.group(7) == 'hooked':
+ submatched = self._HOOKED_PATTERN.match(matched.group(8))
+ if not submatched:
+ submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
+ elif matched.group(7) == 'unhooked':
+ submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
+ if not submatched:
+ submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
+ else:
+ assert matched.group(7) in ['hooked', 'unhooked']
+
+ submatched_dict = submatched.groupdict()
+ region_info = { 'vma': current_vma }
+ if submatched_dict.get('TYPE'):
+ region_info['type'] = submatched_dict['TYPE'].strip()
+ if submatched_dict.get('COMMITTED'):
+ region_info['committed'] = int(submatched_dict['COMMITTED'])
+ if submatched_dict.get('RESERVED'):
+ region_info['reserved'] = int(submatched_dict['RESERVED'])
+ if submatched_dict.get('BUCKETID'):
+ region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
+
+ if matched.group(1) == '(':
+ start = current_vma['begin']
+ else:
+ start = int(matched.group(2), 16)
+ if matched.group(4) == '(':
+ end = current_vma['end']
+ else:
+ end = int(matched.group(5), 16)
+
+ if pageframe_list and pageframe_list[0].start_truncated:
+ pageframe_list[0].set_size(
+ pageframe_list[0].size - start % self._pagesize)
+ if pageframe_list and pageframe_list[-1].end_truncated:
+ pageframe_list[-1].set_size(
+ pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
+ region_info['pageframe'] = pageframe_list
+ pageframe_list = []
+
+ self._map[(start, end)] = (matched.group(7), region_info)
+ ln += 1
+
+ def _extract_stacktrace_lines(self, line_number):
+ """Extracts the position of stacktrace lines.
+
+ Valid stacktrace lines are stored into self._stacktrace_lines.
+
+ Args:
+ line_number: A line number to start parsing in lines.
+
+ Raises:
+ ParsingException for invalid dump versions.
+ """
+ if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
+ (line_number, _) = skip_while(
+ line_number, len(self._lines),
+ lambda n: not self._lines[n].split()[0].isdigit())
+ stacktrace_start = line_number
+ (line_number, _) = skip_while(
+ line_number, len(self._lines),
+ lambda n: self._check_stacktrace_line(self._lines[n]))
+ self._stacktrace_lines = self._lines[stacktrace_start:line_number]
+
+ elif self._version in DUMP_DEEP_OBSOLETE:
+ raise ObsoleteDumpVersionException(self._version)
+
+ else:
+ raise InvalidDumpException('Invalid version: %s' % self._version)
+
+ @staticmethod
+ def _check_stacktrace_line(stacktrace_line):
+ """Checks if a given stacktrace_line is valid as stacktrace.
+
+ Args:
+ stacktrace_line: A string to be checked.
+
+ Returns:
+ True if the given stacktrace_line is valid.
+ """
+ words = stacktrace_line.split()
+ if len(words) < BUCKET_ID + 1:
+ return False
+ if words[BUCKET_ID - 1] != '@':
+ return False
+ return True
+
+
+class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
+ """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
+ _DUMMY_ENTRY = procfs.ProcMapsEntry(
+ 0, # begin
+ 0, # end
+ '-', # readable
+ '-', # writable
+ '-', # executable
+ '-', # private
+ 0, # offset
+ '00', # major
+ '00', # minor
+ 0, # inode
+ '' # name
+ )
+
+ def __init__(self):
+ super(ProcMapsEntryAttribute, self).__init__()
+ self._entry = self._DUMMY_ENTRY.as_dict()
+
+ def __str__(self):
+ return str(self._entry)
+
+ def __repr__(self):
+ return 'ProcMapsEntryAttribute' + str(self._entry)
+
+ def __getitem__(self, key):
+ return self._entry[key]
+
+ def __setitem__(self, key, value):
+ if key not in self._entry:
+ raise KeyError(key)
+ self._entry[key] = value
+
+ def copy(self):
+ new_entry = ProcMapsEntryAttribute()
+ for key, value in self._entry.iteritems():
+ new_entry[key] = copy.deepcopy(value)
+ return new_entry
+
+
+def skip_while(index, max_index, skipping_condition):
+ """Increments |index| until |skipping_condition|(|index|) is False.
+
+ Returns:
+    A pair of (the first index at which |skipping_condition| is False, True),
+    or (|max_index|, False) if the condition holds for every smaller index.
+ """
+ while skipping_condition(index):
+ index += 1
+ if index >= max_index:
+ return index, False
+ return index, True
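
Between the two file diffs, a note on skip_while (defined at the end of deep_dump.py
above): it drives all of the section scanning in this parser. A worked example of its
contract, using a hand-made line list (illustrative, not from the tree):

    lines = ['# comment\n', 'META:\n', 'Time: 12345\n']

    # Advance past every line that is not 'META:\n'; stops at index 1.
    ln, found = skip_while(0, len(lines), lambda n: lines[n] != 'META:\n')
    assert (ln, found) == (1, True)

    # A condition that never becomes False runs off the end: the helper
    # returns (max_index, False) without evaluating the condition there.
    ln, found = skip_while(0, len(lines), lambda n: True)
    assert (ln, found) == (3, False)
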
diff --git a/tools/deep_memory_profiler/lib/dump.py b/tools/deep_memory_profiler/lib/dump.py
index 1fa4fc3..798763a 100644
--- a/tools/deep_memory_profiler/lib/dump.py
+++ b/tools/deep_memory_profiler/lib/dump.py
@@ -2,138 +2,60 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-import copy
-import datetime
import logging
import os
-import re
-import time
-
-from lib.exceptions import EmptyDumpException, InvalidDumpException
-from lib.exceptions import ObsoleteDumpVersionException, ParsingException
-from lib.pageframe import PageFrame
-from lib.range_dict import ExclusiveRangeDict
-from lib.symbol import procfs
LOGGER = logging.getLogger('dmprof')
-VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
-
-
-# Heap Profile Dump versions
-
-# DUMP_DEEP_[1-4] are obsolete.
-# DUMP_DEEP_2+ distinct mmap regions and malloc chunks.
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
-DUMP_DEEP_1 = 'DUMP_DEEP_1'
-DUMP_DEEP_2 = 'DUMP_DEEP_2'
-DUMP_DEEP_3 = 'DUMP_DEEP_3'
-DUMP_DEEP_4 = 'DUMP_DEEP_4'
-
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
-
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
-# malloc and mmap are identified in bucket files.
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
-DUMP_DEEP_5 = 'DUMP_DEEP_5'
-
-# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
-DUMP_DEEP_6 = 'DUMP_DEEP_6'
class Dump(object):
"""Represents a heap profile dump."""
-
- _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
-
- _HOOK_PATTERN = re.compile(
- r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
- r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
-
- _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
- '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
- _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
- '(?P<RESERVED>[0-9]+)')
-
- _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
- _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
-
- _TIME_PATTERN_FORMAT = re.compile(
- r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
- _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
-
- def __init__(self, path, modified_time):
- self._path = path
- matched = self._PATH_PATTERN.match(path)
- self._pid = int(matched.group(2))
- self._count = int(matched.group(3))
- self._time = modified_time
- self._map = {}
- self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
- self._stacktrace_lines = []
- self._global_stats = {} # used only in apply_policy
-
- self._run_id = ''
- self._pagesize = 4096
- self._pageframe_length = 0
- self._pageframe_encoding = ''
- self._has_pagecount = False
-
- self._version = ''
- self._lines = []
+ def __init__(self):
+ pass
@property
def path(self):
- return self._path
+ raise NotImplementedError
@property
def count(self):
- return self._count
+ raise NotImplementedError
@property
def time(self):
- return self._time
+ raise NotImplementedError
@property
def iter_map(self):
- for region in sorted(self._map.iteritems()):
- yield region[0], region[1]
+ raise NotImplementedError
@property
def iter_stacktrace(self):
- for line in self._stacktrace_lines:
- words = line.split()
- yield (int(words[BUCKET_ID]),
- int(words[VIRTUAL]),
- int(words[COMMITTED]),
- int(words[ALLOC_COUNT]),
- int(words[FREE_COUNT]))
+ raise NotImplementedError
def global_stat(self, name):
- return self._global_stats[name]
+ raise NotImplementedError
@property
def run_id(self):
- return self._run_id
+ raise NotImplementedError
@property
def pagesize(self):
- return self._pagesize
+ raise NotImplementedError
@property
def pageframe_length(self):
- return self._pageframe_length
+ raise NotImplementedError
@property
def pageframe_encoding(self):
- return self._pageframe_encoding
+ raise NotImplementedError
@property
def has_pagecount(self):
- return self._has_pagecount
+ raise NotImplementedError
@staticmethod
def load(path, log_header='Loading a heap profile dump: '):
@@ -149,263 +71,12 @@ class Dump(object):
Raises:
ParsingException for invalid heap profile dumps.
"""
- dump = Dump(path, os.stat(path).st_mtime)
+ from lib.deep_dump import DeepDump
+ dump = DeepDump(path, os.stat(path).st_mtime)
with open(path, 'r') as f:
dump.load_file(f, log_header)
return dump
- def load_file(self, f, log_header):
- self._lines = [line for line in f
- if line and not line.startswith('#')]
-
- try:
- self._version, ln = self._parse_version()
- self._parse_meta_information()
- if self._version == DUMP_DEEP_6:
- self._parse_mmap_list()
- self._parse_global_stats()
- self._extract_stacktrace_lines(ln)
- except EmptyDumpException:
- LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
- except ParsingException, e:
- LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
- raise
- else:
- LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
-
- def _parse_version(self):
- """Parses a version string in self._lines.
-
- Returns:
- A pair of (a string representing a version of the stacktrace dump,
- and an integer indicating a line number next to the version string).
-
- Raises:
- ParsingException for invalid dump versions.
- """
- version = ''
-
- # Skip until an identifiable line.
- headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
- if not self._lines:
- raise EmptyDumpException('Empty heap dump file.')
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: not self._lines[n].startswith(headers))
- if not found:
- raise InvalidDumpException('No version header.')
-
- # Identify a version.
- if self._lines[ln].startswith('heap profile: '):
- version = self._lines[ln][13:].strip()
- if version in (DUMP_DEEP_5, DUMP_DEEP_6):
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n] != 'STACKTRACES:\n')
- elif version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(version)
- else:
- raise InvalidDumpException('Invalid version: %s' % version)
- elif self._lines[ln] == 'STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_1)
- elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_2)
-
- return (version, ln)
-
- def _parse_global_stats(self):
- """Parses lines in self._lines as global stats."""
- (ln, _) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
-
- global_stat_names = [
- 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
- 'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
- 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
- 'nonprofiled-stack', 'nonprofiled-other',
- 'profiled-mmap', 'profiled-malloc']
-
- for prefix in global_stat_names:
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n].split()[0] != prefix)
- words = self._lines[ln].split()
- self._global_stats[prefix + '_virtual'] = int(words[-2])
- self._global_stats[prefix + '_committed'] = int(words[-1])
-
- def _parse_meta_information(self):
- """Parses lines in self._lines for meta information."""
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'META:\n')
- if not found:
- return
- ln += 1
-
- while True:
- if self._lines[ln].startswith('Time:'):
- matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
- matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
- if matched_format:
- self._time = time.mktime(datetime.datetime.strptime(
- matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
- if matched_format.group(2):
- self._time += float(matched_format.group(2)[1:]) / 1000.0
- elif matched_seconds:
- self._time = float(matched_seconds.group(1))
- elif self._lines[ln].startswith('Reason:'):
- pass # Nothing to do for 'Reason:'
- elif self._lines[ln].startswith('PageSize: '):
- self._pagesize = int(self._lines[ln][10:])
- elif self._lines[ln].startswith('CommandLine:'):
- pass
- elif (self._lines[ln].startswith('PageFrame: ') or
- self._lines[ln].startswith('PFN: ')):
- if self._lines[ln].startswith('PageFrame: '):
- words = self._lines[ln][11:].split(',')
- else:
- words = self._lines[ln][5:].split(',')
- for word in words:
- if word == '24':
- self._pageframe_length = 24
- elif word == 'Base64':
- self._pageframe_encoding = 'base64'
- elif word == 'PageCount':
- self._has_pagecount = True
- elif self._lines[ln].startswith('RunID: '):
- self._run_id = self._lines[ln][7:].strip()
- elif (self._lines[ln].startswith('MMAP_LIST:') or
- self._lines[ln].startswith('GLOBAL_STATS:')):
- # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
- break
- else:
- pass
- ln += 1
-
- def _parse_mmap_list(self):
- """Parses lines in self._lines as a mmap list."""
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'MMAP_LIST:\n')
- if not found:
- return {}
-
- ln += 1
- self._map = {}
- current_vma = {}
- pageframe_list = []
- while True:
- entry = procfs.ProcMaps.parse_line(self._lines[ln])
- if entry:
- current_vma = {}
- for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
- for key, value in entry.as_dict().iteritems():
- attr[key] = value
- current_vma[key] = value
- ln += 1
- continue
-
- if self._lines[ln].startswith(' PF: '):
- for pageframe in self._lines[ln][5:].split():
- pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
- ln += 1
- continue
-
- matched = self._HOOK_PATTERN.match(self._lines[ln])
- if not matched:
- break
- # 2: starting address
- # 5: end address
- # 7: hooked or unhooked
- # 8: additional information
- if matched.group(7) == 'hooked':
- submatched = self._HOOKED_PATTERN.match(matched.group(8))
- if not submatched:
- submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
- elif matched.group(7) == 'unhooked':
- submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
- if not submatched:
- submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
- else:
- assert matched.group(7) in ['hooked', 'unhooked']
-
- submatched_dict = submatched.groupdict()
- region_info = { 'vma': current_vma }
- if submatched_dict.get('TYPE'):
- region_info['type'] = submatched_dict['TYPE'].strip()
- if submatched_dict.get('COMMITTED'):
- region_info['committed'] = int(submatched_dict['COMMITTED'])
- if submatched_dict.get('RESERVED'):
- region_info['reserved'] = int(submatched_dict['RESERVED'])
- if submatched_dict.get('BUCKETID'):
- region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
-
- if matched.group(1) == '(':
- start = current_vma['begin']
- else:
- start = int(matched.group(2), 16)
- if matched.group(4) == '(':
- end = current_vma['end']
- else:
- end = int(matched.group(5), 16)
-
- if pageframe_list and pageframe_list[0].start_truncated:
- pageframe_list[0].set_size(
- pageframe_list[0].size - start % self._pagesize)
- if pageframe_list and pageframe_list[-1].end_truncated:
- pageframe_list[-1].set_size(
- pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
- region_info['pageframe'] = pageframe_list
- pageframe_list = []
-
- self._map[(start, end)] = (matched.group(7), region_info)
- ln += 1
-
- def _extract_stacktrace_lines(self, line_number):
- """Extracts the position of stacktrace lines.
-
- Valid stacktrace lines are stored into self._stacktrace_lines.
-
- Args:
- line_number: A line number to start parsing in lines.
-
- Raises:
- ParsingException for invalid dump versions.
- """
- if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: not self._lines[n].split()[0].isdigit())
- stacktrace_start = line_number
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: self._check_stacktrace_line(self._lines[n]))
- self._stacktrace_lines = self._lines[stacktrace_start:line_number]
-
- elif self._version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(self._version)
-
- else:
- raise InvalidDumpException('Invalid version: %s' % self._version)
-
- @staticmethod
- def _check_stacktrace_line(stacktrace_line):
- """Checks if a given stacktrace_line is valid as stacktrace.
-
- Args:
- stacktrace_line: A string to be checked.
-
- Returns:
- True if the given stacktrace_line is valid.
- """
- words = stacktrace_line.split()
- if len(words) < BUCKET_ID + 1:
- return False
- if words[BUCKET_ID - 1] != '@':
- return False
- return True
-
class DumpList(object):
"""Represents a sequence of heap profile dumps.
@@ -432,59 +103,3 @@ class DumpList(object):
def __getitem__(self, index):
return Dump.load(self._dump_path_list[index])
-
-
-class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
- """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
- _DUMMY_ENTRY = procfs.ProcMapsEntry(
- 0, # begin
- 0, # end
- '-', # readable
- '-', # writable
- '-', # executable
- '-', # private
- 0, # offset
- '00', # major
- '00', # minor
- 0, # inode
- '' # name
- )
-
- def __init__(self):
- super(ProcMapsEntryAttribute, self).__init__()
- self._entry = self._DUMMY_ENTRY.as_dict()
-
- def __str__(self):
- return str(self._entry)
-
- def __repr__(self):
- return 'ProcMapsEntryAttribute' + str(self._entry)
-
- def __getitem__(self, key):
- return self._entry[key]
-
- def __setitem__(self, key, value):
- if key not in self._entry:
- raise KeyError(key)
- self._entry[key] = value
-
- def copy(self):
- new_entry = ProcMapsEntryAttribute()
- for key, value in self._entry.iteritems():
- new_entry[key] = copy.deepcopy(value)
- return new_entry
-
-
-def skip_while(index, max_index, skipping_condition):
- """Increments |index| until |skipping_condition|(|index|) is False.
-
- Returns:
- A pair of an integer indicating a line number after skipped, and a
- boolean value which is True if found a line which skipping_condition
- is False for.
- """
- while skipping_condition(index):
- index += 1
- if index >= max_index:
- return index, False
- return index, True
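
After this split, Dump.load remains the single entry point for callers; it simply
constructs a DeepDump behind the scenes, and DumpList.__getitem__ (unchanged above)
still re-parses the file through that same entry point on every access. A usage sketch
(the file name is hypothetical, but must match the <prefix>.<pid>.<count>.heap pattern
required by DeepDump._PATH_PATTERN):

    from lib.dump import Dump

    dump = Dump.load('chrome.12345.0002.heap')
    print dump.path, dump.count, dump.run_id

    # iter_stacktrace is a property that yields parsed stacktrace columns.
    for bucket_id, virtual, committed, allocs, frees in dump.iter_stacktrace:
        print bucket_id, virtual, committed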