author     dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-07-09 06:26:32 +0000
committer  dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-07-09 06:26:32 +0000
commit  e755f1a1cb3b4a193c95716a6ce5d0987dec2697 (patch)
tree    171e6ad58777ac36bbb10d4b0feae23b4a873c6a /tools/deep_memory_profiler
parent  b3a97b552e3e46a0172c6b92645a2f8319f5397d (diff)
Refactor dmprof: split lib.Dump into lib.Dump and lib.DeepDump.
It's the second step to accept Android's heap profiler discussed in
http://crbug.com/382489.

BUG=391212
NOTRY=True

Review URL: https://codereview.chromium.org/371303002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@281979 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/deep_memory_profiler')
-rw-r--r--  tools/deep_memory_profiler/lib/deep_dump.py  465
-rw-r--r--  tools/deep_memory_profiler/lib/dump.py       415
2 files changed, 480 insertions, 400 deletions
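
For orientation before the diff: lib.dump now keeps Dump as a thin interface whose
load() constructs the concrete parser, while lib.deep_dump carries all of the
DUMP_DEEP_* parsing as DeepDump. A condensed, illustrative sketch of the two modules
(not the actual source; the real classes define many more properties):

    # --- lib/dump.py (condensed) ---
    import os

    class Dump(object):
        """Interface for heap profile dumps; format parsers subclass it."""

        @property
        def path(self):
            raise NotImplementedError

        @staticmethod
        def load(path):
            # Imported locally so lib.dump and lib.deep_dump can refer to
            # each other without a circular import at module load time.
            from lib.deep_dump import DeepDump
            return DeepDump(path, os.stat(path).st_mtime)

    # --- lib/deep_dump.py (condensed) ---
    class DeepDump(Dump):
        """Parses the DUMP_DEEP_* formats (parsing elided in this sketch)."""

        def __init__(self, path, modified_time):
            super(DeepDump, self).__init__()
            self._path = path
            self._time = modified_time

        @property
        def path(self):
            return self._path
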
diff --git a/tools/deep_memory_profiler/lib/deep_dump.py b/tools/deep_memory_profiler/lib/deep_dump.py
new file mode 100644
index 0000000..dc37ea0
--- /dev/null
+++ b/tools/deep_memory_profiler/lib/deep_dump.py
@@ -0,0 +1,465 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import copy
+import datetime
+import logging
+import os
+import re
+import time
+
+from lib.dump import Dump
+from lib.exceptions import EmptyDumpException, InvalidDumpException
+from lib.exceptions import ObsoleteDumpVersionException, ParsingException
+from lib.pageframe import PageFrame
+from lib.range_dict import ExclusiveRangeDict
+from lib.symbol import procfs
+
+
+LOGGER = logging.getLogger('dmprof')
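+# Indexes into a whitespace-split stacktrace line, which begins with the
+# columns "<virtual> <committed> <alloc_count> <free_count> @ <bucket_id>".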
+VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
+
+
+# Heap Profile Dump versions
+
+# DUMP_DEEP_[1-4] are obsolete.
+# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
+# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
+# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
+# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
+# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
+DUMP_DEEP_1 = 'DUMP_DEEP_1'
+DUMP_DEEP_2 = 'DUMP_DEEP_2'
+DUMP_DEEP_3 = 'DUMP_DEEP_3'
+DUMP_DEEP_4 = 'DUMP_DEEP_4'
+
+DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
+
+# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
+# malloc and mmap are identified in bucket files.
+# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
+DUMP_DEEP_5 = 'DUMP_DEEP_5'
+
+# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
+DUMP_DEEP_6 = 'DUMP_DEEP_6'
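+
+# A dump file declares its version on its header line, for example:
+#   heap profile: DUMP_DEEP_6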
+
+
+class DeepDump(Dump):
+ """Represents a heap profile dump."""
+
+ _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
+
+ _HOOK_PATTERN = re.compile(
+ r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
+ r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
+
+ _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
+ '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
+ _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
+ '(?P<RESERVED>[0-9]+)')
+
+ _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
+ _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
+
+ _TIME_PATTERN_FORMAT = re.compile(
+ r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
+ _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
+
+ def __init__(self, path, modified_time):
+ super(DeepDump, self).__init__()
+ self._path = path
+ matched = self._PATH_PATTERN.match(path)
+ self._pid = int(matched.group(2))
+ self._count = int(matched.group(3))
+ self._time = modified_time
+ self._map = {}
+ self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
+ self._stacktrace_lines = []
+ self._global_stats = {} # used only in apply_policy
+
+ self._run_id = ''
+ self._pagesize = 4096
+ self._pageframe_length = 0
+ self._pageframe_encoding = ''
+ self._has_pagecount = False
+
+ self._version = ''
+ self._lines = []
+
+ @property
+ def path(self):
+ return self._path
+
+ @property
+ def count(self):
+ return self._count
+
+ @property
+ def time(self):
+ return self._time
+
+ @property
+ def iter_map(self):
+ for region in sorted(self._map.iteritems()):
+ yield region[0], region[1]
+
+ @property
+ def iter_stacktrace(self):
+ for line in self._stacktrace_lines:
+ words = line.split()
+ yield (int(words[BUCKET_ID]),
+ int(words[VIRTUAL]),
+ int(words[COMMITTED]),
+ int(words[ALLOC_COUNT]),
+ int(words[FREE_COUNT]))
+
+ def global_stat(self, name):
+ return self._global_stats[name]
+
+ @property
+ def run_id(self):
+ return self._run_id
+
+ @property
+ def pagesize(self):
+ return self._pagesize
+
+ @property
+ def pageframe_length(self):
+ return self._pageframe_length
+
+ @property
+ def pageframe_encoding(self):
+ return self._pageframe_encoding
+
+ @property
+ def has_pagecount(self):
+ return self._has_pagecount
+
+ @staticmethod
+ def load(path, log_header='Loading a heap profile dump: '):
+ """Loads a heap profile dump.
+
+ Args:
+ path: A file path string to load.
+ log_header: A preceding string for log messages.
+
+ Returns:
+ A loaded Dump object.
+
+ Raises:
+ ParsingException for invalid heap profile dumps.
+ """
+    dump = DeepDump(path, os.stat(path).st_mtime)
+ with open(path, 'r') as f:
+ dump.load_file(f, log_header)
+ return dump
+
+ def load_file(self, f, log_header):
+ self._lines = [line for line in f
+ if line and not line.startswith('#')]
+
+ try:
+ self._version, ln = self._parse_version()
+ self._parse_meta_information()
+ if self._version == DUMP_DEEP_6:
+ self._parse_mmap_list()
+ self._parse_global_stats()
+ self._extract_stacktrace_lines(ln)
+ except EmptyDumpException:
+ LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
+    except ParsingException as e:
+ LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
+ raise
+ else:
+ LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
+
+ def _parse_version(self):
+ """Parses a version string in self._lines.
+
+ Returns:
+ A pair of (a string representing a version of the stacktrace dump,
+ and an integer indicating a line number next to the version string).
+
+ Raises:
+ ParsingException for invalid dump versions.
+ """
+ version = ''
+
+ # Skip until an identifiable line.
+ headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
+ if not self._lines:
+ raise EmptyDumpException('Empty heap dump file.')
+ (ln, found) = skip_while(
+ 0, len(self._lines),
+ lambda n: not self._lines[n].startswith(headers))
+ if not found:
+ raise InvalidDumpException('No version header.')
+
+ # Identify a version.
+ if self._lines[ln].startswith('heap profile: '):
+ version = self._lines[ln][13:].strip()
+ if version in (DUMP_DEEP_5, DUMP_DEEP_6):
+ (ln, _) = skip_while(
+ ln, len(self._lines),
+ lambda n: self._lines[n] != 'STACKTRACES:\n')
+ elif version in DUMP_DEEP_OBSOLETE:
+ raise ObsoleteDumpVersionException(version)
+ else:
+ raise InvalidDumpException('Invalid version: %s' % version)
+ elif self._lines[ln] == 'STACKTRACES:\n':
+ raise ObsoleteDumpVersionException(DUMP_DEEP_1)
+ elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
+ raise ObsoleteDumpVersionException(DUMP_DEEP_2)
+
+ return (version, ln)
+
+ def _parse_global_stats(self):
+ """Parses lines in self._lines as global stats."""
+ (ln, _) = skip_while(
+ 0, len(self._lines),
+ lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
+
+ global_stat_names = [
+ 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
+ 'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
+ 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
+ 'nonprofiled-stack', 'nonprofiled-other',
+ 'profiled-mmap', 'profiled-malloc']
+
+ for prefix in global_stat_names:
+ (ln, _) = skip_while(
+ ln, len(self._lines),
+ lambda n: self._lines[n].split()[0] != prefix)
+ words = self._lines[ln].split()
+ self._global_stats[prefix + '_virtual'] = int(words[-2])
+ self._global_stats[prefix + '_committed'] = int(words[-1])
+
+ def _parse_meta_information(self):
+ """Parses lines in self._lines for meta information."""
+ (ln, found) = skip_while(
+ 0, len(self._lines),
+ lambda n: self._lines[n] != 'META:\n')
+ if not found:
+ return
+ ln += 1
+
+ while True:
+ if self._lines[ln].startswith('Time:'):
+ matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
+ matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
+ if matched_format:
+ self._time = time.mktime(datetime.datetime.strptime(
+ matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
+ if matched_format.group(2):
+ self._time += float(matched_format.group(2)[1:]) / 1000.0
+ elif matched_seconds:
+ self._time = float(matched_seconds.group(1))
+ elif self._lines[ln].startswith('Reason:'):
+ pass # Nothing to do for 'Reason:'
+ elif self._lines[ln].startswith('PageSize: '):
+ self._pagesize = int(self._lines[ln][10:])
+ elif self._lines[ln].startswith('CommandLine:'):
+ pass
+ elif (self._lines[ln].startswith('PageFrame: ') or
+ self._lines[ln].startswith('PFN: ')):
+ if self._lines[ln].startswith('PageFrame: '):
+ words = self._lines[ln][11:].split(',')
+ else:
+ words = self._lines[ln][5:].split(',')
+ for word in words:
+ if word == '24':
+ self._pageframe_length = 24
+ elif word == 'Base64':
+ self._pageframe_encoding = 'base64'
+ elif word == 'PageCount':
+ self._has_pagecount = True
+ elif self._lines[ln].startswith('RunID: '):
+ self._run_id = self._lines[ln][7:].strip()
+ elif (self._lines[ln].startswith('MMAP_LIST:') or
+ self._lines[ln].startswith('GLOBAL_STATS:')):
+        # Stop: the meta section ends at "MMAP_LIST:" or "GLOBAL_STATS:".
+ break
+ else:
+ pass
+ ln += 1
+
+ def _parse_mmap_list(self):
+ """Parses lines in self._lines as a mmap list."""
+ (ln, found) = skip_while(
+ 0, len(self._lines),
+ lambda n: self._lines[n] != 'MMAP_LIST:\n')
+ if not found:
+ return {}
+
+ ln += 1
+ self._map = {}
+ current_vma = {}
+ pageframe_list = []
+ while True:
+ entry = procfs.ProcMaps.parse_line(self._lines[ln])
+ if entry:
+ current_vma = {}
+ for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
+ for key, value in entry.as_dict().iteritems():
+ attr[key] = value
+ current_vma[key] = value
+ ln += 1
+ continue
+
+ if self._lines[ln].startswith(' PF: '):
+ for pageframe in self._lines[ln][5:].split():
+ pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
+ ln += 1
+ continue
+
+ matched = self._HOOK_PATTERN.match(self._lines[ln])
+ if not matched:
+ break
+ # 2: starting address
+ # 5: end address
+ # 7: hooked or unhooked
+ # 8: additional information
+ if matched.group(7) == 'hooked':
+ submatched = self._HOOKED_PATTERN.match(matched.group(8))
+ if not submatched:
+ submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
+ elif matched.group(7) == 'unhooked':
+ submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
+ if not submatched:
+ submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
+ else:
+ assert matched.group(7) in ['hooked', 'unhooked']
+
+ submatched_dict = submatched.groupdict()
+ region_info = { 'vma': current_vma }
+ if submatched_dict.get('TYPE'):
+ region_info['type'] = submatched_dict['TYPE'].strip()
+ if submatched_dict.get('COMMITTED'):
+ region_info['committed'] = int(submatched_dict['COMMITTED'])
+ if submatched_dict.get('RESERVED'):
+ region_info['reserved'] = int(submatched_dict['RESERVED'])
+ if submatched_dict.get('BUCKETID'):
+ region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
+
+ if matched.group(1) == '(':
+ start = current_vma['begin']
+ else:
+ start = int(matched.group(2), 16)
+ if matched.group(4) == '(':
+ end = current_vma['end']
+ else:
+ end = int(matched.group(5), 16)
+
+ if pageframe_list and pageframe_list[0].start_truncated:
+ pageframe_list[0].set_size(
+ pageframe_list[0].size - start % self._pagesize)
+ if pageframe_list and pageframe_list[-1].end_truncated:
+ pageframe_list[-1].set_size(
+ pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
+ region_info['pageframe'] = pageframe_list
+ pageframe_list = []
+
+ self._map[(start, end)] = (matched.group(7), region_info)
+ ln += 1
+
+ def _extract_stacktrace_lines(self, line_number):
+ """Extracts the position of stacktrace lines.
+
+ Valid stacktrace lines are stored into self._stacktrace_lines.
+
+ Args:
+ line_number: A line number to start parsing in lines.
+
+ Raises:
+ ParsingException for invalid dump versions.
+ """
+ if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
+ (line_number, _) = skip_while(
+ line_number, len(self._lines),
+ lambda n: not self._lines[n].split()[0].isdigit())
+ stacktrace_start = line_number
+ (line_number, _) = skip_while(
+ line_number, len(self._lines),
+ lambda n: self._check_stacktrace_line(self._lines[n]))
+ self._stacktrace_lines = self._lines[stacktrace_start:line_number]
+
+ elif self._version in DUMP_DEEP_OBSOLETE:
+ raise ObsoleteDumpVersionException(self._version)
+
+ else:
+ raise InvalidDumpException('Invalid version: %s' % self._version)
+
+ @staticmethod
+ def _check_stacktrace_line(stacktrace_line):
+ """Checks if a given stacktrace_line is valid as stacktrace.
+
+ Args:
+ stacktrace_line: A string to be checked.
+
+ Returns:
+ True if the given stacktrace_line is valid.
+ """
+ words = stacktrace_line.split()
+ if len(words) < BUCKET_ID + 1:
+ return False
+ if words[BUCKET_ID - 1] != '@':
+ return False
+ return True
+
+
+class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
+ """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
+ _DUMMY_ENTRY = procfs.ProcMapsEntry(
+ 0, # begin
+ 0, # end
+ '-', # readable
+ '-', # writable
+ '-', # executable
+ '-', # private
+ 0, # offset
+ '00', # major
+ '00', # minor
+ 0, # inode
+ '' # name
+ )
+
+ def __init__(self):
+ super(ProcMapsEntryAttribute, self).__init__()
+ self._entry = self._DUMMY_ENTRY.as_dict()
+
+ def __str__(self):
+ return str(self._entry)
+
+ def __repr__(self):
+ return 'ProcMapsEntryAttribute' + str(self._entry)
+
+ def __getitem__(self, key):
+ return self._entry[key]
+
+ def __setitem__(self, key, value):
+ if key not in self._entry:
+ raise KeyError(key)
+ self._entry[key] = value
+
+ def copy(self):
+ new_entry = ProcMapsEntryAttribute()
+ for key, value in self._entry.iteritems():
+ new_entry[key] = copy.deepcopy(value)
+ return new_entry
+
+
+def skip_while(index, max_index, skipping_condition):
+ """Increments |index| until |skipping_condition|(|index|) is False.
+
+ Returns:
+    A pair of (the first index at which |skipping_condition| is False, True),
+    or (|max_index|, False) if the condition holds for every smaller index.
+ """
+ while skipping_condition(index):
+ index += 1
+ if index >= max_index:
+ return index, False
+ return index, True
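
Between the two file diffs, a note on skip_while (defined at the end of deep_dump.py
above): it drives all of the section scanning in this parser. A worked example of its
contract, using a hand-made line list (illustrative, not from the tree):

    lines = ['# comment\n', 'META:\n', 'Time: 12345\n']

    # Advance past every line that is not 'META:\n'; stops at index 1.
    ln, found = skip_while(0, len(lines), lambda n: lines[n] != 'META:\n')
    assert (ln, found) == (1, True)

    # A condition that never becomes False runs off the end: the helper
    # returns (max_index, False) without evaluating the condition there.
    ln, found = skip_while(0, len(lines), lambda n: True)
    assert (ln, found) == (3, False)
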
diff --git a/tools/deep_memory_profiler/lib/dump.py b/tools/deep_memory_profiler/lib/dump.py
index 1fa4fc3..798763a 100644
--- a/tools/deep_memory_profiler/lib/dump.py
+++ b/tools/deep_memory_profiler/lib/dump.py
@@ -2,138 +2,60 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-import copy
-import datetime
import logging
import os
-import re
-import time
-
-from lib.exceptions import EmptyDumpException, InvalidDumpException
-from lib.exceptions import ObsoleteDumpVersionException, ParsingException
-from lib.pageframe import PageFrame
-from lib.range_dict import ExclusiveRangeDict
-from lib.symbol import procfs
LOGGER = logging.getLogger('dmprof')
-VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
-
-
-# Heap Profile Dump versions
-
-# DUMP_DEEP_[1-4] are obsolete.
-# DUMP_DEEP_2+ distinct mmap regions and malloc chunks.
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
-DUMP_DEEP_1 = 'DUMP_DEEP_1'
-DUMP_DEEP_2 = 'DUMP_DEEP_2'
-DUMP_DEEP_3 = 'DUMP_DEEP_3'
-DUMP_DEEP_4 = 'DUMP_DEEP_4'
-
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
-
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
-# malloc and mmap are identified in bucket files.
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
-DUMP_DEEP_5 = 'DUMP_DEEP_5'
-
-# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
-DUMP_DEEP_6 = 'DUMP_DEEP_6'
class Dump(object):
"""Represents a heap profile dump."""
-
- _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
-
- _HOOK_PATTERN = re.compile(
- r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
- r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
-
- _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
- '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
- _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
- '(?P<RESERVED>[0-9]+)')
-
- _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
- _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
-
- _TIME_PATTERN_FORMAT = re.compile(
- r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
- _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
-
- def __init__(self, path, modified_time):
- self._path = path
- matched = self._PATH_PATTERN.match(path)
- self._pid = int(matched.group(2))
- self._count = int(matched.group(3))
- self._time = modified_time
- self._map = {}
- self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
- self._stacktrace_lines = []
- self._global_stats = {} # used only in apply_policy
-
- self._run_id = ''
- self._pagesize = 4096
- self._pageframe_length = 0
- self._pageframe_encoding = ''
- self._has_pagecount = False
-
- self._version = ''
- self._lines = []
+ def __init__(self):
+ pass
@property
def path(self):
- return self._path
+ raise NotImplementedError
@property
def count(self):
- return self._count
+ raise NotImplementedError
@property
def time(self):
- return self._time
+ raise NotImplementedError
@property
def iter_map(self):
- for region in sorted(self._map.iteritems()):
- yield region[0], region[1]
+ raise NotImplementedError
@property
def iter_stacktrace(self):
- for line in self._stacktrace_lines:
- words = line.split()
- yield (int(words[BUCKET_ID]),
- int(words[VIRTUAL]),
- int(words[COMMITTED]),
- int(words[ALLOC_COUNT]),
- int(words[FREE_COUNT]))
+ raise NotImplementedError
def global_stat(self, name):
- return self._global_stats[name]
+ raise NotImplementedError
@property
def run_id(self):
- return self._run_id
+ raise NotImplementedError
@property
def pagesize(self):
- return self._pagesize
+ raise NotImplementedError
@property
def pageframe_length(self):
- return self._pageframe_length
+ raise NotImplementedError
@property
def pageframe_encoding(self):
- return self._pageframe_encoding
+ raise NotImplementedError
@property
def has_pagecount(self):
- return self._has_pagecount
+ raise NotImplementedError
@staticmethod
def load(path, log_header='Loading a heap profile dump: '):
@@ -149,263 +71,12 @@ class Dump(object):
Raises:
ParsingException for invalid heap profile dumps.
"""
- dump = Dump(path, os.stat(path).st_mtime)
+ from lib.deep_dump import DeepDump
+ dump = DeepDump(path, os.stat(path).st_mtime)
with open(path, 'r') as f:
dump.load_file(f, log_header)
return dump
- def load_file(self, f, log_header):
- self._lines = [line for line in f
- if line and not line.startswith('#')]
-
- try:
- self._version, ln = self._parse_version()
- self._parse_meta_information()
- if self._version == DUMP_DEEP_6:
- self._parse_mmap_list()
- self._parse_global_stats()
- self._extract_stacktrace_lines(ln)
- except EmptyDumpException:
- LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
- except ParsingException, e:
- LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
- raise
- else:
- LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
-
- def _parse_version(self):
- """Parses a version string in self._lines.
-
- Returns:
- A pair of (a string representing a version of the stacktrace dump,
- and an integer indicating a line number next to the version string).
-
- Raises:
- ParsingException for invalid dump versions.
- """
- version = ''
-
- # Skip until an identifiable line.
- headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
- if not self._lines:
- raise EmptyDumpException('Empty heap dump file.')
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: not self._lines[n].startswith(headers))
- if not found:
- raise InvalidDumpException('No version header.')
-
- # Identify a version.
- if self._lines[ln].startswith('heap profile: '):
- version = self._lines[ln][13:].strip()
- if version in (DUMP_DEEP_5, DUMP_DEEP_6):
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n] != 'STACKTRACES:\n')
- elif version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(version)
- else:
- raise InvalidDumpException('Invalid version: %s' % version)
- elif self._lines[ln] == 'STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_1)
- elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_2)
-
- return (version, ln)
-
- def _parse_global_stats(self):
- """Parses lines in self._lines as global stats."""
- (ln, _) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
-
- global_stat_names = [
- 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
- 'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
- 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
- 'nonprofiled-stack', 'nonprofiled-other',
- 'profiled-mmap', 'profiled-malloc']
-
- for prefix in global_stat_names:
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n].split()[0] != prefix)
- words = self._lines[ln].split()
- self._global_stats[prefix + '_virtual'] = int(words[-2])
- self._global_stats[prefix + '_committed'] = int(words[-1])
-
- def _parse_meta_information(self):
- """Parses lines in self._lines for meta information."""
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'META:\n')
- if not found:
- return
- ln += 1
-
- while True:
- if self._lines[ln].startswith('Time:'):
- matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
- matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
- if matched_format:
- self._time = time.mktime(datetime.datetime.strptime(
- matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
- if matched_format.group(2):
- self._time += float(matched_format.group(2)[1:]) / 1000.0
- elif matched_seconds:
- self._time = float(matched_seconds.group(1))
- elif self._lines[ln].startswith('Reason:'):
- pass # Nothing to do for 'Reason:'
- elif self._lines[ln].startswith('PageSize: '):
- self._pagesize = int(self._lines[ln][10:])
- elif self._lines[ln].startswith('CommandLine:'):
- pass
- elif (self._lines[ln].startswith('PageFrame: ') or
- self._lines[ln].startswith('PFN: ')):
- if self._lines[ln].startswith('PageFrame: '):
- words = self._lines[ln][11:].split(',')
- else:
- words = self._lines[ln][5:].split(',')
- for word in words:
- if word == '24':
- self._pageframe_length = 24
- elif word == 'Base64':
- self._pageframe_encoding = 'base64'
- elif word == 'PageCount':
- self._has_pagecount = True
- elif self._lines[ln].startswith('RunID: '):
- self._run_id = self._lines[ln][7:].strip()
- elif (self._lines[ln].startswith('MMAP_LIST:') or
- self._lines[ln].startswith('GLOBAL_STATS:')):
- # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
- break
- else:
- pass
- ln += 1
-
- def _parse_mmap_list(self):
- """Parses lines in self._lines as a mmap list."""
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'MMAP_LIST:\n')
- if not found:
- return {}
-
- ln += 1
- self._map = {}
- current_vma = {}
- pageframe_list = []
- while True:
- entry = procfs.ProcMaps.parse_line(self._lines[ln])
- if entry:
- current_vma = {}
- for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
- for key, value in entry.as_dict().iteritems():
- attr[key] = value
- current_vma[key] = value
- ln += 1
- continue
-
- if self._lines[ln].startswith(' PF: '):
- for pageframe in self._lines[ln][5:].split():
- pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
- ln += 1
- continue
-
- matched = self._HOOK_PATTERN.match(self._lines[ln])
- if not matched:
- break
- # 2: starting address
- # 5: end address
- # 7: hooked or unhooked
- # 8: additional information
- if matched.group(7) == 'hooked':
- submatched = self._HOOKED_PATTERN.match(matched.group(8))
- if not submatched:
- submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
- elif matched.group(7) == 'unhooked':
- submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
- if not submatched:
- submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
- else:
- assert matched.group(7) in ['hooked', 'unhooked']
-
- submatched_dict = submatched.groupdict()
- region_info = { 'vma': current_vma }
- if submatched_dict.get('TYPE'):
- region_info['type'] = submatched_dict['TYPE'].strip()
- if submatched_dict.get('COMMITTED'):
- region_info['committed'] = int(submatched_dict['COMMITTED'])
- if submatched_dict.get('RESERVED'):
- region_info['reserved'] = int(submatched_dict['RESERVED'])
- if submatched_dict.get('BUCKETID'):
- region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
-
- if matched.group(1) == '(':
- start = current_vma['begin']
- else:
- start = int(matched.group(2), 16)
- if matched.group(4) == '(':
- end = current_vma['end']
- else:
- end = int(matched.group(5), 16)
-
- if pageframe_list and pageframe_list[0].start_truncated:
- pageframe_list[0].set_size(
- pageframe_list[0].size - start % self._pagesize)
- if pageframe_list and pageframe_list[-1].end_truncated:
- pageframe_list[-1].set_size(
- pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
- region_info['pageframe'] = pageframe_list
- pageframe_list = []
-
- self._map[(start, end)] = (matched.group(7), region_info)
- ln += 1
-
- def _extract_stacktrace_lines(self, line_number):
- """Extracts the position of stacktrace lines.
-
- Valid stacktrace lines are stored into self._stacktrace_lines.
-
- Args:
- line_number: A line number to start parsing in lines.
-
- Raises:
- ParsingException for invalid dump versions.
- """
- if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: not self._lines[n].split()[0].isdigit())
- stacktrace_start = line_number
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: self._check_stacktrace_line(self._lines[n]))
- self._stacktrace_lines = self._lines[stacktrace_start:line_number]
-
- elif self._version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(self._version)
-
- else:
- raise InvalidDumpException('Invalid version: %s' % self._version)
-
- @staticmethod
- def _check_stacktrace_line(stacktrace_line):
- """Checks if a given stacktrace_line is valid as stacktrace.
-
- Args:
- stacktrace_line: A string to be checked.
-
- Returns:
- True if the given stacktrace_line is valid.
- """
- words = stacktrace_line.split()
- if len(words) < BUCKET_ID + 1:
- return False
- if words[BUCKET_ID - 1] != '@':
- return False
- return True
-
class DumpList(object):
"""Represents a sequence of heap profile dumps.
@@ -432,59 +103,3 @@ class DumpList(object):
def __getitem__(self, index):
return Dump.load(self._dump_path_list[index])
-
-
-class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
- """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
- _DUMMY_ENTRY = procfs.ProcMapsEntry(
- 0, # begin
- 0, # end
- '-', # readable
- '-', # writable
- '-', # executable
- '-', # private
- 0, # offset
- '00', # major
- '00', # minor
- 0, # inode
- '' # name
- )
-
- def __init__(self):
- super(ProcMapsEntryAttribute, self).__init__()
- self._entry = self._DUMMY_ENTRY.as_dict()
-
- def __str__(self):
- return str(self._entry)
-
- def __repr__(self):
- return 'ProcMapsEntryAttribute' + str(self._entry)
-
- def __getitem__(self, key):
- return self._entry[key]
-
- def __setitem__(self, key, value):
- if key not in self._entry:
- raise KeyError(key)
- self._entry[key] = value
-
- def copy(self):
- new_entry = ProcMapsEntryAttribute()
- for key, value in self._entry.iteritems():
- new_entry[key] = copy.deepcopy(value)
- return new_entry
-
-
-def skip_while(index, max_index, skipping_condition):
- """Increments |index| until |skipping_condition|(|index|) is False.
-
- Returns:
- A pair of an integer indicating a line number after skipped, and a
- boolean value which is True if found a line which skipping_condition
- is False for.
- """
- while skipping_condition(index):
- index += 1
- if index >= max_index:
- return index, False
- return index, True
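
After this split, Dump.load remains the single entry point for callers; it simply
constructs a DeepDump behind the scenes, and DumpList.__getitem__ (unchanged above)
still re-parses the file through that same entry point on every access. A usage sketch
(the file name is hypothetical, but must match the <prefix>.<pid>.<count>.heap pattern
required by DeepDump._PATH_PATTERN):

    from lib.dump import Dump

    dump = Dump.load('chrome.12345.0002.heap')
    print dump.path, dump.count, dump.run_id

    # iter_stacktrace is a property that yields parsed stacktrace columns.
    for bucket_id, virtual, committed, allocs, frees in dump.iter_stacktrace:
        print bucket_id, virtual, committed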