summaryrefslogtreecommitdiffstats
path: root/tools/purify/purify_message.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/purify/purify_message.py')
-rw-r--r--tools/purify/purify_message.py634
1 files changed, 634 insertions, 0 deletions
diff --git a/tools/purify/purify_message.py b/tools/purify/purify_message.py
new file mode 100644
index 0000000..d093461
--- /dev/null
+++ b/tools/purify/purify_message.py
@@ -0,0 +1,634 @@
+#!/bin/env python
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# purify_message.py
+
+''' Utility objects and functions to parse and unique Purify messages '''
+
+import cStringIO
+import logging
+import re
+import sys
+
+import google.logging_utils
+
# Stand-in 'file' value for a run of one or more elided frames.
ELIDE = "..."
# Stand-in 'file' value marking stack truncation at a known entry point.
TRUNCATE = "^^^"
# Stand-in 'file' value for any file that's outside of our source directory.
EXTERNAL_FILE = "EXTERNAL_FILE"

# Purify message type codes mapped to their human-readable descriptions.
message_type = dict(
    ABR="Array Bounds Read",
    ABW="Array Bounds Write",
    ABWL="Array Bounds Write (late detect)",
    BSR="Beyond Stack Read",
    BSW="Beyond Stack Write",
    COM="COM API/Interface Failure",
    EXC="Continued Exception",
    EXH="Handled Exception",
    EXI="Ignored Exception",
    EXU="Unhandled Exception",
    FFM="Freeing Freed Memory",
    FIM="Freeing Invalid Memory",
    FMM="Freeing Mismatched Memory",
    FMR="Free Memory Read",
    FMW="Free Memory Write",
    FMWL="Free Memory Write (late detect)",
    HAN="Invalid Handle",
    HIU="Handle In Use",
    ILK="COM Interface Leak",
    IPR="Invalid Pointer Read",
    IPW="Invalid Pointer Write",
    MAF="Memory Allocation Failure",
    MIU="Memory In Use",
    MLK="Memory Leak",
    MPK="Potential Memory Leak",
    NPR="Null Pointer Read",
    NPW="Null Pointer Write",
    PAR="Bad Parameter",
    UMC="Uninitialized Memory Copy",
    UMR="Uninitialized Memory Read",
)

# A magic message type which is deliberately not part of message_type.
FATAL = "FATAL"
+
def GetMessageType(key):
    """Return the human-readable description of a Purify message type code.

    Args:
      key: a Purify message type code (a key of message_type), or FATAL.

    Returns:
      The matching description from message_type, FATAL itself when key is
      the magic FATAL type, or "UNKNOWN" for any other key (a warning is
      logged in that case).
    """
    description = message_type.get(key)
    if description is not None:
        return description
    if key == FATAL:
        return key
    # logging.warning is the supported spelling; logging.warn is a deprecated
    # alias.  Arguments are passed lazily so formatting only happens if the
    # record is actually emitted.
    logging.warning("unknown message type %s", key)
    return "UNKNOWN"
+
# Purify severity codes mapped to their meaning.  Nothing reads this table at
# the moment; it is kept for documentation purposes.
message_severity = dict(
    I="Informational",
    E="Error",
    W="Warning",
    O="Internal Purify Error",
)
+
+
class Stack:
    """A normalized Purify stack.

    The stack is constructed by adding one line at a time, top down, from a
    stack in a Purify text file via AddLine.  Stacks that normalize the same
    compare and hash the same, so they can be sorted and uniqued.  The
    original stack text is preserved (see __str__) so that it's possible to
    drill down into the full details if necessary.
    """

    # The top of the source tree.  This is stripped from the filename as part
    # of normalization.
    source_dir = ""

    @classmethod
    def SetSourceDir(cls, dir):
        """Set the source tree root, normalized to '/' separators, lower case."""
        cls.source_dir = dir.replace("\\", "/").lower()
        logging.debug("Stack.source_dir = %s", cls.source_dir)

    # A line in a stack trace: "function [drive:path:line trailing-text]".
    # Groups: 1 = function text, 2 = optional drive letter, 3 = path,
    # 4 = optional ":line", 5 = optional trailing text.
    pat_stack_line = re.compile(r'(.*)\[(\w:)?([^\:\s]*)(:\d+)?(\s+.*)?]')

    # Known stack entry points that allow us to truncate the rest of the stack
    # below that point.
    pat_known_entries = (
        re.compile(r'RunnableMethod::Run\(void\)'),
        re.compile(r'ChromeMain'),
        re.compile(r'BrowserMain'),
        re.compile(r'wWinMain'),
        re.compile(r'TimerManager::ProcessPendingTimer\(void\)'),
        # NOTE(review): "RunableMethod" may be a typo for "RunnableMethod";
        # left untouched pending confirmation against real Purify output.
        re.compile(r'RunnableMethod::RunableMethod\(.*\)'),
        re.compile(r'RenderViewHost::OnMessageReceived\(Message::IPC const&\)'),
        re.compile(r'testing::Test::Run\(void\)'),
        re.compile(r'testing::TestInfoImpl::Run\(void\)'),
        re.compile(r'Thread::ThreadFunc\(void \*\)'),
        re.compile(r'TimerTask::Run\(void\)'),
        re.compile(r'MessageLoop::RunTask\(Task \*\)'),
        re.compile(r'.DispatchToMethod\@.*'),
    )

    # if functions match the following, elide them from the stack
    pat_func_elide = (re.compile(r'^std::'), re.compile(r'^new\('))
    # if files match the following, elide them from the stack
    pat_file_elide = (
        re.compile(r'.*platformsdk_vista.*'),
        # The dot is escaped so that only a real ".dll" extension matches.
        re.compile(r'.*\.(dll|DLL)$'),
        # bug 1069902
        re.compile(r'webkit/pending/wtf/fastmalloc\.h'),
        # When we leak sqlite stuff, we leak a lot, and the stacks
        # are all over the place.  For now, let's assume that
        # sqlite itself is leak free and focus on our calling code.
        re.compile(r'chrome/third_party/sqlite/.*'),
    )

    # Function names of the form <group>_<name>_Test::...
    pat_unit_test = re.compile(r'^([a-zA-Z0-9]+)_(\w+)_Test::.*')

    def __init__(self, title):
        """Create an empty stack headed by title (leading whitespace dropped)."""
        self._title = title.lstrip()
        # normalized lines: dicts with 'function', 'file' and 'line_number'
        self._stack = []
        # verbatim copy of every line passed to AddLine
        self._orig = ""
        # are we currently in an eliding block
        self._eliding = False
        # have we truncated the stack?
        self._truncated = False
        # is the stack made up completely of external code? (i.e. elided)
        self._all_external = True
        # a logical group that this stack belongs to
        self._group = None
        # top stack line (preserved even if elided)
        self._top_stack_line = None

    def GetLines(self):
        """Return the list of normalized stack lines (not a copy)."""
        return self._stack

    def GetTopStackLine(self):
        """Return the first parsed stack line, even if elided, or None."""
        return self._top_stack_line

    def GetTopVisibleStackLine(self):
        """Return the first normalized line that has a function, or {}."""
        for entry in self._stack:
            if entry['function']:
                return entry
        return {}

    def GetGroup(self):
        """A logical grouping for this stack, allowing related stacks to be
        grouped together.  Subgroups within a group are separated by ".".
        (e.g. group.subgroup.subsubgroup)
        """
        return self._group

    def _ComputeStackLine(self, line):
        """Parse one raw stack line.

        Returns:
          A dict with 'function' (demangled and detemplatized), 'file'
          (relative to source_dir, or EXTERNAL_FILE) and 'line_number'
          (0 if absent), or None if the line is not a stack frame.
        """
        m = Stack.pat_stack_line.match(line.lstrip())
        if not m:
            return None
        func = self._DemangleSymbol(m.group(1).rstrip())
        func = self._DetemplatizeSymbol(func)
        path = (m.group(2) or "") + m.group(3)
        # paths are normalized to use / and be lower case
        path = path.replace("\\", "/").lower()
        if not path.startswith(Stack.source_dir):
            path = EXTERNAL_FILE
        else:
            path = path[len(Stack.source_dir):]
            # Trim leading / if present.  startswith() is used rather than
            # indexing because the remaining path may be empty.
            if path.startswith("/"):
                path = path[1:]
        loc = m.group(4)
        if loc:
            # drop the leading ':'
            line_number = int(loc[1:])
        else:
            line_number = 0
        return {'function': func, 'file': path, 'line_number': line_number}

    def _ShouldElide(self, stack_line):
        """Return True if the normalized stack_line should be hidden."""
        func = stack_line['function']
        path = stack_line['file']
        # elide certain common functions from the stack such as the STL
        for pat in Stack.pat_func_elide:
            if pat.match(func):
                logging.debug("eliding due to func pat match: %s", func)
                return True
        if path == EXTERNAL_FILE:
            # if it's not in our source tree, then elide
            logging.debug("eliding due to external file: %s", path)
            return True
        # elide certain common file sources from the stack, usually this
        # involves system libraries
        for pat in Stack.pat_file_elide:
            if pat.match(path):
                logging.debug("eliding due to file pat match: %s", path)
                return True
        return False

    def AddLine(self, line):
        """Add one line from a stack in a Purify text file.

        Lines must be added in order (top down).  Lines are added to two
        internal structures: an original string copy and an array of
        normalized lines, split into (function, file, line number).
        Stack normalization does several things:
          * elides sections of the stack that are in external code
          * truncates the stack at so called "known entry points"
          * removes template type information from symbols

        Returns:
          False if the line was elided or otherwise omitted, True otherwise.
        """
        self._orig += line + "\n"
        stack_line = self._ComputeStackLine(line)
        if not stack_line:
            # skip these lines
            logging.debug(">>>" + line)
            return False

        if not self._top_stack_line:
            self._top_stack_line = stack_line
        # Unit test entry points are good groupings.  Even if we already have
        # a group set, a later unit-test stack line will override.
        # Note that we also do this even if the stack has already been
        # truncated since this is useful information.
        # TODO(erikkay): Maybe in this case, the truncation should be
        # overridden?
        test_match = Stack.pat_unit_test.match(stack_line['function'])
        if test_match:
            self._group = test_match.group(1) + "." + test_match.group(2)

        if self._truncated:
            return False

        if self._ShouldElide(stack_line):
            # a whole run of elided frames is represented by a single marker
            if not self._eliding:
                self._eliding = True
                self._stack.append(
                    {'function': "", 'file': ELIDE, 'line_number': 0})
            return False

        self._stack.append(stack_line)
        self._eliding = False
        self._all_external = False

        # when we reach one of the known common stack entry points, truncate
        # the stack to avoid printing overly redundant information
        if len(self._stack) > 1:
            for pat in Stack.pat_known_entries:
                if not pat.match(stack_line['function']):
                    continue
                if not self._group:
                    # we're at the end of the stack, so use the path to the
                    # file as the group if we don't already have one.
                    # This won't be incredibly reliable, but might still be
                    # useful.
                    prev = self._stack[-2]
                    if prev['file']:
                        self._group = '.'.join(prev['file'].split('/')[:-1])
                self._stack.append(
                    {'function': "", 'file': TRUNCATE, 'line_number': 0})
                self._truncated = True
                return False
        return True

    def _DemangleSymbol(self, symbol):
        """Strip the "?A0x<addr>::" prefix Purify puts on some symbols."""
        # TODO(erikkay) - I'm not sure why Purify prepends an address on the
        # front of some of these as if it were a namespace (?A<addr>::).  From
        # an analysis standpoint, it seems meaningless and can change from
        # machine to machine, so it's best if it's thrown away
        if symbol.startswith("?A0x"):
            skipto = symbol.find("::")
            if skipto >= 0:
                symbol = symbol[(skipto + 2):]
            else:
                logging.warning(
                    "unable to strip address off of symbol (%s)", symbol)
        # TODO(erikkay) there are more symbols not being properly demangled
        # in Purify's output.  Some of these look like template-related issues.
        return symbol

    def _DetemplatizeSymbol(self, symbol):
        """Remove everything between (possibly nested) '<' and '>' from the
        symbol, normalizing it, making it more readable, and less precise."""
        kept = []
        depth = 0
        for ch in symbol:
            if ch == '<':
                depth += 1
            elif ch == '>' and depth > 0:
                depth -= 1
            elif depth == 0:
                # outside any template argument list (an unmatched '>' at
                # depth 0 is kept as-is)
                kept.append(ch)
        return "".join(kept)

    def __hash__(self):
        return hash(self.NormalizedStr())

    def __cmp__(self, other):
        """Three-way compare on (file, function) pairs from the bottom up,
        ignoring line numbers; on a tie the shorter stack sorts first."""
        if not other:
            return 1
        mine = self._stack
        theirs = other._stack
        # sort stacks from the bottom up
        for i in range(1, min(len(mine), len(theirs)) + 1):
            # compare file, then func, but omit line number
            a = (mine[-i]['file'], mine[-i]['function'])
            b = (theirs[-i]['file'], theirs[-i]['function'])
            if a != b:
                return (a > b) - (a < b)
        return (len(mine) > len(theirs)) - (len(mine) < len(theirs))

    # Rich comparisons delegate to __cmp__ so ordering and equality keep
    # working where __cmp__ alone is not consulted.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def NormalizedStr(self, verbose=False):
        """String version of the normalized stack.  See AddLine for
        normalization details.

        Args:
          verbose: if True, positive line numbers are appended to file names.
        """
        # collect the pieces and join once for efficient string building
        parts = []
        for entry in self._stack:
            parts.append(" ")
            parts.append(entry['file'])
            if verbose and entry['line_number'] > 0:
                parts.append(":%d" % entry['line_number'])
            parts.append(" ")
            parts.append(entry['function'])
            parts.append("\n")
        return "".join(parts)

    def __str__(self):
        """Return the original, un-normalized stack text."""
        return self._orig
+
+
class Message:
    """A normalized message from a Purify text file.

    Messages all have a severity, most have a type, and many have an error
    stack and/or an allocation stack.  Supports cmp and hash so that messages
    which normalize the same can be sorted and uniqued.
    """

    # "<title> {N occurrences}"
    pat_count = re.compile(r'^(.*) \{(\d+) occurrences?\}')
    pat_leak = re.compile(r'(Potential )?[Mm]emory leak of (\d+) bytes? '
                          r'from (\d+) blocks? allocated in (.+)')
    pat_miu = re.compile(r'Memory use of (\d+) bytes? '
                         r'(\((\d+)% initialized\) )?from (\d+) blocks? '
                         r'allocated .. (.+)')
    # these are headings to different types of stack traces
    pat_loc_error = re.compile(r'\s*(Exception|Error|Call) location')
    pat_loc_alloc = re.compile(r'\s*Allocation location')
    pat_loc_free = re.compile(r'\s*Free location')
    pat_loc_free2 = re.compile(r'\s*Location of free attempt')

    def __init__(self, severity, type, title):
        """Parse title into a normalized title plus count/bytes/blocks.

        Args:
          severity: the Purify severity code of the message.
          type: the Purify message type code (e.g. "MIU").
          title: the first line of the message.

        A title that matches none of the known forms is kept verbatim, except
        for type "MIU", where an error is logged and the process exits with
        status -1.
        """
        self._severity = severity
        self._type = type
        self._program = None
        self._head = ""
        self._loc_alloc = None
        self._loc_error = None
        self._loc_free = None
        # the stack that subsequent AddLine calls feed into
        self._stack = None
        self._count = 1
        self._bytes = 0
        self._blocks = 0

        m = Message.pat_count.match(title)
        if m:
            self._title = m.group(1)
            self._count = int(m.group(2))
            return
        m = Message.pat_leak.match(title)
        if m:
            self._title = m.group(4)
            self._bytes = int(m.group(2))
            self._blocks = int(m.group(3))
            return
        m = Message.pat_miu.match(title)
        if m:
            self._title = m.group(5)
            self._bytes = int(m.group(1))
            self._blocks = int(m.group(4))
            return
        if type == "MIU":
            logging.error("%s didn't match", title)
            sys.exit(-1)
        self._title = title

    def GetAllocStack(self):
        """Return the allocation Stack, or None."""
        return self._loc_alloc

    def GetErrorStack(self):
        """Return the error Stack, or None."""
        return self._loc_error

    def GetGroup(self):
        """An attempted logical grouping for this Message computed by the
        contained Stack objects: the first non-empty group of the allocation,
        error and free stacks (in that order), or "UNKNOWN".
        """
        for stack in (self._loc_alloc, self._loc_error, self._loc_free):
            if stack:
                group = stack.GetGroup()
                if group:
                    return group
        return "UNKNOWN"

    def AddLine(self, line):
        """Add a line one at a time (in order from the Purify text file) to
        build up the message and its associated stacks."""
        if Message.pat_loc_error.match(line):
            self._stack = Stack(line)
            self._loc_error = self._stack
        elif Message.pat_loc_alloc.match(line):
            self._stack = Stack(line)
            self._loc_alloc = self._stack
        elif (Message.pat_loc_free.match(line) or
              Message.pat_loc_free2.match(line)):
            self._stack = Stack(line)
            self._loc_free = self._stack
        elif self._stack:
            if not line.startswith(" "):
                logging.debug("*** " + line)
            self._stack.AddLine(line)
        else:
            # anything before the first stack heading belongs to the head
            self._head += line.lstrip()

    def Type(self):
        return self._type

    def Program(self):
        return self._program

    def SetProgram(self, program):
        self._program = program

    def StacksAllExternal(self):
        """Returns True if the stacks it contains are made up completely of
        external (elided) symbols"""
        return ((not self._loc_error or self._loc_error._all_external) and
                (not self._loc_alloc or self._loc_alloc._all_external) and
                (not self._loc_free or self._loc_free._all_external))

    def __hash__(self):
        # NOTE: see also _MessageHashesFromFile.  If this method changes, then
        # _MessageHashesFromFile must be updated to match.
        s = ""
        if self._loc_error:
            s += "Error Location\n" + self._loc_error.NormalizedStr()
        if self._loc_alloc:
            s += "Alloc Location\n" + self._loc_alloc.NormalizedStr()
        if self._loc_free:
            s += "Free Location\n" + self._loc_free.NormalizedStr()
        return hash(s)

    def NormalizedStr(self, verbose=False):
        """String version of the normalized message.  Only includes title
        and normalized versions of error and allocation stacks if present.
        Example:
          Uninitialized Memory Read in Foo::Bar()
          Error Location
           foo/Foo.cc Foo::Bar(void)
           foo/main.cc start(void)
           foo/main.cc main(void)
          Alloc Location
           foo/Foo.cc Foo::Foo(void)
           foo/main.cc start(void)
           foo/main.cc main(void)
        """
        ret = ""
        # some of the message types are more verbose than others and we
        # don't need to indicate their type
        if verbose and self._type not in ["UMR", "IPR", "IPW"]:
            ret += GetMessageType(self._type) + ": "
        if verbose and self._bytes > 0:
            ret += "(%d bytes, %d blocks) " % (self._bytes, self._blocks)
        ret += "%s\n" % self._title
        if self._loc_error:
            ret += "Error Location\n" + self._loc_error.NormalizedStr(verbose)
        if self._loc_alloc:
            ret += "Alloc Location\n" + self._loc_alloc.NormalizedStr(verbose)
        if self._loc_free:
            ret += "Free Location\n" + self._loc_free.NormalizedStr(verbose)
        return ret

    def __str__(self):
        """Return the title, head text and original (raw) stacks."""
        ret = self._title + "\n" + self._head
        if self._loc_error:
            ret += "Error Location\n" + str(self._loc_error)
        if self._loc_alloc:
            ret += "Alloc Location\n" + str(self._loc_alloc)
        if self._loc_free:
            ret += "Free Location\n" + str(self._loc_free)
        return ret

    @staticmethod
    def _Compare(mine, theirs):
        """Three-way compare in which a missing (None) value sorts first."""
        if mine is None or theirs is None:
            return (mine is not None) - (theirs is not None)
        return (mine > theirs) - (mine < theirs)

    def __cmp__(self, other):
        """Three-way compare on error, alloc and free stacks, then title.

        A stack present on only one side always decides the comparison, so
        a.__cmp__(b) and b.__cmp__(a) agree with each other and equal
        messages always hash the same.
        """
        if not other:
            return 1
        pairs = (
            (self._loc_error, other._loc_error),
            (self._loc_alloc, other._loc_alloc),
            (self._loc_free, other._loc_free),
            # since title is often not very interesting, we sort against
            # that last
            (self._title, other._title),
        )
        for mine, theirs in pairs:
            ret = Message._Compare(mine, theirs)
            if ret:
                return ret
        return 0

    # Rich comparisons delegate to __cmp__ so ordering and equality keep
    # working where __cmp__ alone is not consulted.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0
+
+
class MessageList:
    """A collection of Message objects of a given message type."""

    def __init__(self, type):
        self._type = type
        self._messages = []
        # lazily computed by UniqueMessages; None means "not computed"
        self._unique_messages = None
        self._sublists = None
        # running total of the _bytes of the messages in _messages
        self._bytes = 0

    def GetType(self):
        return self._type

    def BeginNewSublist(self):
        """Close the current run of messages and move it into a sublist.

        Some message types are logically grouped into sets of messages which
        should not be mixed in the same list.  Specifically, Memory In Use
        (MIU), Memory Leak (MLK) and Potential Memory Leak (MPK) are generated
        in a set all at once, but this generation can happen at multiple
        distinct times, either via the Purify UI or through Purify API calls.
        For example, if Purify is told to dump a list of all memory leaks
        once, and then again a few minutes later, the two lists will certainly
        overlap, so they should be kept in separate lists.
        In order to accommodate this, MessageList supports the notion of
        sublists.  When the caller determines that one list of messages of a
        type has ended and a new list has begun, it calls BeginNewSublist()
        which takes the current set of messages, puts them into a new
        MessageList and puts that into the sublists array.  Later, when the
        caller needs to get at these messages, GetSublists() should be called.
        Does nothing if there are no pending messages.
        """
        if not self._messages:
            return
        sublist = MessageList(self._type)
        sublist._messages = self._messages
        # carry the byte total along instead of dropping it
        sublist._bytes = self._bytes
        if not self._sublists:
            self._sublists = [sublist]
        else:
            self._sublists.append(sublist)
        self._messages = []
        # the cached unique list described the messages that just moved out
        self._unique_messages = None
        logging.info("total size: %d", self._bytes)
        self._bytes = 0

    def GetSublists(self):
        """Returns the current list of sublists (None if there are none).

        If there are currently sublists and there are any messages that aren't
        in a sublist, BeginNewSublist() is called implicitly by this method to
        force those ungrouped messages into their own sublist.
        """
        if self._sublists and self._messages:
            self.BeginNewSublist()
        return self._sublists

    def AddMessage(self, msg):
        """Adds a message to this MessageList."""
        self._messages.append(msg)
        self._bytes += msg._bytes
        # any previously computed unique list is now stale
        self._unique_messages = None

    def AllMessages(self):
        """Returns an array of all Message objects in this MessageList."""
        # TODO(erikkay): handle case with sublists
        return self._messages

    def UniqueMessages(self):
        """Returns a sorted array of the unique normalized Message objects in
        this MessageList.
        """
        # the list is lazily computed since we have to create a sorted list,
        # which is only valid once all messages have been added
        # TODO(erikkay): handle case with sublists
        if self._unique_messages is None:
            self._unique_messages = sorted(set(self._messages))
        return self._unique_messages

    def UniqueMessageGroups(self):
        """Returns a dictionary mapping Message group names to arrays of
        uniqued normalized Message objects in this MessageList.
        """
        groups = {}
        for msg in self.UniqueMessages():
            groups.setdefault(msg.GetGroup(), []).append(msg)
        return groups