Patch from timurrrr: Re-factor valgrind scripts to add tsan support

First reviewed at http://codereview.chromium.org/125272 BUG=none TEST=none Review URL: http://codereview.chromium.org/155528 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20870 0039d316-1c4b-4281-b951-d872f2087c98
author: dkegel@google.com <dkegel@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-07-16 15:55:52 +0000
committer: dkegel@google.com <dkegel@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-07-16 15:55:52 +0000
commit: 68e3b47230aa88a848ba6cb7daee852993f51a62 (patch)
tree: 15203472cb668e0a6277c99a1ed1c5b0d67fec2c /tools/valgrind/memcheck_analyze.py
parent: 9229b2de068f352b3d184996206870d1d4ffc2b5 (diff)
download: chromium_src-68e3b47230aa88a848ba6cb7daee852993f51a62.zip
chromium_src-68e3b47230aa88a848ba6cb7daee852993f51a62.tar.gz
chromium_src-68e3b47230aa88a848ba6cb7daee852993f51a62.tar.bz2
1 files changed, 276 insertions, 0 deletions
diff --git a/tools/valgrind/memcheck_analyze.py b/tools/valgrind/memcheck_analyze.py
new file mode 100755
index 0000000..a108d36
--- /dev/null
+++ b/tools/valgrind/memcheck_analyze.py
@@ -0,0 +1,276 @@
+#!/usr/bin/python
+# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# memcheck_analyze.py
+
+''' Given a valgrind XML file, parses errors and uniques them.'''
+
+import logging
+import optparse
+import os
+import sys
+import time
+from xml.dom.minidom import parse
+from xml.parsers.expat import ExpatError
+
+# Function names that mark the top of the interesting part of a stack trace.
+# They are compared verbatim against the <fn> text of each frame, so they are
+# spelled the way valgrind prints them (these look like demangled C++ names,
+# not mangled ones). Frames below the first matching frame are not collected
+# and therefore never shown when pretty printing:
+_TOP_OF_STACK_POINTS = [
+  # Don't show our testing framework.
+  "testing::Test::Run()",
+  # Also don't show the internals of libc/pthread.
+  "start_thread"
+]
+
+def getTextOf(top_node, name):
+  ''' Concatenates the text found directly inside every element called |name|
+  underneath |top_node| and returns it as one string ("" when nothing matches).
+  Only text nodes that are immediate children of a matching element count.
+  '''
+
+  pieces = []
+  for element in top_node.getElementsByTagName(name):
+    for child in element.childNodes:
+      if child.nodeType == child.TEXT_NODE:
+        pieces.append(child.data)
+  return "".join(pieces)
+
+def removeCommonRoot(source_dir, directory):
+  '''Returns |directory| with the leading |source_dir| path removed.
+
+  The prefix is only stripped when |directory| really is |source_dir| itself
+  or lives underneath it (i.e. the match ends on a "/" boundary). A path
+  outside of |source_dir| is returned unchanged.
+
+  Args:
+    source_dir: Path to the top of the source tree, with or without a trailing
+        slash. May be None or "" to disable stripping.
+    directory: The directory to make relative to |source_dir|.
+
+  Returns:
+    The part of |directory| below |source_dir| ("" if they are the same
+    directory), or |directory| itself if it is not inside |source_dir|.
+  '''
+  if not source_dir:
+    return directory
+
+  # os.path.commonprefix() compares character by character, so it cannot be
+  # used to decide whether one path contains another ("/src" vs. "/src2", or
+  # "/src" vs. "/usr" sharing only "/"). Compare whole path components instead.
+  root = source_dir.rstrip("/")
+  if directory == root:
+    return ""
+  if directory.startswith(root + "/"):
+    return directory[len(root) + 1:]
+
+  return directory
+
+# Constants that give real names to the abbreviations in valgrind XML output.
+# Each value is the tag name of a child element of <frame>; the same strings
+# are reused as the keys of the per-frame dictionaries built from those
+# elements.
+INSTRUCTION_POINTER = "ip"
+OBJECT_FILE = "obj"
+FUNCTION_NAME = "fn"
+SRC_FILE_DIR = "dir"
+SRC_FILE_NAME = "file"
+SRC_LINE = "line"
+
+def gatherFrames(node, source_dir):
+  ''' Collects the <frame> elements found under |node| as a list of dicts.
+
+  Each dict is keyed by the valgrind tag names (ip, obj, fn, dir, file, line)
+  and holds the text of the corresponding child element; the dir entry has
+  |source_dir| stripped via removeCommonRoot(). Collection stops right after
+  the first frame whose function name is listed in _TOP_OF_STACK_POINTS (that
+  frame itself is still included).
+  '''
+  collected = []
+  for frame_node in node.getElementsByTagName("frame"):
+    function_name = getTextOf(frame_node, FUNCTION_NAME)
+    relative_dir = removeCommonRoot(source_dir,
+                                    getTextOf(frame_node, SRC_FILE_DIR))
+    collected.append({
+      INSTRUCTION_POINTER : getTextOf(frame_node, INSTRUCTION_POINTER),
+      OBJECT_FILE         : getTextOf(frame_node, OBJECT_FILE),
+      FUNCTION_NAME       : function_name,
+      SRC_FILE_DIR        : relative_dir,
+      SRC_FILE_NAME       : getTextOf(frame_node, SRC_FILE_NAME),
+      SRC_LINE            : getTextOf(frame_node, SRC_LINE)
+    })
+    if function_name in _TOP_OF_STACK_POINTS:
+      # Everything below this frame is test harness / libc noise.
+      break
+  return collected
+
+class ValgrindError:
+  ''' Takes a <DOM Element: error> node and reads all the data from it. A
+  ValgrindError is meant to be treated as immutable. Identity (hashing and
+  equality) is based on UniqueString(), i.e. on the error kind plus the
+  function and file names of its stack frames, not on the pretty printed text.
+  '''
+
+  def __init__(self, source_dir, error_node):
+    ''' Copies all the relevant information out of the DOM and into object
+    properties.
+
+    Args:
+      source_dir: Prefix that should be stripped from the <dir> node.
+      error_node: The <error></error> DOM node we're extracting from.
+    '''
+
+    # Valgrind errors contain one <what><stack> pair, plus an optional
+    # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>.
+    # (Origin is nicely enclosed; too bad the other two aren't.)
+    # The most common way to see all three in one report is
+    # a syscall with a parameter that points to uninitialized memory, e.g.
+    # Format:
+    # <error>
+    #   <unique>0x6d</unique>
+    #   <tid>1</tid>
+    #   <kind>SyscallParam</kind>
+    #   <what>Syscall param write(buf) points to uninitialised byte(s)</what>
+    #   <stack>
+    #     <frame>
+    #     ...
+    #     </frame>
+    #   </stack>
+    #   <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
+    #   <stack>
+    #     <frame>
+    #     ...
+    #     </frame>
+    #   </stack>
+    #   <origin>
+    #   <what>Uninitialised value was created by a heap allocation</what>
+    #   <stack>
+    #     <frame>
+    #     ...
+    #     </frame>
+    #   </stack>
+    #   </origin>
+
+    self._kind = getTextOf(error_node, "kind")
+    # List of [description, frames] pairs. |description| is None when a
+    # <stack> was not preceded by a <what>/<auxwhat> element.
+    self._backtraces = []
+
+    # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
+    description = None
+    for node in error_node.childNodes:
+      if node.localName in ("what", "auxwhat"):
+        description = "".join([n.data for n in node.childNodes
+                               if n.nodeType == n.TEXT_NODE])
+      elif node.localName == "stack":
+        self._backtraces.append([description, gatherFrames(node, source_dir)])
+        description = None
+      elif node.localName == "origin":
+        # Tolerate an <origin> that carries no <stack> instead of raising
+        # IndexError; it is then recorded with an empty frame list.
+        frames = []
+        stacks = node.getElementsByTagName("stack")
+        if stacks:
+          frames = gatherFrames(stacks[0], source_dir)
+        self._backtraces.append([getTextOf(node, "what"), frames])
+        description = None
+
+  def __str__(self):
+    ''' Pretty print the type and backtrace(s) of this specific error.'''
+    lines = [self._kind]
+    for description, frames in self._backtraces:
+      # A stack without a description simply gets no heading line.
+      if description is not None:
+        lines.append(description)
+      for frame in frames:
+        if frame[SRC_FILE_DIR] != "":
+          location = (frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] + ":" +
+                      frame[SRC_LINE])
+        else:
+          location = frame[OBJECT_FILE]
+        # Fall back to the raw instruction pointer for frames without symbols.
+        lines.append("  " +
+                     (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) +
+                     " (" + location + ")")
+
+    return "\n".join(lines) + "\n"
+
+  def UniqueString(self):
+    ''' String to use for object identity. Don't print this, use str(obj)
+    instead.'''
+    parts = [self._kind + " "]
+    for backtrace in self._backtraces:
+      for frame in backtrace[1]:
+        parts.append(frame[FUNCTION_NAME])
+
+        if frame[SRC_FILE_DIR] != "":
+          parts.append(frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME])
+        else:
+          parts.append(frame[OBJECT_FILE])
+
+    return "".join(parts)
+
+  def __hash__(self):
+    return hash(self.UniqueString())
+
+  def __eq__(self, rhs):
+    ''' Equal to another ValgrindError with the same UniqueString(), and (as
+    before) to a plain string equal to UniqueString().'''
+    if isinstance(rhs, ValgrindError):
+      return self.UniqueString() == rhs.UniqueString()
+    return self.UniqueString() == rhs
+
+  def __ne__(self, rhs):
+    # Python 2 does not derive != from ==, so keep the two consistent.
+    return not self.__eq__(rhs)
+
+class MemcheckAnalyze:
+  ''' Given a set of Valgrind XML files, parse all the errors out of them,
+  unique them and output the results.'''
+
+  # Total time budget, in seconds and measured from the start of __init__, for
+  # valgrind to finish writing all of the files.
+  _MAX_WAIT_SECONDS = 180.0
+  # Number of lines printed before and after the line of an XML parse error.
+  _CONTEXT_LINES = 5
+
+  def __init__(self, source_dir, files, show_all_leaks=False):
+    '''Reads in a set of files.
+
+    Files that valgrind never finished writing are skipped with a warning;
+    files that are not well-formed XML are logged (with some context) and make
+    Report() fail.
+
+    Args:
+      source_dir: Path to top of source tree for this build
+      files: A list of filenames.
+      show_all_leaks: whether to show even less important leaks
+    '''
+
+    self._errors = set()
+    self._parse_failed = False
+    badfiles = set()
+    start = time.time()
+    for filename in files:
+      # Wait up to three minutes for valgrind to finish writing all files,
+      # but after that, just skip incomplete files and warn.
+      if not self._WaitForCompleteFile(filename, start):
+        badfiles.add(filename)
+        continue
+
+      try:
+        raw_errors = parse(filename).getElementsByTagName("error")
+        for raw_error in raw_errors:
+          # Ignore "possible" leaks for now by default.
+          if (show_all_leaks or
+              getTextOf(raw_error, "kind") != "Leak_PossiblyLost"):
+            self._errors.add(ValgrindError(source_dir, raw_error))
+      except ExpatError:
+        # sys.exc_info() instead of "except X, e" / "except X as e" so that
+        # this parses on every Python version.
+        e = sys.exc_info()[1]
+        self._parse_failed = True
+        logging.warning("could not parse %s: %s" % (filename, e))
+        # Expat line numbers are 1-based.
+        self._LogParseContext(filename, e.lineno - 1)
+    if len(badfiles) > 0:
+      logging.warning("valgrind didn't finish writing %d files?!" %
+                      len(badfiles))
+
+  def _WaitForCompleteFile(self, filename, start):
+    '''Polls |filename| until it contains the closing </valgrindoutput> tag.
+
+    The file is always scanned at least once. It is then rescanned once a
+    second until _MAX_WAIT_SECONDS have passed since |start|.
+
+    Returns:
+      True if the closing tag was found, False if the time budget ran out.
+    '''
+    f = open(filename, "r")
+    try:
+      while True:
+        f.seek(0)
+        for line in f:
+          if '</valgrindoutput>' in line:
+            return True
+        if (time.time() - start) >= self._MAX_WAIT_SECONDS:
+          return False
+        time.sleep(1)
+    finally:
+      # Runs on every exit path, including exceptions raised while reading.
+      f.close()
+
+  def _LogParseContext(self, filename, lineno):
+    '''Logs the lines of |filename| around 0-based line |lineno|, marking the
+    offending line with "> ". Stops quietly at the end of the file.'''
+    context_start = max(0, lineno - self._CONTEXT_LINES)
+    context_end = lineno + self._CONTEXT_LINES + 1
+    context_file = open(filename, "r")
+    try:
+      for i in range(0, context_end):
+        raw_line = context_file.readline()
+        if not raw_line:
+          # End of file: there is no more context to show.
+          break
+        if i < context_start:
+          continue
+        context_data = raw_line.rstrip()
+        if i != lineno:
+          logging.warning("  %s" % context_data)
+        else:
+          logging.warning("> %s" % context_data)
+    finally:
+      context_file.close()
+
+  def Report(self):
+    '''Logs the outcome of the analysis.
+
+    Returns:
+      0 if no errors were found, -1 if there were valgrind errors, and -2 if
+      at least one file could not be parsed.
+    '''
+    if self._parse_failed:
+      logging.error("FAIL! Couldn't parse Valgrind output file")
+      return -2
+
+    if self._errors:
+      logging.error("FAIL! There were %s errors: " % len(self._errors))
+
+      for error in self._errors:
+        logging.error(error)
+
+      return -1
+
+    logging.info("PASS! No errors found!")
+    return 0
+
+def _main():
+  '''For testing only. The MemcheckAnalyze class should be imported instead.
+
+  Parses the command line, analyzes the given valgrind XML files and exits
+  with the value returned by MemcheckAnalyze.Report(). Exits with a usage
+  error if no file name is given.
+  '''
+  parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
+  # Note the trailing space: adjacent string literals are concatenated as-is.
+  parser.add_option("", "--source_dir",
+                    help="path to top of source tree for this build "
+                    "(used to normalize source paths in baseline)")
+
+  (options, filenames) = parser.parse_args()
+  if not filenames:
+    parser.error("no filename specified")
+
+  analyzer = MemcheckAnalyze(options.source_dir, filenames)
+  sys.exit(analyzer.Report())
+
+# Only run the command line front end when executed as a script; importing
+# this module has no side effects.
+if __name__ == "__main__":
+  _main()
author	dkegel@google.com <dkegel@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2009-07-16 15:55:52 +0000
committer	dkegel@google.com <dkegel@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2009-07-16 15:55:52 +0000
commit	68e3b47230aa88a848ba6cb7daee852993f51a62 (patch)
tree	15203472cb668e0a6277c99a1ed1c5b0d67fec2c /tools/valgrind/memcheck_analyze.py
parent	9229b2de068f352b3d184996206870d1d4ffc2b5 (diff)
download	chromium_src-68e3b47230aa88a848ba6cb7daee852993f51a62.zip chromium_src-68e3b47230aa88a848ba6cb7daee852993f51a62.tar.gz chromium_src-68e3b47230aa88a848ba6cb7daee852993f51a62.tar.bz2