| author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
|---|---|---|
| committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
| commit | 920c091ac3ee15079194c82ae8a7a18215f3f23c | (patch) |
| tree | d28515d1e7732e2b6d077df1b4855ace3f4ac84f | /tools/purify |
| parent | ae2c20f398933a9e86c387dcc465ec0f71065ffc | (diff) |
| download | chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.zip, chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.gz, chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.bz2 | |
Add tools to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/purify')
-rw-r--r-- | tools/purify/chrome_tests.py | 259
-rwxr-xr-x | tools/purify/chrome_tests.sh | 9
-rw-r--r-- | tools/purify/common.py | 293
-rw-r--r-- | tools/purify/data/filters.pft | bin (0 -> 11294 bytes)
-rw-r--r-- | tools/purify/data/ignore.txt | 12
-rw-r--r-- | tools/purify/purify_analyze.py | 874
-rw-r--r-- | tools/purify/purify_coverage.py | 111
-rw-r--r-- | tools/purify/purify_inuse.py | 116
-rw-r--r-- | tools/purify/purify_message.py | 634
-rw-r--r-- | tools/purify/purify_test.py | 249
-rw-r--r-- | tools/purify/quantify_test.py | 85
11 files changed, 2642 insertions, 0 deletions
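The entry point for the new tools is chrome_tests.py, which drives the other scripts. As a hedged illustration (not part of this commit; the build directory is an assumption), an invocation matching the "-b <dir> -t <test>" usage string defined in that script might look like:

    # Illustrative only: launches chrome_tests.py the way a bot or developer
    # might, per the optparse usage string visible in the diff below.
    import subprocess

    ret = subprocess.call(["python.exe", "tools/purify/chrome_tests.py",
                           "-b", "chrome/Release",   # compiler output dir
                           "-t", "unit",             # run unit_tests.exe
                           "-t", "base"])            # then base_unittests.exe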
diff --git a/tools/purify/chrome_tests.py b/tools/purify/chrome_tests.py new file mode 100644 index 0000000..9ae22dd --- /dev/null +++ b/tools/purify/chrome_tests.py @@ -0,0 +1,259 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# chrome_tests.py + +''' Runs various chrome tests through purify_test.py +''' + +import logging +import optparse +import os +import stat +import sys + +import google.logging_utils +import google.path_utils +import google.platform_utils + +import common + +class TestNotFound(Exception): pass + +class ChromeTests: + + def __init__(self, options, args, test): + # the known list of tests + self._test_list = {"test_shell": self.TestTestShell, + "unit": self.TestUnit, + "net": self.TestNet, + "ipc": self.TestIpc, + "base": self.TestBase, + "layout": self.TestLayout, + "ui": self.TestUI} + + if test not in self._test_list: + raise TestNotFound("Unknown test: %s" % test) + + self._options = options + self._args = args + self._test = test + + script_dir = google.path_utils.ScriptDir() + utility = google.platform_utils.PlatformUtility(script_dir) + # Compute the top of the tree (the "source dir") from the script dir (where + # this script lives). We assume that the script dir is in tools/purify + # relative to the top of the tree. 
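For illustration (the checkout path here is hypothetical, not from this commit), the two dirname calls walk from tools/purify up to the tree root:

    import ntpath  # Windows path semantics regardless of host OS
    script_dir = r"C:\src\chromium\src\tools\purify"  # hypothetical checkout
    source_dir = ntpath.dirname(ntpath.dirname(script_dir))
    assert source_dir == r"C:\src\chromium\src"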
+ self._source_dir = os.path.dirname(os.path.dirname(script_dir)) + # since this path is used for string matching, make sure it's always + # an absolute Windows-style path + self._source_dir = utility.GetAbsolutePath(self._source_dir) + purify_test = os.path.join(script_dir, "purify_test.py") + self._command_preamble = ["python.exe", purify_test, "--echo_to_stdout", + "--source_dir=%s" % (self._source_dir), + "--save_cache"] + + def _DefaultCommand(self, module, exe=None): + '''Generates the default command array that most tests will use.''' + module_dir = os.path.join(self._source_dir, module) + if module == "chrome": + # unfortunately, not all modules have the same directory structure + self._data_dir = os.path.join(module_dir, "test", "data", "purify") + else: + self._data_dir = os.path.join(module_dir, "data", "purify") + if not self._options.build_dir: + dir_chrome = os.path.join(self._source_dir, "chrome", "Release") + dir_module = os.path.join(module_dir, "Release") + if exe: + exe_chrome = os.path.join(dir_chrome, exe) + exe_module = os.path.join(dir_module, exe) + if os.path.isfile(exe_chrome) and not os.path.isfile(exe_module): + self._options.build_dir = dir_chrome + elif os.path.isfile(exe_module) and not os.path.isfile(exe_chrome): + self._options.build_dir = dir_module + elif os.stat(exe_module)[stat.ST_MTIME] > os.stat(exe_chrome)[stat.ST_MTIME]: + self._options.build_dir = dir_module + else: + self._options.build_dir = dir_chrome + else: + if os.path.isdir(dir_chrome) and not os.path.isdir(dir_module): + self._options.build_dir = dir_chrome + elif os.path.isdir(dir_module) and not os.path.isdir(dir_chrome): + self._options.build_dir = dir_module + elif os.stat(dir_module)[stat.ST_MTIME] > os.stat(dir_chrome)[stat.ST_MTIME]: + self._options.build_dir = dir_module + else: + self._options.build_dir = dir_chrome + + cmd = self._command_preamble + cmd.append("--data_dir=%s" % self._data_dir) + if self._options.baseline: + cmd.append("--baseline") + if self._options.verbose: + cmd.append("--verbose") + if exe: + cmd.append(os.path.join(self._options.build_dir, exe)) + return cmd + + def Run(self): + ''' Runs the test specified by command-line argument --test ''' + logging.info("running test %s" % (self._test)) + return self._test_list[self._test]() + + def _ReadGtestFilterFile(self, name, cmd): + '''Read a file which is a list of tests to filter out with --gtest_filter + and append the command-line option to cmd. + ''' + filters = [] + filename = os.path.join(self._data_dir, name + ".gtest.txt") + if os.path.exists(filename): + f = open(filename, 'r') + for line in f.readlines(): + if line.startswith("#") or line.startswith("//") or line.isspace(): + continue + line = line.rstrip() + filters.append(line) + gtest_filter = self._options.gtest_filter + if len(filters): + if gtest_filter: + gtest_filter += ":" + if gtest_filter.find("-") < 0: + gtest_filter += "-" + else: + gtest_filter = "-" + gtest_filter += ":".join(filters) + if gtest_filter: + cmd.append("--gtest_filter=%s" % gtest_filter) + + def SimpleTest(self, module, name): + cmd = self._DefaultCommand(module, name) + self._ReadGtestFilterFile(name, cmd) + return common.RunSubprocess(cmd, 0) + + def ScriptedTest(self, module, exe, name, script, multi=False, cmd_args=None): + '''Purify a target exe, which will be executed one or more times via a + script or driver program. + Args: + module - which top level component this test is from (webkit, base, etc.) 
+ exe - the name of the exe (it's assumed to exist in build_dir) + name - the name of this test (used to name output files) + script - the driver program or script. If it's python.exe, we use + search-path behavior to execute, otherwise we assume that it is in + build_dir. + multi - a boolean hint that the exe will be run multiple times, generating + multiple output files (without this option, only the last run will be + recorded and analyzed) + cmd_args - extra arguments to pass to the purify_test.py script + ''' + cmd = self._DefaultCommand(module) + exe = os.path.join(self._options.build_dir, exe) + cmd.append("--exe=%s" % exe) + cmd.append("--name=%s" % name) + if multi: + out = os.path.join(google.path_utils.ScriptDir(), + "latest", "%s%%5d.txt" % name) + cmd.append("--out_file=%s" % out) + if cmd_args: + cmd.extend(cmd_args) + if script[0] != "python.exe" and not os.path.exists(script[0]): + script[0] = os.path.join(self._options.build_dir, script[0]) + cmd.extend(script) + self._ReadGtestFilterFile(name, cmd) + return common.RunSubprocess(cmd, 0) + + def TestBase(self): + return self.SimpleTest("base", "base_unittests.exe") + + def TestIpc(self): + return self.SimpleTest("chrome", "ipc_tests.exe") + + def TestNet(self): + return self.SimpleTest("net", "net_unittests.exe") + + def TestTestShell(self): + return self.SimpleTest("webkit", "test_shell_tests.exe") + + def TestUnit(self): + return self.SimpleTest("chrome", "unit_tests.exe") + + def TestLayout(self): + script = os.path.join(self._source_dir, "webkit", "tools", "layout_tests", + "run_webkit_tests.py") + script_cmd = ["python.exe", script, "--run-singly", "-v", + "--noshow-results", "--time-out-ms=200000"] + if len(self._args): + # if the arg is a txt file, then treat it as a list of tests + if os.path.isfile(self._args[0]) and self._args[0][-4:] == ".txt": + script_cmd.append("--test-list=%s" % self._args[0]) + else: + script_cmd.extend(self._args) + self.ScriptedTest("webkit", "test_shell.exe", "layout", + script_cmd, multi=True, cmd_args=["--timeout=0"]) + # since layout tests take so long to run, having the test red on buildbot + # isn't very useful + return 0 + + def TestUI(self): + return self.ScriptedTest("chrome", "chrome.exe", "ui_tests", + ["ui_tests.exe", "--single-process", "--test-timeout=100000000"], multi=True) + +def _main(argv): + parser = optparse.OptionParser("usage: %prog -b <dir> -t <test> " + "[-t <test> ...]") + parser.disable_interspersed_args() + parser.add_option("-b", "--build_dir", + help="the location of the output of the compiler output") + parser.add_option("-t", "--test", action="append", + help="which test to run") + parser.add_option("", "--baseline", action="store_true", default=False, + help="generate baseline data instead of validating") + parser.add_option("", "--gtest_filter", + help="additional arguments to --gtest_filter") + parser.add_option("-v", "--verbose", action="store_true", default=False, + help="verbose output - enable debug log messages") + (options, args) = parser.parse_args() + + if options.verbose: + google.logging_utils.config_root(logging.DEBUG) + else: + google.logging_utils.config_root() + + if not options.test or not len(options.test): + parser.error("--test not specified") + + for t in options.test: + tests = ChromeTests(options, args, t) + ret = tests.Run() + if ret: return ret + return 0 + +if __name__ == "__main__": + ret = _main(sys.argv) + sys.exit(ret) diff --git a/tools/purify/chrome_tests.sh b/tools/purify/chrome_tests.sh new file mode 100755 index 
0000000..3c9c7da --- /dev/null +++ b/tools/purify/chrome_tests.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +system_root=`cygpath "$SYSTEMROOT"` +export PATH="/usr/bin:$system_root/system32:$system_root:$system_root/system32/WBEM" + +exec_dir=$(dirname $0) + +"$exec_dir/../../third_party/python_24/python.exe" \ + "$exec_dir/chrome_tests.py" "$@" diff --git a/tools/purify/common.py b/tools/purify/common.py new file mode 100644 index 0000000..3dd72af --- /dev/null +++ b/tools/purify/common.py @@ -0,0 +1,293 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# common.py + +""" Common code used by purify_test.py and quantify_test.py in order to automate +running of Rational Purify and Quantify in a consistent manner. +""" + +# Purify and Quantify have a front-end (e.g. quantifyw.exe) which talks to a +# back-end engine (e.g. quantifye.exe). The back-end seems to handle +# instrumentation, while the front-end controls program execution and +# measurement. The front-end will dynamically launch the back-end if +# instrumentation is needed (sometimes in the middle of a run if a dll is +# loaded dynamically). +# In an ideal world, this script would simply execute the front-end and check +# the output. However, purify is not the most reliable or well-documented app +# on the planet, and my attempts to get it to run this way led to the back-end +# engine hanging during instrumentation. The workaround to this was to run two +# passes, first running the engine to do instrumentation rather than letting +# the front-end do it for you, then running the front-end to actually do the +# run. Each time through we're deleting all of the instrumented files in the +# cache to ensure that we're testing that instrumentation works from scratch. 
+# (although this can be changed with an option) + +import datetime +import logging +import optparse +import os +import subprocess +import sys +import tempfile +import time + +import google.logging_utils + +# hard-coded location of Rational files and directories +RATIONAL_PATH = os.path.join("C:\\", "Program Files", "Rational") +COMMON_PATH = os.path.join(RATIONAL_PATH, "common") +PPLUS_PATH = os.path.join(RATIONAL_PATH, "PurifyPlus") +PURIFY_PATH = os.path.join(COMMON_PATH, "purify.exe") +PURIFYW_PATH = os.path.join(PPLUS_PATH, "purifyW.exe") +PURIFYE_PATH = os.path.join(PPLUS_PATH, "purifye.exe") +QUANTIFYE_PATH = os.path.join(PPLUS_PATH, "quantifye.exe") +QUANTIFYW_PATH = os.path.join(PPLUS_PATH, "quantifyw.exe") + +class TimeoutError(Exception): pass + +def RunSubprocess(proc, timeout=0, detach=False): + """ Runs a subprocess, polling every .2 seconds until it finishes or until + timeout is reached. Then kills the process with taskkill. A timeout <= 0 + means no timeout. + + Args: + proc: list of process components (exe + args) + timeout: how long to wait before killing, <= 0 means wait forever + detach: Whether to pass the DETACHED_PROCESS argument to CreateProcess + on Windows. This is used by Purify subprocesses on buildbot which + seem to get confused by the parent console that buildbot sets up. + """ + logging.info("running %s" % (" ".join(proc))) + if detach: + # see MSDN docs for "Process Creation Flags" + DETACHED_PROCESS = 0x8 + p = subprocess.Popen(proc, creationflags=DETACHED_PROCESS) + else: + p = subprocess.Popen(proc) + if timeout <= 0: + while p.poll() is None: + time.sleep(0.2) + else: + wait_until = time.time() + timeout + while p.poll() is None and time.time() < wait_until: + time.sleep(0.2) + result = p.poll() + if result is None: + subprocess.call(["taskkill", "/T", "/F", "/PID", str(p.pid)]) + logging.error("KILLED %d" % (p.pid)) + # give the process a chance to actually die before continuing + # so that cleanup can happen safely + time.sleep(1.0) + logging.error("TIMEOUT waiting for %s" % (proc[0])) + raise TimeoutError(proc[0]) + if result: + logging.error("%s exited with non-zero result code %d" % (proc[0], result)) + return result + +def FixPath(path): + """We pass computed paths to Rational as arguments, so these paths must be + valid windows paths. When running in cygwin's python, computed paths + wind up looking like /cygdrive/c/..., so we need to call out to cygpath + to fix them up. + """ + if sys.platform != "cygwin": + return path + p = subprocess.Popen(["cygpath", "-a", "-m", path], stdout=subprocess.PIPE) + return p.communicate()[0].rstrip() + +class Rational(object): + ''' Common superclass for Purify and Quantify automation objects. Handles + common argument parsing as well as the general program flow of Instrument, + Execute, Analyze. 
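A hedged sketch of the two-pass pattern described in the comment above, using flags that appear in the scripts in this commit (/Run=no to instrument only, /SaveTextData to capture output); the target exe name and cache path are illustrative:

    import subprocess

    purifyw = r"C:\Program Files\Rational\PurifyPlus\purifyW.exe"  # PURIFYW_PATH above
    # pass 1: instrument only; the front-end exits without running the target
    subprocess.call([purifyw, "/Run=no",
                     "/CacheDir=C:\\purify_cache", "my_tests.exe"])
    # pass 2: execute the already-instrumented target and save text output
    subprocess.call([purifyw, "/CacheDir=C:\\purify_cache",
                     "/SaveTextData=out.txt", "my_tests.exe"])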
+ ''' + + def __init__(self): + google.logging_utils.config_root() + self._out_file = None + + def Run(self): + '''Call this to run through the whole process: + Setup, Instrument, Execute, Analyze''' + start = datetime.datetime.now() + retcode = -1 + if self.Setup(): + if self.Instrument(): + if self.Execute(): + retcode = self.Analyze() + if not retcode: + logging.info("instrumentation and execution completed successfully.") + else: + logging.error("Analyze failed") + else: + logging.error("Execute failed") + else: + logging.error("Instrument failed") + self.Cleanup() + else: + logging.error("Setup failed") + end = datetime.datetime.now() + seconds = (end - start).seconds + hours = seconds / 3600 + seconds = seconds % 3600 + minutes = seconds / 60 + seconds = seconds % 60 + logging.info("elapsed time: %02d:%02d:%02d" % (hours, minutes, seconds)) + return retcode + + def CreateOptionParser(self): + '''Creates OptionParser with shared arguments. Overridden by subclassers + to add custom arguments.''' + parser = optparse.OptionParser("usage: %prog [options] <program to test>") + # since the trailing program likely has command-line args of itself + # we need to stop parsing when we reach the first positional arg + parser.disable_interspersed_args() + parser.add_option("-o", "--out_file", dest="out_file", metavar="OUTFILE", + default="", + help="output data is written to OUTFILE") + parser.add_option("-s", "--save_cache", + dest="save_cache", action="store_true", default=False, + help="don't delete instrumentation cache") + parser.add_option("-c", "--cache_dir", dest="cache_dir", metavar="CACHEDIR", + default="", + help="location of instrumentation cache is CACHEDIR") + parser.add_option("-m", "--manual", + dest="manual_run", action="store_true", default=False, + help="target app is being run manually, don't timeout") + parser.add_option("-t", "--timeout", + dest="timeout", metavar="TIMEOUT", default=10000, + help="timeout in seconds for the run (default 10000)") + parser.add_option("-v", "--verbose", action="store_true", default=False, + help="verbose output - enable debug log messages") + self._parser = parser + + def Setup(self): + if self.ParseArgv(): + logging.info("instrumentation cache in %s" % self._cache_dir) + logging.info("output saving to %s" % self._out_file) + # Ensure that Rational's common dir and cache dir are in the front of the + # path. The common dir is required for purify to run in any case, and + # the cache_dir is required when using the /Replace=yes option. + os.environ["PATH"] = (COMMON_PATH + ";" + self._cache_dir + ";" + + os.environ["PATH"]) + # clear the cache to make sure we're starting clean + self.__ClearInstrumentationCache() + return True + return False + + def Instrument(self, proc): + '''Instrument the app to be tested. Full instrumentation command-line + provided by subclassers via proc.''' + logging.info("starting instrumentation...") + if RunSubprocess(proc, self._timeout, detach=True) == 0: + if "/Replace=yes" in proc: + if os.path.exists(self._exe + ".Original"): + return True + elif os.path.isdir(self._cache_dir): + for cfile in os.listdir(self._cache_dir): + # TODO(erikkay): look for the actual munged purify filename + ext = os.path.splitext(cfile)[1] + if ext == ".exe": + return True + logging.error("no instrumentation data generated") + return False + + def Execute(self, proc): + ''' Execute the app to be tested after successful instrumentation. 
+ Full execution command-line provided by subclassers via proc.''' + logging.info("starting execution...") + # note that self._args begins with the exe to be run + proc += self._args + if RunSubprocess(proc, self._timeout) == 0: + return True + return False + + def Analyze(self): + '''Analyze step after a successful Execution. Should be overridden + by the subclasser if instrumentation is desired. + Returns 0 for success, -88 for warning (see ReturnCodeCommand) and anything + else for error + ''' + return -1 + + def ParseArgv(self): + '''Parses arguments according to CreateOptionParser + Subclassers must override if they have extra arguments.''' + self.CreateOptionParser() + (self._options, self._args) = self._parser.parse_args() + if self._options.verbose: + google.logging_utils.config_root(logging.DEBUG) + self._save_cache = self._options.save_cache + self._manual_run = self._options.manual_run + if self._manual_run: + logging.info("manual run - timeout disabled") + self._timeout = 0 + else: + self._timeout = int(self._options.timeout) + logging.info("timeout set to %ds" % (self._timeout)) + if self._save_cache: + logging.info("saving instrumentation cache") + if not self._options.cache_dir: + try: + temp_dir = os.environ["TEMP"] + except KeyError: + temp_dir = tempfile.mkdtemp() + self._cache_dir = os.path.join(FixPath(temp_dir), + "instrumentation_cache") + else: + self._cache_dir = FixPath(os.path.abspath(self._options.cache_dir)) + if self._options.out_file: + self._out_file = FixPath(os.path.abspath(self._options.out_file)) + if len(self._args) == 0: + self._parser.error("missing program to %s" % (self.__class__.__name__,)) + return False + self._exe = self._args[0] + self._exe_dir = FixPath(os.path.abspath(os.path.dirname(self._exe))) + return True + + def Cleanup(self): + # delete the cache to avoid filling up the hard drive when we're using + # temporary directory names + self.__ClearInstrumentationCache() + + def __ClearInstrumentationCache(self): + if not self._save_cache: + logging.info("clearing instrumentation cache %s" % self._cache_dir) + if os.path.isdir(self._cache_dir): + for cfile in os.listdir(self._cache_dir): + file = os.path.join(self._cache_dir, cfile); + if os.path.isfile(file): + try: + os.remove(file) + except: + logging.warning("unable to delete file %s: %s" % (file, + sys.exc_info()[0])) + + diff --git a/tools/purify/data/filters.pft b/tools/purify/data/filters.pft Binary files differnew file mode 100644 index 0000000..a353c12 --- /dev/null +++ b/tools/purify/data/filters.pft diff --git a/tools/purify/data/ignore.txt b/tools/purify/data/ignore.txt new file mode 100644 index 0000000..16a11b2 --- /dev/null +++ b/tools/purify/data/ignore.txt @@ -0,0 +1,12 @@ +# See bug 1157381 +Pure: Trap bits found in live chunk + +# See bugs 1151263 and 1164562 +Memory leak .+ allocated in InitSecurityInterfaceA \[SECUR32\.DLL\] + +# See bug 1163766 +# Ugly regexps are trying to deal with Purify's demangling bugs. 
+Memory leak .+ allocated in \?NewRunnableMethod.+ExpireHistoryBackend.+ScopedRunnableMethodFactory +Memory leak .+ allocated in RevocableStore::RevokeAll\(void\) +Memory leak .+ allocated in \?NewRunnableMethod.+CommitLaterTask.+CancelableTask.+CommitLaterTask +Memory leak .+ allocated in history::HistoryBackend::ScheduleCommit\(void\) diff --git a/tools/purify/purify_analyze.py b/tools/purify/purify_analyze.py new file mode 100644 index 0000000..4625a70 --- /dev/null +++ b/tools/purify/purify_analyze.py @@ -0,0 +1,874 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_analyze.py + +''' Given a Purify text file, parses messages, normalizes and uniques them. +If there's an existing baseline of this data, it can compare against that +baseline and return an error code if there are any new errors not in the +baseline. ''' + +import logging +import optparse +import os +import re +import sys + +import google.logging_utils +import google.path_utils + +import purify_message + +class MemoryTreeNode(object): + ''' A node in a tree representing stack traces of memory allocation. + Essentially, each node in the tree is a hashtable mapping a child + function name to a child node. Each node contains the total number + of bytes of all of its descendants. + See also: PurifyAnalyze.PrintMemoryInUse() + ''' + + pat_initializer = re.compile('(.*)\`dynamic initializer for \'(.*)\'\'') + + @classmethod + def CreateTree(cls, message_list): + '''Creates a tree from message_list. All of the Message objects are built + into a tree with a default "ROOT" root node that is then returned. + Args: + message_list: a MessageList object. 
+    '''
+    root = MemoryTreeNode("ROOT", 0, 0)
+    msgs = message_list.AllMessages()
+    for msg in msgs:
+      bytes = msg._bytes
+      blocks = msg._blocks
+      stack = msg.GetAllocStack()
+      stack_lines = stack.GetLines()
+      size = len(stack_lines)
+      node = root
+      node._AddAlloc(bytes, blocks)
+      counted = False
+      # process stack lines from the bottom up to build a call-stack tree
+      functions = [line["function"] for line in stack_lines]
+      functions.reverse()
+      for func in functions:
+        if node == root:
+          m = MemoryTreeNode.pat_initializer.match(func)
+          if m:
+            node = node._AddChild("INITIALIZERS", bytes, blocks)
+            func = m.group(1) + m.group(2)
+        # don't process elided or truncated stack lines
+        if func:
+          node = node._AddChild(func, bytes, blocks)
+          counted = True
+      if not counted:
+        # Nodes with no stack frames in our code wind up not being counted
+        # above.  These seem to be attributable to Windows DLL
+        # initialization, so just throw them into that bucket.
+        node._AddChild("WINDOWS", bytes, blocks)
+    return root
+
+  def __init__(self, function, bytes, blocks):
+    '''
+    Args:
+      function: A string representing a unique method or function.
+      bytes: initial number of bytes allocated in this node
+      blocks: initial number of blocks allocated in this node
+    '''
+    self._function = function
+    self._bytes = bytes
+    self._blocks = blocks
+    self._allocs = 1
+    self._children = {}
+
+  def _AddAlloc(self, bytes, blocks):
+    '''Adds bytes and blocks to this node's allocation totals
+    '''
+    self._allocs += 1
+    self._bytes += bytes
+    self._blocks += blocks
+
+  def _AddChild(self, function, bytes, blocks):
+    '''Adds a child node if not present.  Otherwise, adds
+    bytes and blocks to its allocation total.
+    '''
+    if function not in self._children:
+      self._children[function] = MemoryTreeNode(function, bytes, blocks)
+    else:
+      self._children[function]._AddAlloc(bytes, blocks)
+    return self._children[function]
+
+  def __cmp__(self, other):
+    # sort by size, then blocks, then function name
+    return cmp((self._bytes, self._blocks, self._function),
+               (other._bytes, other._blocks, other._function))
+
+  def __str__(self):
+    return "(%d bytes, %d blocks, %d allocs) %s" % (
+        self._bytes, self._blocks, self._allocs, self._function)
+
+  def PrintRecursive(self, padding="", byte_filter=0):
+    '''Print the tree and all of its children recursively (depth-first).  All
+    nodes at a given level of the tree are sorted in descending order by size.
+
+    Args:
+      padding: Printed at the front of the line.  Each recursive call adds a
+        single space character.
+      byte_filter: a number of bytes below which we'll prune the tree
+    '''
+    print "%s%s" % (padding, self)
+    padding = padding + " "
+    # sort the children in descending order (see __cmp__)
+    swapped = self._children.values()
+    swapped.sort(reverse=True)
+    rest_bytes = 0
+    rest_blocks = 0
+    rest_allocs = 0
+    for node in swapped:
+      if node._bytes < byte_filter:
+        rest_bytes += node._bytes
+        rest_blocks += node._blocks
+        rest_allocs += node._allocs
+      else:
+        node.PrintRecursive(padding, byte_filter)
+    if rest_bytes:
+      print "%s(%d bytes, %d blocks, %d allocs) PRUNED" % (padding,
+          rest_bytes, rest_blocks, rest_allocs)
+
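A minimal, self-contained sketch of the bottom-up tree building that CreateTree performs above; plain dicts stand in for the real Message/MessageList classes, and the frame names are invented:

    def build_tree(allocs):
      # allocs: list of (bytes, [frames ordered bottom-of-stack first])
      root = {"bytes": 0, "children": {}}
      for bytes, frames in allocs:
        root["bytes"] += bytes
        node = root
        for func in frames:
          node = node["children"].setdefault(func, {"bytes": 0, "children": {}})
          node["bytes"] += bytes  # every ancestor accumulates the allocation
      return root

    tree = build_tree([(100, ["main", "Foo::Bar"]), (40, ["main", "Foo::Baz"])])
    assert tree["children"]["main"]["bytes"] == 140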
+class PurifyAnalyze:
+  ''' Given a Purify text file, parses all of the messages inside of it and
+  normalizes them.  Provides a mechanism for comparing this normalized set
+  against a baseline and detecting if new errors have been introduced.
+  '''
+
+  # a line which is the start of a new message
+  pat_msg_start = re.compile('^\[([A-Z])\] (.*)$')
+  # a message with a specific type
+  pat_msg_type = re.compile('^([A-Z]{3}): (.*)$')
+  pat_leak_summary = re.compile("Summary of ... memory leaks")
+  pat_miu_summary = re.compile("Summary of ... memory in use")
+  pat_starting = re.compile("Starting Purify'd ([^\\s]+\\\\[^\\s]+)")
+  pat_arguments = re.compile("\s+Command line arguments:\s+([^\s].*)")
+  pat_terminate = re.compile('Message: TerminateProcess called with code')
+  # Purify treats this as a warning, but for us it's a fatal error.
+  pat_instrumentation_failed = re.compile('^.* file not instrumented')
+  # misc to ignore
+  pat_ignore = (re.compile('^(Start|Exit)ing'),
+                re.compile('^Program terminated'),
+                re.compile('^Terminating thread'),
+                re.compile('^Message: CryptVerifySignature'))
+  # message types that aren't analyzed
+  # handled, ignored and continued exceptions will likely never be interesting
+  # TODO(erikkay): MPK ("potential" memory leaks) may be worth turning on
+  types_excluded = ("EXH", "EXI", "EXC", "MPK")
+
+
+  def __init__(self, files, echo, name=None, source_dir=None, data_dir=None):
+    # The input file we're analyzing.
+    self._files = files
+    # Whether the input file contents should be echoed to stdout.
+    self._echo = echo
+    # A symbolic name for the run being analyzed, often the name of the
+    # exe which was purified.
+    self._name = name
+    # The top of the source code tree of the code we're analyzing.
+    # This prefix is stripped from all filenames in stacks for normalization.
+    if source_dir:
+      purify_message.Stack.SetSourceDir(source_dir)
+    if data_dir:
+      self._data_dir = data_dir
+    else:
+      self._data_dir = os.path.join(google.path_utils.ScriptDir(), "data")
+    # A map of message_type to a MessageList of that type.
+    self._message_lists = {}
+    self._ReadIgnoreFile()
+
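To make the message grammar concrete, here is an invented sample line run through the two class-level regexes above (the text is made up, but its shape matches what the patterns expect):

    import re
    pat_msg_start = re.compile('^\[([A-Z])\] (.*)$')
    pat_msg_type = re.compile('^([A-Z]{3}): (.*)$')
    m = pat_msg_start.match("[E] ABR: Array bounds read in foo.dll")
    t = pat_msg_type.match(m.group(2))
    assert (m.group(1), t.group(1)) == ("E", "ABR")  # severity, message type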
+  def _ReadIgnoreFile(self):
+    '''Read a file which is a list of regexps for either the title or the
+    top-most visible stack line.
+    '''
+    self._pat_ignore = []
+    filenames = [os.path.join(self._data_dir, "ignore.txt"),
+        os.path.join(google.path_utils.ScriptDir(), "data", "ignore.txt")]
+    for filename in filenames:
+      if os.path.exists(filename):
+        f = open(filename, 'r')
+        for line in f.readlines():
+          if line.startswith("#") or line.startswith("//") or line.isspace():
+            continue
+          line = line.rstrip()
+          pat = re.compile(line)
+          if pat:
+            self._pat_ignore.append(pat)
+
+  def ShouldIgnore(self, msg):
+    '''Should the message be ignored as irrelevant to analysis '''
+    # never ignore memory in use
+    if msg.Type() == "MIU":
+      return False
+
+    # check ignore patterns against title and top-most visible stack frames
+    strings = [msg._title]
+    err = msg.GetErrorStack()
+    if err:
+      line = err.GetTopVisibleStackLine().get('function', None)
+      if line:
+        strings.append(line)
+    alloc = msg.GetAllocStack()
+    if alloc:
+      line = alloc.GetTopVisibleStackLine().get('function', None)
+      if line:
+        strings.append(line)
+    for pat in self._pat_ignore:
+      for str in strings:
+        if pat.match(str):
+          logging.debug("Ignoring message based on ignore.txt")
+          logging.debug(msg.NormalizedStr(verbose=True))
+          return True
+
+    # unless it's explicitly in the ignore file, never ignore these
+    if msg.Type() == purify_message.FATAL:
+      return False
+
+    # certain message types aren't that interesting
+    if msg.Type() in PurifyAnalyze.types_excluded:
+      logging.debug("Ignoring message because type is excluded")
+      logging.debug(msg.NormalizedStr(verbose=True))
+      return True
+    # if the message stacks have no local stack frames, we can ignore them
+    if msg.StacksAllExternal():
+      logging.debug("Ignoring message because stacks are all external")
+      logging.debug(msg.NormalizedStr(verbose=True))
+      return True
+
+    # Microsoft's STL has a bunch of non-harmful UMRs in it.  Most of them
+    # are filtered out by Purify's default filters and by our explicit ignore
+    # list.  This code notices ones that have made it through so we can add
+    # them to the ignore list later.
+    if msg.Type() == "UMR":
+      if err.GetTopStackLine()['file'].endswith('.'):
+        logging.debug("non-ignored UMR in STL: %s" % msg._title)
+
+    return False
+
+  def AddMessage(self, msg):
+    ''' Append the message to an array for its type.  Returns boolean
+    indicating whether the message was actually added or was ignored.'''
+    if msg:
+      if self.ShouldIgnore(msg):
+        return False
+      if msg.Type() not in self._message_lists:
+        self._message_lists[msg.Type()] = purify_message.MessageList(msg.Type())
+      self._message_lists[msg.Type()].AddMessage(msg)
+      return True
+    return False
+
+  def _BeginNewSublist(self, key):
+    '''See MessageList.BeginNewSublist for details.
+    '''
+    if key in self._message_lists:
+      self._message_lists[key].BeginNewSublist()
+
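The ignore-list check in ShouldIgnore above reduces to matching each ignore.txt regex against the title and the top visible stack frames. A self-contained sketch with invented sample strings:

    import re
    pat_ignore = [re.compile(r"Memory leak .+ allocated in InitSecurityInterfaceA")]
    candidates = ["Memory leak of 24 bytes allocated in InitSecurityInterfaceA",
                  "SChannelFoo::Init"]  # title, then top visible frame (made up)
    assert any(p.match(s) for p in pat_ignore for s in candidates)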
+  def ReadFile(self):
+    ''' Reads a Purify ASCII file and parses and normalizes the messages in
+    the file.
+    Returns False if a fatal error was detected, True otherwise.
+    '''
+    # Purify files consist of a series of "messages".  These messages have a
+    # type (designated as a three letter code - see message_type), a severity
+    # (designated by a one letter code - see message_severity) and some
+    # textual details.  It will often also have a stack trace of the error
+    # location, and (for memory errors) may also have a stack trace of the
+    # allocation location.
+
+    fatal_errors = 0
+    fatal_exe = ""
+
+    for file in self._files:
+      exe = ""
+      error = None
+      message = None
+      for line in open(file, mode='rb'):
+        line = line.rstrip()
+        m = PurifyAnalyze.pat_msg_start.match(line)
+        if m:
+          if exe == fatal_exe:
+            # since we hit a fatal error in this program, ignore all messages
+            # until the program changes
+            continue
+          # we matched a new message, so if there's an existing one, it's time
+          # to finish processing it
+          if message:
+            message.SetProgram(exe)
+            if not self.AddMessage(message):
+              # error is only set if the message we just tried to add would
+              # otherwise be considered a fatal error.  Since AddMessage failed
+              # (presumably the messages matched the ignore list), we reset
+              # error to None
+              error = None
+            message = None
+          if error:
+            if error.Type() == "EXU":
+              # Don't treat EXU as fatal, since unhandled exceptions
+              # in other threads don't necessarily lead the app to exit.
+              # TODO(erikkay): verify that we still do trap exceptions that
+              # lead to early termination.
+              logging.warning(error.NormalizedStr(verbose=True))
+              error = None
+            else:
+              if len(self._files) > 1:
+                logging.error("Fatal error in program: %s" % error.Program())
+              logging.error(error.NormalizedStr(verbose=True))
+              fatal_errors += 1
+              error = None
+              fatal_exe = exe
+              continue
+          severity = m.group(1)
+          line = m.group(2)
+          m = PurifyAnalyze.pat_msg_type.match(line)
+          if m:
+            type = m.group(1)
+            message = purify_message.Message(severity, type, m.group(2))
+            if type == "EXU":
+              error = message
+          elif severity == "O":
+            message = purify_message.Message(severity, purify_message.FATAL,
+                                             line)
+            # This is an internal Purify error, and it means that this run
+            # can't be trusted and analysis should be aborted.
+            error = message
+          elif PurifyAnalyze.pat_instrumentation_failed.match(line):
+            message = purify_message.Message(severity, purify_message.FATAL,
+                                             line)
+            error = message
+          elif PurifyAnalyze.pat_terminate.match(line):
+            message = purify_message.Message(severity, purify_message.FATAL,
+                                             line)
+            error = message
+          elif PurifyAnalyze.pat_leak_summary.match(line):
+            # TODO(erikkay): should we do sublists for MLK and MPK too?
+            # Maybe that means we need to handle "new" and "all" messages
+            # separately.
+            #self._BeginNewSublist("MLK")
+            #self._BeginNewSublist("MPK")
+            pass
+          elif PurifyAnalyze.pat_miu_summary.match(line):
+            # Each time Purify is asked to generate a list of all memory in
+            # use or new memory in use, it first emits this summary line.
+            # Since the different lists can overlap, we need to tell
+            # MessageList to begin a new sublist.
+            # TODO(erikkay): should we tag "new" and "all" sublists
+            # explicitly somehow?
+            self._BeginNewSublist("MIU")
+          elif PurifyAnalyze.pat_starting.match(line):
+            m = PurifyAnalyze.pat_starting.match(line)
+            exe = m.group(1)
+            last_slash = exe.rfind("\\")
+            if not purify_message.Stack.source_dir:
+              path = os.path.abspath(os.path.join(exe[:last_slash],
+                                                  "..", ".."))
+              purify_message.Stack.SetSourceDir(path)
+            if not self._name:
+              self._name = exe[(last_slash+1):]
+          else:
+            unknown = True
+            for pat in PurifyAnalyze.pat_ignore:
+              if pat.match(line):
+                unknown = False
+                break
+            if unknown:
+              logging.error("unknown line " + line)
+        else:
+          if message:
+            message.AddLine(line)
+          elif PurifyAnalyze.pat_arguments.match(line):
+            m = PurifyAnalyze.pat_arguments.match(line)
+            exe += " " + m.group(1)
+
+      # Purify output should never end with a real message
+      if message:
+        logging.error("Unexpected message at end of file %s" % file)
+
+    return fatal_errors == 0
+
+  def GetMessageList(self, key):
+    if key in self._message_lists:
+      return self._message_lists[key]
+    else:
+      return None
+
+  def PrintSummary(self, echo=None):
+    ''' Print a summary of how many messages of each type were found. '''
+    # make sure everyone else is done first
+    sys.stderr.flush()
+    sys.stdout.flush()
+    if echo == None:
+      echo = self._echo
+    logging.info("summary of Purify messages:")
+    for key in self._message_lists:
+      list = self._message_lists[key]
+      unique = list.UniqueMessages()
+      all = list.AllMessages()
+      count = 0
+      for msg in all:
+        count += msg._count
+      logging.info("%s(%s) unique:%d total:%d" % (self._name,
+          purify_message.GetMessageType(key), len(unique), count))
+      if key not in ["MIU"]:
+        ignore_file = "%s_%s_ignore.txt" % (self._name, key)
+        ignore_hashes = self._MessageHashesFromFile(ignore_file)
+        ignored = 0
+
+        groups = list.UniqueMessageGroups()
+        group_keys = groups.keys()
+        group_keys.sort(cmp=lambda x,y: len(groups[y]) - len(groups[x]))
+        for group in group_keys:
+          # filter out ignored messages
+          kept_msgs = [x for x in groups[group] if hash(x) not in ignore_hashes]
+          ignored += len(groups[group]) - len(kept_msgs)
+          groups[group] = kept_msgs
+        if ignored:
+          logging.info("%s(%s) ignored:%d" % (self._name,
+              purify_message.GetMessageType(key), ignored))
+        total = reduce(lambda x, y: x + len(groups[y]), group_keys, 0)
+        if total:
+          print "%s(%s) group summary:" % (self._name,
+              purify_message.GetMessageType(key))
+          print "  TOTAL: %d" % total
+          for group in group_keys:
+            if len(groups[group]):
+              print "  %s: %d" % (group, len(groups[group]))
+        if echo:
+          for group in group_keys:
+            msgs = groups[group]
+            if len(msgs) == 0:
+              continue
+            print "messages from %s (%d)" % (group, len(msgs))
+            print "="*79
+            for msg in msgs:
+              # for the summary output, line numbers are useful
+              print msg.NormalizedStr(verbose=True)
+    # make sure stdout is flushed to avoid weird overlaps with logging
+    sys.stdout.flush()
+
+  def PrintMemoryInUse(self, byte_filter=16384):
+    ''' Print one or more trees showing a hierarchy of memory allocations.
+ Args: + byte_filter: a number of bytes below which we'll prune the tree + ''' + list = self.GetMessageList("MIU") + sublists = list.GetSublists() + if not sublists: + sublists = [list] + trees = [] + summaries = [] + # create the trees and summaries + for sublist in sublists: + tree = MemoryTreeNode.CreateTree(sublist) + trees.append(tree) + + # while the tree is a hierarchical assignment from the root/bottom of the + # stack down, the summary is simply adding the total of the top-most + # stack item from our code + summary = {} + total = 0 + summaries.append(summary) + for msg in sublist.AllMessages(): + total += msg._bytes + stack = msg.GetAllocStack() + if stack._all_external: + alloc_caller = "WINDOWS" + else: + lines = stack.GetLines() + for line in lines: + alloc_caller = line["function"] + if alloc_caller: + break + summary[alloc_caller] = summary.get(alloc_caller, 0) + msg._bytes + summary["TOTAL"] = total + + # print out the summaries and trees. + # TODO(erikkay): perhaps we should be writing this output to a file + # instead? + tree_number = 1 + num_trees = len(trees) + for tree, summary in zip(trees, summaries): + print "MEMORY SNAPSHOT %d of %d" % (tree_number, num_trees) + lines = summary.keys() + lines.sort(cmp=lambda x,y: summary[y] - summary[x]) + rest = 0 + for line in lines: + bytes = summary[line] + if bytes < byte_filter: + rest += bytes + else: + print "%d: %s" % (bytes, line) + print "%d: REST" % rest + print + print "BEGIN TREE" + tree.PrintRecursive(byte_filter=byte_filter) + tree_number += 1 + + # make sure stdout is flushed to avoid weird overlaps with logging + sys.stdout.flush() + + def PrintBugReport(self): + ''' Print a summary of how many messages of each type were found. ''' + # make sure everyone else is done first + sys.stderr.flush() + sys.stdout.flush() + logging.info("summary of Purify bugs:") + # This is a specialized set of counters for layout tests, with some + # unfortunate hard-coded knowledge. 
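# Illustrative example of the summarization below (paths invented): a repro
# command ending in ".../layout_tests/fast/css/foo.html" is counted under
# the bucket "fast/css", and a URL such as "127.0.0.1:8000/fast/js/x.html"
# under "http: fast/js"; the trailing filename is dropped by the final join.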
+    layout_test_counts = {}
+    for key in self._message_lists:
+      bug = {}
+      list = self._message_lists[key]
+      unique = list.UniqueMessages()
+      all = list.AllMessages()
+      count = 0
+      for msg in all:
+        if msg._title not in bug:
+          # use a single sample message to represent all messages
+          # that match this title
+          bug[msg._title] = {"message":msg,
+                             "total":0,
+                             "count":0,
+                             "programs":set()}
+        this_bug = bug[msg._title]
+        this_bug["total"] += msg._count
+        this_bug["count"] += 1
+        this_bug["programs"].add(msg.Program())
+        # try to summarize the problem areas for layout tests
+        if self._name == "layout":
+          prog = msg.Program()
+          prog_args = prog.split(" ")
+          if len(prog_args):
+            path = prog_args[-1].replace('\\', '/')
+            index = path.rfind("layout_tests/")
+            if index >= 0:
+              path = path[(index + len("layout_tests/")):]
+            else:
+              index = path.rfind("127.0.0.1:")
+              if index >= 0:
+                # the port number is 8000 or 9000, but length is the same
+                path = "http: " + path[(index + len("127.0.0.1:8000/")):]
+            path = "/".join(path.split('/')[0:-1])
+            count = 1 + layout_test_counts.get(path, 0)
+            layout_test_counts[path] = count
+      for title in bug:
+        b = bug[title]
+        print "[%s] %s" % (key, title)
+        print "%d tests, %d stacks, %d instances" % (len(b["programs"]),
+            b["count"], b["total"])
+        print "Reproducible with:"
+        for program in b["programs"]:
+          print "   %s" % program
+        print "Sample error details:"
+        print "====================="
+        print b["message"].NormalizedStr(verbose=True)
+    if len(layout_test_counts):
+      print
+      print "Layout test error counts"
+      print "========================"
+      paths = layout_test_counts.keys()
+      paths.sort()
+      for path in paths:
+        print "%s: %d" % (path, layout_test_counts[path])
+    # make sure stdout is flushed to avoid weird overlaps with logging
+    sys.stdout.flush()
+
+  def SaveLatestStrings(self, string_list, key, fname_extra=""):
+    '''Output a list of strings to a file in the "latest" dir.
+    '''
+    script_dir = google.path_utils.ScriptDir()
+    path = os.path.join(script_dir, "latest")
+    out = os.path.join(path, "%s_%s%s.txt" % (self._name, key, fname_extra))
+    logging.info("saving %s" % (out))
+    f = None
+    try:
+      f = open(out, "w+")
+      f.write('\n'.join(string_list))
+    except IOError, (errno, strerror):
+      logging.error("error writing to file %s (%d, %s)" % (out, errno,
+                                                           strerror))
+    if f:
+      f.close()
+    return True
+
+  def SaveResults(self, path=None, verbose=False):
+    ''' Output normalized data to baseline files for future comparison runs.
+    Messages are saved in sorted order into a separate file for each message
+    type.  See Message.NormalizedStr() for details of what's written.
+    '''
+    if not path:
+      path = self._data_dir
+    for key in self._message_lists:
+      out = os.path.join(path, "%s_%s.txt" % (self._name, key))
+      logging.info("saving %s" % (out))
+      f = open(out, "w+")
+      list = self._message_lists[key].UniqueMessages()
+      # TODO(erikkay): should the count of each message be a diff factor?
+      # (i.e. the same error shows up, but more frequently)
+      for message in list:
+        f.write(message.NormalizedStr(verbose=verbose))
+        f.write("\n")
+      f.close()
+    return True
+
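The baseline files consumed by _MessageHashesFromFile below are blocks separated by blank lines: a title line followed by body lines, where only the body feeds the hash. A self-contained sketch of that parse, with invented sample content:

    text = "MLK: leak of 24 bytes\n  foo.dll!Alloc\n  foo.dll!Init\n\n"
    msgs = {}
    title, msg = None, ""
    for line in text.splitlines(True) + ["\n"]:  # extra blank flushes the tail
      if not title:
        if not line.isspace():
          title = line        # first line of a block is the title
      elif not line.isspace():
        msg += line           # body lines accumulate
      else:
        msgs[hash(msg)] = title + msg  # the hash covers the body only
        title, msg = None, ""
    assert len(msgs) == 1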
+  def _MessageHashesFromFile(self, filename):
+    ''' Reads a file of normalized messages (see SaveResults) and creates a
+    dictionary mapping the hash of each message to its text.
+    '''
+    # NOTE: this uses the same hashing algorithm as Message.__hash__.
+    # Unfortunately, we can't use the same code easily since Message is based
+    # on parsing an original Purify output file and this code is reading a
+    # file of already normalized messages.  This means that these two bits of
+    # code need to be kept in sync.
+    msgs = {}
+    if not os.path.isabs(filename):
+      filename = os.path.join(self._data_dir, filename)
+    if os.path.exists(filename):
+      logging.info("reading messages from %s" % filename)
+      file = open(filename, "r")
+      msg = ""
+      title = None
+      lines = file.readlines()
+      # in case the file doesn't end in a blank line
+      lines.append("\n")
+      for line in lines:
+        # allow these files to have comments in them
+        if line.startswith('#') or line.startswith('//'):
+          continue
+        if not title:
+          if not line.isspace():
+            # first line of each message is a title
+            title = line
+          continue
+        elif not line.isspace():
+          msg += line
+        else:
+          # note that the hash doesn't include the title, see Message.__hash__
+          h = hash(msg)
+          msgs[h] = title + msg
+          title = None
+          msg = ""
+      logging.info("%s: %d msgs" % (filename, len(msgs)))
+    return msgs
+
+  def _SaveLatestGroupSummary(self, message_list):
+    '''Save a summary of message groups and their counts to a file in "latest"
+    '''
+    string_list = []
+    groups = message_list.UniqueMessageGroups()
+    group_keys = groups.keys()
+
+    group_keys.sort(cmp=lambda x,y: len(groups[y]) - len(groups[x]))
+    for group in group_keys:
+      string_list.append("%s: %d" % (group, len(groups[group])))
+
+    self.SaveLatestStrings(string_list, message_list.GetType(), "_GROUPS")
+
+  def CompareResults(self):
+    ''' Compares the results from the current run with the baseline data
+    stored in data/<name>_<key>.txt returning False if it finds new errors
+    that are not in the baseline.  See ReadFile() and SaveResults() for
+    details of what's in the original file and what's in the baseline.
+    Errors that show up in the baseline but not the current run are not
+    considered errors (they're considered "fixed"), but they do suggest
+    that the baseline file could be re-generated.'''
+    errors = 0
+    fixes = 0
+    for type in purify_message.message_type:
+      if type in ["MIU"]:
+        continue
+      # number of new errors for this message type
+      type_errors = []
+      # number of new unexpected fixes for this message type
+      type_fixes = []
+      # the messages from the current run that are in the baseline
+      new_baseline = []
+      # a common prefix used to describe the program being analyzed and the
+      # type of message which is used to generate filenames and descriptive
+      # error messages
+      type_name = "%s_%s" % (self._name, type)
+
+      # open the baseline file to compare against
+      baseline_file = "%s.txt" % type_name
+      baseline_hashes = self._MessageHashesFromFile(baseline_file)
+
+      # read the flakey file if it exists
+      flakey_file = "%s_flakey.txt" % type_name
+      flakey_hashes = self._MessageHashesFromFile(flakey_file)
+
+      # read the ignore file if it exists
+      ignore_file = "%s_ignore.txt" % type_name
+      ignore_hashes = self._MessageHashesFromFile(ignore_file)
+
+      # messages from the current run
+      current_list = self.GetMessageList(type)
+      if current_list:
+        # Since we're looking at the list of unique messages, if the number
+        # of occurrences of a given unique message changes, it won't show up
+        # as an error.
+ current_messages = current_list.UniqueMessages() + else: + current_messages = [] + current_hashes = {} + # compute errors and new baseline + for message in current_messages: + msg_hash = hash(message) + current_hashes[msg_hash] = message + if msg_hash in ignore_hashes or msg_hash in flakey_hashes: + continue + if msg_hash in baseline_hashes: + new_baseline.append(msg_hash) + continue + type_errors.append(msg_hash) + # compute unexpected fixes + for msg_hash in baseline_hashes: + if (msg_hash not in current_hashes and + msg_hash not in ignore_hashes and + msg_hash not in flakey_hashes): + type_fixes.append(baseline_hashes[msg_hash]) + + if len(current_messages) or len(type_errors) or len(type_fixes): + logging.info("%d '%s(%s)' messages " + "(%d new, %d unexpectedly fixed)" % (len(current_messages), + purify_message.GetMessageType(type), type, + len(type_errors), len(type_fixes))) + + if len(type_errors): + strs = [current_hashes[x].NormalizedStr(verbose=True) + for x in type_errors] + logging.error("%d new '%s(%s)' errors found\n%s" % (len(type_errors), + purify_message.GetMessageType(type), type, + '\n'.join(strs))) + strs = [current_hashes[x].NormalizedStr() for x in type_errors] + self.SaveLatestStrings(strs, type, "_NEW") + errors += len(type_errors) + + if len(type_fixes): + # we don't have access to the original message, so all we can do is log + # the non-verbose normalized text + logging.warning("%d new '%s(%s)' unexpected fixes found\n%s" % ( + len(type_fixes), purify_message.GetMessageType(type), + type, '\n'.join(type_fixes))) + self.SaveLatestStrings(type_fixes, type, "_FIXED") + fixes += len(type_fixes) + if len(current_messages) == 0: + logging.warning("all errors fixed in %s" % baseline_file) + + if len(type_fixes) or len(type_errors): + strs = [baseline_hashes[x] for x in new_baseline] + self.SaveLatestStrings(strs, type, "_BASELINE") + + if current_list: + self._SaveLatestGroupSummary(current_list) + + if errors: + logging.error("%d total new errors found" % errors) + return -1 + else: + logging.info("no new errors found - yay!") + if fixes: + logging.warning("%d total errors unexpectedly fixed" % fixes) + # magic return code to turn the builder orange (via ReturnCodeCommand) + return -88 + return 0 + + +# The following code is here for testing and development purposes. + +def _main(): + retcode = 0 + + parser = optparse.OptionParser("usage: %prog [options] <files to analyze>") + parser.add_option("-b", "--baseline", action="store_true", default=False, + help="save output to baseline files") + parser.add_option("-m", "--memory_in_use", + action="store_true", default=False, + help="print memory in use summary") + parser.add_option("", "--validate", + action="store_true", default=False, + help="validate results vs. 
baseline") + parser.add_option("-e", "--echo_to_stdout", + action="store_true", default=False, + help="echo purify output to standard output") + parser.add_option("", "--source_dir", + help="path to top of source tree for this build" + "(used to normalize source paths in output)") + parser.add_option("", "--byte_filter", default=16384, + help="prune the tree below this number of bytes") + parser.add_option("-n", "--name", + help="name of the test being run " + "(used for output filenames)") + parser.add_option("", "--data_dir", + help="path to where purify data files live") + parser.add_option("", "--bug_report", default=False, + action="store_true", + help="print output as an attempted summary of bugs") + parser.add_option("-v", "--verbose", action="store_true", default=False, + help="verbose output - enable debug log messages") + + (options, args) = parser.parse_args() + if not len(args) >= 1: + parser.error("no filename specified") + filenames = args + + if options.verbose: + google.logging_utils.config_root(level=logging.DEBUG) + else: + google.logging_utils.config_root(level=logging.INFO) + pa = PurifyAnalyze(filenames, options.echo_to_stdout, options.name, + options.source_dir, options.data_dir) + execute_crash = not pa.ReadFile() + if options.bug_report: + pa.PrintBugReport() + pa.PrintSummary(False) + elif options.memory_in_use: + pa.PrintMemoryInUse(int(options.byte_filter)) + elif execute_crash: + retcode = -1 + logging.error("Fatal error during test execution. Analysis skipped.") + elif options.validate: + if pa.CompareResults() != 0: + retcode = -1 + script_dir = google.path_utils.ScriptDir() + latest_dir = os.path.join(script_dir, "latest") + pa.SaveResults(latest_dir) + pa.PrintSummary() + elif options.baseline: + if not pa.SaveResults(verbose=True): + retcode = -1 + pa.PrintSummary(False) + else: + pa.PrintSummary(False) + + sys.exit(retcode) + +if __name__ == "__main__": + _main() + diff --git a/tools/purify/purify_coverage.py b/tools/purify/purify_coverage.py new file mode 100644 index 0000000..0828e22 --- /dev/null +++ b/tools/purify/purify_coverage.py @@ -0,0 +1,111 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_coverage.py + +import logging +import optparse +import os +import re +import sys + +import google.path_utils + +# local modules +import common +import purify_analyze +import purify_message + + +class PurifyCoverage(common.Rational): + def __init__(self): + common.Rational.__init__(self) + script_dir = google.path_utils.ScriptDir() + self._latest_dir = os.path.join(script_dir, "latest") + + def CreateOptionParser(self): + common.Rational.CreateOptionParser(self) + self._parser.description = __doc__ + self._parser.add_option("-n", "--name", + dest="name", default=None, + help="name of the test being run " + "(used for output filenames)") + self._parser.add_option("", "--source_dir", + help="path to top of source tree for this build" + "(used to normalize source paths in baseline)") + + def ParseArgv(self): + if common.Rational.ParseArgv(self): + self._name = self._options.name + if not self._name: + self._name = os.path.basename(self._exe) + # _out_file can be set in common.Rational.ParseArgv + if not self._out_file: + self._out_file = os.path.join(self._latest_dir, + "%s_coverage.txt" % (self._name)) + self._source_dir = self._options.source_dir + return True + return False + + def _PurifyCommand(self): + cmd = [common.PURIFYW_PATH, "/CacheDir=" + self._cache_dir, + "/ShowInstrumentationProgress=no", "/ShowLoadLibraryProgress=no", + "/AllocCallStackLength=30", "/Coverage", + "/CoverageDefaultInstrumentationType=line"] + return cmd + + def Instrument(self): + cmd = self._PurifyCommand() + # /Run=no means instrument only + cmd.append("/Run=no") + cmd.append(os.path.abspath(self._exe)) + return common.Rational.Instrument(self, cmd) + + def Execute(self): + cmd = self._PurifyCommand() + cmd.append("/SaveTextData=" + self._out_file) + # TODO(erikkay): should we also do /SaveMergeTextData? + return common.Rational.Execute(self, cmd) + + def Analyze(self): + if not os.path.isfile(self._out_file): + logging.info("no output file %s" % self._out_file) + return -1 + # TODO(erikkay): parse the output into a form we could use on the buildbots + return 0 + +if __name__ == "__main__": + rational = PurifyCoverage() + if rational.Run(): + retcode = 0 + else: + retcode = -1 + sys.exit(retcode) + diff --git a/tools/purify/purify_inuse.py b/tools/purify/purify_inuse.py new file mode 100644 index 0000000..d527189 --- /dev/null +++ b/tools/purify/purify_inuse.py @@ -0,0 +1,116 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_inuse.py + +import logging +import optparse +import os +import re +import sys + +import google.path_utils + +# local modules +import common +import purify_analyze +import purify_message + + +class PurifyInUse(common.Rational): + def __init__(self): + common.Rational.__init__(self) + script_dir = google.path_utils.ScriptDir() + self._latest_dir = os.path.join(script_dir, "latest") + + def CreateOptionParser(self): + common.Rational.CreateOptionParser(self) + self._parser.description = __doc__ + self._parser.add_option("-n", "--name", + dest="name", default=None, + help="name of the test being run " + "(used for output filenames)") + self._parser.add_option("", "--source_dir", + help="path to top of source tree for this build" + "(used to normalize source paths in baseline)") + self._parser.add_option("", "--byte_filter", default=16384, + help="prune the tree below this number of bytes") + + def ParseArgv(self): + if common.Rational.ParseArgv(self): + self._name = self._options.name + if not self._name: + self._name = os.path.basename(self._exe) + # _out_file can be set in common.Rational.ParseArgv + if not self._out_file: + self._out_file = os.path.join(self._latest_dir, "%s.txt" % (self._name)) + self._source_dir = self._options.source_dir + self._byte_filter = int(self._options.byte_filter) + return True + return False + + def _PurifyCommand(self): + cmd = [common.PURIFYW_PATH, "/CacheDir=" + self._cache_dir, + "/ShowInstrumentationProgress=no", "/ShowLoadLibraryProgress=no", + "/AllocCallStackLength=30", "/ErrorCallStackLength=30", + "/LeaksAtExit=no", "/InUseAtExit=yes"] + return cmd + + def Instrument(self): + cmd = self._PurifyCommand() + # /Run=no means instrument only + cmd.append("/Run=no") + cmd.append(os.path.abspath(self._exe)) + return common.Rational.Instrument(self, cmd) + + def Execute(self): + cmd = self._PurifyCommand() + cmd.append("/SaveTextData=" + self._out_file) + return common.Rational.Execute(self, cmd) + + def Analyze(self): + if not os.path.isfile(self._out_file): + logging.info("no output file %s" % self._out_file) + return -1 + pa = purify_analyze.PurifyAnalyze(self._out_file, False, + self._name, self._source_dir) + if not pa.ReadFile(): + logging.warning("inuse summary suspect due to fatal error during run") + pa.PrintMemoryInUse(byte_filter=self._byte_filter) + return 0 + +if __name__ == "__main__": + rational = PurifyInUse() + if rational.Run(): + retcode = 0 + else: + retcode = -1 + sys.exit(retcode) + diff --git a/tools/purify/purify_message.py b/tools/purify/purify_message.py new 
file mode 100644 index 0000000..d093461 --- /dev/null +++ b/tools/purify/purify_message.py @@ -0,0 +1,634 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_message.py + +''' Utility objects and functions to parse and unique Purify messages ''' + +import cStringIO +import logging +import re +import sys + +import google.logging_utils + +# used to represent one or more elided frames +ELIDE = "..." 
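+# An illustrative (hypothetical) normalized stack as rendered by
+# Stack.NormalizedStr() below: frames outside the source tree collapse to the
+# elide marker above, and frames below a known entry point collapse to the
+# truncation marker defined next (file and function names are made up):
+#   chrome/browser/foo.cc Foo::Bar(void)
+#   ...
+#   base/message_loop.cc MessageLoop::RunTask(Task *)
+#   ^^^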
+# used to represent stack truncation at a known entry point +TRUNCATE = "^^^" +# a file that's outside of our source directory +EXTERNAL_FILE = "EXTERNAL_FILE" + +# mapping of purify message types to descriptions +message_type = { + "ABR": "Array Bounds Read", + "ABW": "Array Bounds Write", + "ABWL": "Array Bounds Write (late detect)", + "BSR": "Beyond Stack Read", + "BSW": "Beyond Stack Write", + "COM": "COM API/Interface Failure", + "EXC": "Continued Exception", + "EXH": "Handled Exception", + "EXI": "Ignored Exception", + "EXU": "Unhandled Exception", + "FFM": "Freeing Freed Memory", + "FIM": "Freeing Invalid Memory", + "FMM": "Freeing Mismatched Memory", + "FMR": "Free Memory Read", + "FMW": "Free Memory Write", + "FMWL": "Free Memory Write (late detect)", + "HAN": "Invalid Handle", + "HIU": "Handle In Use", + "ILK": "COM Interface Leak", + "IPR": "Invalid Pointer Read", + "IPW": "Invalid Pointer Write", + "MAF": "Memory Allocation Failure", + "MIU": "Memory In Use", + "MLK": "Memory Leak", + "MPK": "Potential Memory Leak", + "NPR": "Null Pointer Read", + "NPW": "Null Pointer Write", + "PAR": "Bad Parameter", + "UMC": "Uninitialized Memory Copy", + "UMR": "Uninitialized Memory Read", +} + +# a magic message type which is not enumerated with the normal message type dict +FATAL = "FATAL" + +def GetMessageType(key): + if key in message_type: + return message_type[key] + elif key == FATAL: + return key + logging.warn("unknown message type %s" % key) + return "UNKNOWN" + +# currently unused, but here for documentation purposes +message_severity = { + "I": "Informational", + "E": "Error", + "W": "Warning", + "O": "Internal Purify Error", +} + + +class Stack: + ''' A normalized Purify Stack. The stack is constructed by adding one line + at a time from a stack in a Purify text file via AddLine. + Supports cmp and hash so that stacks which normalize the same can be sorted + and uniqued. + The original stack contents are preserved so that it's possible to drill + down into the full details if necessary. ''' + + # The top of the source tree. This is stripped from the filename as part + # of normalization. + source_dir = "" + + @classmethod + def SetSourceDir(cls, dir): + # normalize the dir + cls.source_dir = dir.replace("\\", "/").lower() + logging.debug("Stack.source_dir = %s" % cls.source_dir) + + # a line in a stack trace + pat_stack_line = re.compile('(.*)\[(\w:)?([^\:\s]*)(:\d+)?(\s+.*)?]') + + # Known stack entry points that allow us to truncate the rest of the stack + # below that point. 
+  pat_known_entries = (
+    re.compile('RunnableMethod::Run\(void\)'),
+    re.compile('ChromeMain'),
+    re.compile('BrowserMain'),
+    re.compile('wWinMain'),
+    re.compile('TimerManager::ProcessPendingTimer\(void\)'),
+    re.compile('RunnableMethod::RunnableMethod\(.*\)'),
+    re.compile('RenderViewHost::OnMessageReceived\(Message::IPC const&\)'),
+    re.compile('testing::Test::Run\(void\)'),
+    re.compile('testing::TestInfoImpl::Run\(void\)'),
+    re.compile('Thread::ThreadFunc\(void \*\)'),
+    re.compile('TimerTask::Run\(void\)'),
+    re.compile('MessageLoop::RunTask\(Task \*\)'),
+    re.compile('.DispatchToMethod\@.*'),
+  )
+
+  # if functions match the following, elide them from the stack
+  pat_func_elide = (re.compile('^std::'), re.compile('^new\('))
+  # if files match the following, elide them from the stack
+  pat_file_elide = (re.compile('.*platformsdk_vista.*'),
+                    re.compile('.*\.(dll|DLL)$'),
+                    # bug 1069902
+                    re.compile('webkit/pending/wtf/fastmalloc\.h'),
+                    # When we leak sqlite stuff, we leak a lot, and the stacks
+                    # are all over the place. For now, let's assume that
+                    # sqlite itself is leak free and focus on our calling code.
+                    re.compile('chrome/third_party/sqlite/.*'),
+                    )
+
+  pat_unit_test = re.compile('^([a-zA-Z0-9]+)_(\w+)_Test::.*')
+
+  def __init__(self, title):
+    self._title = title.lstrip()
+    self._stack = []
+    self._orig = ""
+    # are we currently in an eliding block
+    self._eliding = False
+    # have we truncated the stack?
+    self._truncated = False
+    # is the stack made up completely of external code? (i.e. elided)
+    self._all_external = True
+    # a logical group that this stack belongs to
+    self._group = None
+    # top stack line (preserved even if elided)
+    self._top_stack_line = None
+
+  def GetLines(self):
+    return self._stack
+
+  def GetTopStackLine(self):
+    return self._top_stack_line
+
+  def GetTopVisibleStackLine(self):
+    for line in self._stack:
+      if line['function']:
+        return line
+    return {}
+
+  def GetGroup(self):
+    '''A logical grouping for this stack, allowing related stacks to be grouped
+    together. Subgroups within a group are separated by ".".
+    (e.g.
group.subgroup.subsubgroup) + ''' + return self._group; + + def _ComputeStackLine(self, line): + line = line.lstrip() + m = Stack.pat_stack_line.match(line) + if m: + func = m.group(1).rstrip() + func = self._DemangleSymbol(func) + func = self._DetemplatizeSymbol(func) + if m.group(2): + file = m.group(2) + m.group(3) + else: + file = m.group(3) + # paths are normalized to use / and be lower case + file = file.replace("\\", "/").lower() + if not file.startswith(Stack.source_dir): + file = EXTERNAL_FILE + else: + file = file[len(Stack.source_dir):] + # trim leading / if present + if file[0] == "/": + file = file[1:] + loc = m.group(4) + if loc: + loc = int(loc[1:]) + else: + loc = 0 + return {'function': func, 'file': file, 'line_number': loc} + return None + + def _ShouldElide(self, stack_line): + func = stack_line['function'] + file = stack_line['file'] + # elide certain common functions from the stack such as the STL + for pat in Stack.pat_func_elide: + if pat.match(func): + logging.debug("eliding due to func pat match: %s" % func) + return True + if file == EXTERNAL_FILE: + # if it's not in our source tree, then elide + logging.debug("eliding due to external file: %s" % file) + return True + # elide certain common file sources from the stack, usually this + # involves system libraries + for pat in Stack.pat_file_elide: + if pat.match(file): + logging.debug("eliding due to file pat match: %s" % file) + return True + + return False + + def AddLine(self, line): + ''' Add one line from a stack in a Purify text file. Lines must be + added in order (top down). Lines are added to two internal structures: + an original string copy and an array of normalized lines, split into + (function, file, line number). + Stack normalization does several things: + * elides sections of the stack that are in external code + * truncates the stack at so called "known entry points" + * removes template type information from symbols + Returns False if the line was elided or otherwise omitted. + ''' + self._orig += line + "\n" + stack_line = self._ComputeStackLine(line) + if stack_line: + if not self._top_stack_line: + self._top_stack_line = stack_line + # Unit test entry points are good groupings. Even if we already have a + # group set, a later unit-test stack line will override. + # Note that we also do this even if the stack has already been truncated + # since this is useful information. + # TODO(erikkay): Maybe in this case, the truncation should be overridden? + test_match = Stack.pat_unit_test.match(stack_line["function"]) + if test_match: + self._group = test_match.group(1) + "." + test_match.group(2) + + if self._truncated: + return False + + if self._ShouldElide(stack_line): + if not self._eliding: + self._eliding = True + self._stack.append({'function': "", 'file': ELIDE, 'line_number': 0}) + return False + else: + self._stack.append(stack_line) + self._eliding = False + self._all_external = False + + # when we reach one of the known common stack entry points, truncate + # the stack to avoid printing overly redundant information + if len(self._stack) > 1: + for f in Stack.pat_known_entries: + if f.match(stack_line["function"]): + if not self._group: + # we're at the end of the stack, so use the path to the file + # as the group if we don't already have one + # This won't be incredibly reliable, but might still be useful. 
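+            # e.g. if the frame just above the entry point is in
+            # "chrome/browser/foo.cc" (hypothetical path), the group becomes
+            # "chrome.browser"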
+ prev = self._stack[-2] + if prev['file']: + self._group = '.'.join(prev['file'].split('/')[:-1]) + self._stack.append({'function': "", 'file': TRUNCATE, + 'line_number': 0}) + self._truncated = True + return False + return True + else: + # skip these lines + logging.debug(">>>" + line) + return False + + def _DemangleSymbol(self, symbol): + # TODO(erikkay) - I'm not sure why Purify prepends an address on the + # front of some of these as if it were a namespace (?A<addr>::). From an + # analysis standpoint, it seems meaningless and can change from machine to + # machine, so it's best if it's thrown away + if symbol.startswith("?A0x"): + skipto = symbol.find("::") + if skipto >= 0: + symbol = symbol[(skipto+2):] + else: + logging.warn("unable to strip address off of symbol (%s)" % symbol) + # TODO(erikkay) there are more symbols not being properly demangled + # in Purify's output. Some of these look like template-related issues. + return symbol + + def _DetemplatizeSymbol(self, symbol): + ''' remove all of the template arguments and return values from the + symbol, normalizing it, making it more readable, and less precise ''' + ret = "" + nested = 0 + for i in range(len(symbol)): + if nested > 0: + if symbol[i] == '>': + nested -= 1 + elif symbol[i] == '<': + nested += 1 + elif symbol[i] == '<': + nested += 1 + else: + ret += symbol[i] + return ret + + def __hash__(self): + return hash(self.NormalizedStr()) + + def __cmp__(self, other): + if not other: + return 1 + len_self = len(self._stack) + len_other = len(other._stack) + min_len = min(len_self, len_other) + # sort stacks from the bottom up + for i in range(-1, -(min_len + 1), -1): + # compare file, then func, but omit line number + ret = cmp((self._stack[i]['file'], self._stack[i]['function']), + (other._stack[i]['file'], other._stack[i]['function'])) + if ret: + return ret + return cmp(len_self, len_other) + + def NormalizedStr(self, verbose=False): + ''' String version of the normalized stack. See AddLine for normalization + details. ''' + # use cStringIO for more efficient string building + out = cStringIO.StringIO() + for line in self._stack: + out.write(" ") + out.write(line['file']) + if verbose and line['line_number'] > 0: + out.write(":%d" % line['line_number']) + out.write(" ") + out.write(line['function']) + out.write("\n") + ret = out.getvalue() + out.close() + return ret + + def __str__(self): + return self._orig + + +class Message: + '''A normalized message from a Purify text file. Messages all have a + severity, most have a type, and many have an error stack and/or an + allocation stack. + Supports cmp and hash so that messages which normalize the same can be + sorted and uniqued.''' + + pat_count = re.compile('^(.*) \{(\d+) occurrences?\}') + pat_leak = re.compile('(Potential )?[Mm]emory leak of (\d+) bytes? ' + 'from (\d+) blocks? allocated in (.+)') + pat_miu = re.compile('Memory use of (\d+) bytes? ' + '(\((\d+)% initialized\) )?from (\d+) blocks? ' + 'allocated .. 
(.+)') + # these are headings to different types of stack traces + pat_loc_error = re.compile('\s*(Exception|Error|Call) location') + pat_loc_alloc = re.compile('\s*Allocation location') + pat_loc_free = re.compile('\s*Free location') + pat_loc_free2 = re.compile('\s*Location of free attempt') + + def __init__(self, severity, type, title): + self._severity = severity + self._type = type + self._program = None + self._head = "" + self._loc_alloc = None + self._loc_error = None + self._loc_free = None + self._stack = None + self._count = 1 + self._bytes = 0 + self._blocks = 0 + m = Message.pat_count.match(title) + if m: + self._title = m.group(1) + self._count = int(m.group(2)) + else: + m = Message.pat_leak.match(title) + if m: + self._title = m.group(4) + self._bytes = int(m.group(2)) + self._blocks = int(m.group(3)) + else: + m = Message.pat_miu.match(title) + if m: + self._title = m.group(5) + self._bytes = int(m.group(1)) + self._blocks = int(m.group(4)) + #print "%d/%d - %s" % (self._bytes, self._blocks, title[0:60]) + elif type == "MIU": + logging.error("%s didn't match" % title) + sys.exit(-1) + else: + self._title = title + + def GetAllocStack(self): + return self._loc_alloc + + def GetErrorStack(self): + return self._loc_error + + def GetGroup(self): + '''An attempted logical grouping for this Message computed by the contained + Stack objects. + ''' + group = None + if self._loc_alloc: + group = self._loc_alloc.GetGroup() + if not group and self._loc_error: + group = self._loc_error.GetGroup() + if not group and self._loc_free: + group = self._loc_free.GetGroup() + if not group: + group = "UNKNOWN" + return group + + def AddLine(self, line): + '''Add a line one at a time (in order from the Purify text file) to + build up the message and its associated stacks. ''' + + if Message.pat_loc_error.match(line): + self._stack = Stack(line) + self._loc_error = self._stack + elif Message.pat_loc_alloc.match(line): + self._stack = Stack(line) + self._loc_alloc = self._stack + elif Message.pat_loc_free.match(line) or Message.pat_loc_free2.match(line): + self._stack = Stack(line) + self._loc_free = self._stack + elif self._stack: + if not line.startswith(" "): + logging.debug("*** " + line) + self._stack.AddLine(line) + else: + self._head += line.lstrip() + + def Type(self): + return self._type + + def Program(self): + return self._program + + def SetProgram(self, program): + self._program = program + + def StacksAllExternal(self): + '''Returns True if the stacks it contains are made up completely of + external (elided) symbols''' + return ((not self._loc_error or self._loc_error._all_external) and + (not self._loc_alloc or self._loc_alloc._all_external) and + (not self._loc_free or self._loc_free._all_external)) + + def __hash__(self): + # NOTE: see also _MessageHashesFromFile. If this method changes, then + # _MessageHashesFromFile must be updated to match. + s = "" + if self._loc_error: + s += "Error Location\n" + self._loc_error.NormalizedStr() + if self._loc_alloc: + s += "Alloc Location\n" + self._loc_alloc.NormalizedStr() + if self._loc_free: + s += "Free Location\n" + self._loc_free.NormalizedStr() + return hash(s) + + def NormalizedStr(self, verbose=False): + '''String version of the normalized message. Only includes title + and normalized versions of error and allocation stacks if present. 
+    Example:
+      Uninitialized Memory Read in Foo::Bar()
+      Error Location
+        foo/Foo.cc Foo::Bar(void)
+        foo/main.cc start(void)
+        foo/main.cc main(void)
+      Alloc Location
+        foo/Foo.cc Foo::Foo(void)
+        foo/main.cc start(void)
+        foo/main.cc main(void)
+    '''
+    ret = ""
+    # some of the message types are more verbose than others and we
+    # don't need to indicate their type
+    if verbose and self._type not in ["UMR", "IPR", "IPW"]:
+      ret += GetMessageType(self._type) + ": "
+    if verbose and self._bytes > 0:
+      ret += "(%d bytes, %d blocks) " % (self._bytes, self._blocks)
+    ret += "%s\n" % self._title
+    if self._loc_error:
+      ret += "Error Location\n" + self._loc_error.NormalizedStr(verbose)
+    if self._loc_alloc:
+      ret += "Alloc Location\n" + self._loc_alloc.NormalizedStr(verbose)
+    if self._loc_free:
+      ret += "Free Location\n" + self._loc_free.NormalizedStr(verbose)
+    return ret
+
+  def __str__(self):
+    ret = self._title + "\n" + self._head
+    if self._loc_error:
+      ret += "Error Location\n" + str(self._loc_error)
+    if self._loc_alloc:
+      ret += "Alloc Location\n" + str(self._loc_alloc)
+    if self._loc_free:
+      ret += "Free Location\n" + str(self._loc_free)
+    return ret
+
+  def __cmp__(self, other):
+    if not other:
+      return 1
+    ret = 0
+    if self._loc_error:
+      ret = cmp(self._loc_error, other._loc_error)
+    if ret == 0 and self._loc_alloc:
+      ret = cmp(self._loc_alloc, other._loc_alloc)
+    if ret == 0 and self._loc_free:
+      ret = cmp(self._loc_free, other._loc_free)
+    # since title is often not very interesting, we sort against that last
+    if ret == 0:
+      ret = cmp(self._title, other._title)
+    return ret
+
+
+class MessageList:
+  '''A collection of Message objects of a given message type.'''
+  def __init__(self, type):
+    self._type = type
+    self._messages = []
+    self._unique_messages = None
+    self._sublists = None
+    self._bytes = 0
+
+  def GetType(self):
+    return self._type
+
+  def BeginNewSublist(self):
+    '''Some message types are logically grouped into sets of messages which
+    should not be mixed in the same list. Specifically, Memory In Use (MIU),
+    Memory Leak (MLK) and Potential Memory Leak (MPK) are generated in a set
+    all at once, but this generation can happen at multiple distinct times,
+    either via the Purify UI or through Purify API calls. For example, if
+    Purify is told to dump a list of all memory leaks once, and then again a
+    few minutes later, the two lists will certainly overlap, so they should
+    be kept in separate lists.
+    In order to accommodate this, MessageList supports the notion of sublists.
+    When the caller determines that one list of messages of a type has ended
+    and a new list has begun, it calls BeginNewSublist() which takes the
+    current set of messages, puts them into a new MessageList and puts that
+    into the sublists array. Later, when the caller needs to get at these
+    messages, GetSublists() should be called.
+    '''
+    if len(self._messages):
+      # only create a sublist when there are messages to move into it
+      sublist = MessageList(self._type)
+      sublist._messages = self._messages
+      if not self._sublists:
+        self._sublists = [sublist]
+      else:
+        self._sublists.append(sublist)
+      self._messages = []
+      logging.info("total size: %d" % self._bytes)
+      self._bytes = 0
+
+  def GetSublists(self):
+    '''Returns the current list of sublists. If there are currently sublists
+    and there are any messages that aren't in a sublist, BeginNewSublist() is
+    called implicitly by this method to force those ungrouped messages into
+    their own sublist.
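+
+    A hypothetical sketch of the intended call pattern (names illustrative):
+      leaks = MessageList("MLK")
+      leaks.AddMessage(msg_from_first_dump)
+      leaks.BeginNewSublist()    # first leak dump has ended
+      leaks.AddMessage(msg_from_second_dump)
+      sublists = leaks.GetSublists()    # two sublists, one per dump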
+ ''' + if self._sublists and len(self._sublists) and len(self._messages): + self.BeginNewSublist() + return self._sublists + + def AddMessage(self, msg): + '''Adds a message to this MessageList.''' + # TODO(erikkay): assert if _unique_messages exists + self._messages.append(msg) + self._bytes += msg._bytes + + def AllMessages(self): + '''Returns an array of all Message objects in this MessageList. ''' + # TODO(erikkay): handle case with sublists + return self._messages + + def UniqueMessages(self): + '''Returns an array of the unique normalized Message objects in this + MessageList. + ''' + # the list is lazily computed since we have to create a sorted list, + # which is only valid once all messages have been added + # TODO(erikkay): handle case with sublists + if not self._unique_messages: + self._unique_messages = list(set(self._messages)) + self._unique_messages.sort() + return self._unique_messages + + def UniqueMessageGroups(self): + '''Returns a dictionary mapping Message group names to arrays of uniqued + normalized Message objects in this MessageList. + ''' + unique = self.UniqueMessages() + groups = {} + for msg in unique: + group = msg.GetGroup() + if not group in groups: + groups[group] = [] + groups[group].append(msg) + return groups diff --git a/tools/purify/purify_test.py b/tools/purify/purify_test.py new file mode 100644 index 0000000..c2c1b76 --- /dev/null +++ b/tools/purify/purify_test.py @@ -0,0 +1,249 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_test.py + +'''Runs an exe through Purify and verifies that Purify was +able to successfully instrument and run it. The original purpose was +to be able to identify when a change to our code breaks our ability to Purify +the app. This can happen with seemingly innocuous changes to code due to bugs +in Purify, and is notoriously difficult to track down when it does happen. +Perhaps more importantly in the long run, this can also automate detection of +leaks and other memory bugs. 
It also may be useful to allow people to run
+Purify in a consistent manner without having to worry about broken PATHs,
+corrupt instrumentation, or other per-machine flakiness that Purify is
+sometimes subject to.
+'''
+
+import glob
+import logging
+import optparse
+import os
+import re
+import shutil
+import sys
+import time
+
+import google.path_utils
+
+# local modules
+import common
+import purify_analyze
+
+class Purify(common.Rational):
+  def __init__(self):
+    common.Rational.__init__(self)
+    script_dir = google.path_utils.ScriptDir()
+    self._latest_dir = os.path.join(script_dir, "latest")
+    self._data_dir = None
+
+  def CreateOptionParser(self):
+    common.Rational.CreateOptionParser(self)
+    self._parser.description = __doc__
+    self._parser.add_option("-e", "--echo_to_stdout",
+                            dest="echo_to_stdout", action="store_true",
+                            default=False,
+                            help="echo purify output to standard output")
+    self._parser.add_option("-b", "--baseline",
+                            dest="baseline", action="store_true",
+                            default=False,
+                            help="create baseline error files")
+    self._parser.add_option("-n", "--name",
+                            dest="name", default=None,
+                            help="name of the test being run "
+                                 "(used for output filenames)")
+    self._parser.add_option("", "--source_dir",
+                            help="path to top of source tree for this build "
+                                 "(used to normalize source paths in baseline)")
+    self._parser.add_option("", "--exe",
+                            help="The actual exe to instrument which is "
+                                 "different from the program being run. This "
+                                 "is useful when the exe you want to purify is "
+                                 "run by another script or program.")
+    self._parser.add_option("", "--data_dir",
+                            help="path to where purify data files live")
+
+  def ParseArgv(self):
+    if common.Rational.ParseArgv(self):
+      if self._options.exe:
+        self._exe = self._options.exe
+        if not os.path.isfile(self._exe):
+          logging.error("file doesn't exist " + self._exe)
+          return False
+        self._exe_dir = common.FixPath(os.path.abspath(os.path.dirname(self._exe)))
+      self._echo_to_stdout = self._options.echo_to_stdout
+      self._baseline = self._options.baseline
+      self._name = self._options.name
+      if not self._name:
+        self._name = os.path.basename(self._exe)
+      # _out_file can be set in common.Rational.ParseArgv
+      if not self._out_file:
+        self._out_file = os.path.join(self._latest_dir, "%s.txt" % self._name)
+      self._source_dir = self._options.source_dir
+      self._data_dir = self._options.data_dir
+      if not self._data_dir:
+        script_dir = google.path_utils.ScriptDir()
+        self._data_dir = os.path.join(script_dir, "data")
+      return True
+    return False
+
+  def _PurifyCommand(self):
+    cmd = [common.PURIFY_PATH, "/CacheDir=" + self._cache_dir]
+    return cmd
+
+  def Setup(self):
+    script_dir = google.path_utils.ScriptDir()
+    if common.Rational.Setup(self):
+      pft_file = os.path.join(script_dir, "data", "filters.pft")
+      shutil.copyfile(pft_file, self._exe.replace(".exe", "_exe.pft"))
+      string_list = [
+        "[Purify]",
+        "option -cache-dir=\"%s\"" % (self._cache_dir),
+        "option -save-text-data=\"%s\"" % (common.FixPath(self._out_file)),
+        "option -alloc-call-stack-length=30",
+        "option -error-call-stack-length=30",
+        "option -free-call-stack-length=30",
+        "option -leaks-at-exit=yes",
+        "option -in-use-at-exit=no"
+      ]
+      ini_file = self._exe.replace(".exe", "_pure.ini")
+      if os.path.isfile(ini_file):
+        ini_file_orig = ini_file + ".Original"
+        if not os.path.isfile(ini_file_orig):
+          os.rename(ini_file, ini_file_orig)
+      try:
+        f = open(ini_file, "w+")
+        f.write('\n'.join(string_list))
+      except IOError, (errno, strerror):
+        logging.error("error writing to file %s (%d, %s)" %
+                      (ini_file, errno, strerror))
+        return False
+      if f:
+        f.close()
+      return True
+
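+    # common.Rational.Setup failed (e.g. a stale or corrupt instrumentation
+    # cache), so there is nothing more for us to set up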
return False + + def Instrument(self): + if not os.path.isfile(self._exe): + logging.error("file doesn't exist " + self._exe) + return False + cmd = self._PurifyCommand() + # /Run=no means instrument only, /Replace=yes means replace the exe in place + cmd.extend(["/Run=no", "/Replace=yes"]) + cmd.append(os.path.abspath(self._exe)) + return common.Rational.Instrument(self, cmd) + + def _ExistingOutputFiles(self): + pat_multi = re.compile('(.*)%[0-9]+d(.*)') + m = pat_multi.match(self._out_file) + if m: + g = m.group(1) + '[0-9]*' + m.group(2) + out = glob.glob(g) + if os.path.isfile(m.group(1) + m.group(2)): + out.append(m.group(1) + m.group(2)) + return out + if not os.path.isfile(self._out_file): + return [] + return [self._out_file] + + def Execute(self): + # delete the old file(s) to make sure that this run actually generated + # something new + out_files = self._ExistingOutputFiles() + for f in out_files: + os.remove(f) + common.Rational.Execute(self, []) + # Unfortunately, when we replace the exe, there's no way here to figure out + # if purify is actually going to output a file or if the exe just crashed + # badly. The reason is that it takes some small amount of time for purify + # to dump out the file. + count = 60 + while count > 0 and not os.path.isfile(self._out_file): + time.sleep(0.2) + count -= 1 + # Always return true, even if Execute failed - we'll depend on Analyze to + # determine if the run was valid. + return True + + def Analyze(self): + out_files = self._ExistingOutputFiles() + if not len(out_files): + logging.info("no output files matching %s" % self._out_file) + return -1 + pa = purify_analyze.PurifyAnalyze(out_files, self._echo_to_stdout, + self._name, self._source_dir, + self._data_dir) + if not pa.ReadFile(): + # even though there was a fatal error during Purify, it's still useful + # to see the normalized output + pa.PrintSummary() + if self._baseline: + logging.warning("baseline not generated due to fatal error") + else: + logging.warning("baseline comparison skipped due to fatal error") + return -1 + if self._baseline: + pa.PrintSummary(False) + if pa.SaveResults(): + return 0 + return -1 + else: + retcode = pa.CompareResults() + if retcode != 0: + pa.SaveResults(self._latest_dir) + pa.PrintSummary() + # with more than one output file, it's also important to emit the bug + # report which includes info on the arguments that generated each stack + if len(out_files) > 1: + pa.PrintBugReport() + return retcode + + def Cleanup(self): + common.Rational.Cleanup(self); + cmd = self._PurifyCommand() + # undo the /Replace=yes that was done in Instrument(), which means to + # remove the instrumented exe, and then rename exe.Original back to exe. 
+ cmd.append("/UndoReplace") + cmd.append(os.path.abspath(self._exe)) + common.RunSubprocess(cmd, self._timeout, detach=True) + # if we overwrote an existing ini file, restore it + ini_file = self._exe.replace(".exe", "_pure.ini") + if os.path.isfile(ini_file): + os.remove(ini_file) + ini_file_orig = ini_file + ".Original" + if os.path.isfile(ini_file_orig): + os.rename(ini_file_orig, ini_file) + # remove the pft file we wrote out + pft_file = self._exe.replace(".exe", "_exe.pft") + if os.path.isfile(pft_file): + os.remove(pft_file) + + +if __name__ == "__main__": + rational = Purify() + retcode = rational.Run() + sys.exit(retcode) + diff --git a/tools/purify/quantify_test.py b/tools/purify/quantify_test.py new file mode 100644 index 0000000..054af5d --- /dev/null +++ b/tools/purify/quantify_test.py @@ -0,0 +1,85 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# quantify_test.py + +'''Runs an app through Quantify and verifies that Quantify was able to +successfully instrument and run it. The original purpose was to allow people +to run Quantify in a consistent manner without having to worry about broken +PATHs, corrupt instrumentation, or other per-machine flakiness that Quantify is +sometimes subject to. Unlike purify_test, the output from quantify_test is +a binary file, which is much more useful in manual analysis. As such, this +tool is not particularly interesting for automated analysis yet. 
+'''
+
+import os
+import sys
+
+# local modules
+import common
+
+class Quantify(common.Rational):
+  def __init__(self):
+    common.Rational.__init__(self)
+
+  def CreateOptionParser(self):
+    common.Rational.CreateOptionParser(self)
+    self._parser.description = __doc__
+
+  def ParseArgv(self):
+    if common.Rational.ParseArgv(self):
+      if not self._out_file:
+        self._out_file = os.path.join(self._cache_dir,
+                                      "%s.qfy" % (os.path.basename(self._exe)))
+      return True
+    return False
+
+  def Instrument(self):
+    proc = [common.QUANTIFYE_PATH, "-quantify",
+            '-quantify_home="' + common.PPLUS_PATH + '"',
+            "/CacheDir=" + self._cache_dir,
+            "-first-search-dir=" + self._exe_dir, self._exe]
+    return common.Rational.Instrument(self, proc)
+
+  def Execute(self):
+    # TODO(erikkay): add an option to also do /SaveTextData and add an
+    # Analyze method for automated analysis of that data.
+    proc = [common.QUANTIFYW_PATH, "/CacheDir=" + self._cache_dir,
+            "/ShowInstrumentationProgress=no", "/ShowLoadLibraryProgress=no",
+            "/SaveData=" + self._out_file]
+    return common.Rational.Execute(self, proc)
+
+if __name__ == "__main__":
+  retcode = -1
+  rational = Quantify()
+  if rational.Run():
+    retcode = 0
+  sys.exit(retcode)