| author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
|---|---|---|
| committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
| commit | 920c091ac3ee15079194c82ae8a7a18215f3f23c | (patch) |
| tree | d28515d1e7732e2b6d077df1b4855ace3f4ac84f | /tools/purify |
| parent | ae2c20f398933a9e86c387dcc465ec0f71065ffc | (diff) |
| download | chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.zip, chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.gz, chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.bz2 | |
Add tools to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/purify')
-rw-r--r-- | tools/purify/chrome_tests.py | 259
-rwxr-xr-x | tools/purify/chrome_tests.sh | 9
-rw-r--r-- | tools/purify/common.py | 293
-rw-r--r-- | tools/purify/data/filters.pft | bin (0 -> 11294 bytes)
-rw-r--r-- | tools/purify/data/ignore.txt | 12
-rw-r--r-- | tools/purify/purify_analyze.py | 874
-rw-r--r-- | tools/purify/purify_coverage.py | 111
-rw-r--r-- | tools/purify/purify_inuse.py | 116
-rw-r--r-- | tools/purify/purify_message.py | 634
-rw-r--r-- | tools/purify/purify_test.py | 249
-rw-r--r-- | tools/purify/quantify_test.py | 85
11 files changed, 2642 insertions, 0 deletions
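The entry point for the new tools is chrome_tests.py, which drives the other scripts. As a hedged illustration (not part of this commit; the build directory is an assumption), an invocation matching the "-b <dir> -t <test>" usage string defined in that script might look like:

    # Illustrative only: launches chrome_tests.py the way a bot or developer
    # might, per the optparse usage string visible in the diff below.
    import subprocess

    ret = subprocess.call(["python.exe", "tools/purify/chrome_tests.py",
                           "-b", "chrome/Release",   # compiler output dir
                           "-t", "unit",             # run unit_tests.exe
                           "-t", "base"])            # then base_unittests.exe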
diff --git a/tools/purify/chrome_tests.py b/tools/purify/chrome_tests.py new file mode 100644 index 0000000..9ae22dd --- /dev/null +++ b/tools/purify/chrome_tests.py @@ -0,0 +1,259 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# chrome_tests.py + +''' Runs various chrome tests through purify_test.py +''' + +import logging +import optparse +import os +import stat +import sys + +import google.logging_utils +import google.path_utils +import google.platform_utils + +import common + +class TestNotFound(Exception): pass + +class ChromeTests: + + def __init__(self, options, args, test): + # the known list of tests + self._test_list = {"test_shell": self.TestTestShell, + "unit": self.TestUnit, + "net": self.TestNet, + "ipc": self.TestIpc, + "base": self.TestBase, + "layout": self.TestLayout, + "ui": self.TestUI} + + if test not in self._test_list: + raise TestNotFound("Unknown test: %s" % test) + + self._options = options + self._args = args + self._test = test + + script_dir = google.path_utils.ScriptDir() + utility = google.platform_utils.PlatformUtility(script_dir) + # Compute the top of the tree (the "source dir") from the script dir (where + # this script lives). We assume that the script dir is in tools/purify + # relative to the top of the tree. 
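For illustration (the checkout path here is hypothetical, not from this commit), the two dirname calls walk from tools/purify up to the tree root:

    import ntpath  # Windows path semantics regardless of host OS
    script_dir = r"C:\src\chromium\src\tools\purify"  # hypothetical checkout
    source_dir = ntpath.dirname(ntpath.dirname(script_dir))
    assert source_dir == r"C:\src\chromium\src"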
+ self._source_dir = os.path.dirname(os.path.dirname(script_dir)) + # since this path is used for string matching, make sure it's always + # an absolute Windows-style path + self._source_dir = utility.GetAbsolutePath(self._source_dir) + purify_test = os.path.join(script_dir, "purify_test.py") + self._command_preamble = ["python.exe", purify_test, "--echo_to_stdout", + "--source_dir=%s" % (self._source_dir), + "--save_cache"] + + def _DefaultCommand(self, module, exe=None): + '''Generates the default command array that most tests will use.''' + module_dir = os.path.join(self._source_dir, module) + if module == "chrome": + # unfortunately, not all modules have the same directory structure + self._data_dir = os.path.join(module_dir, "test", "data", "purify") + else: + self._data_dir = os.path.join(module_dir, "data", "purify") + if not self._options.build_dir: + dir_chrome = os.path.join(self._source_dir, "chrome", "Release") + dir_module = os.path.join(module_dir, "Release") + if exe: + exe_chrome = os.path.join(dir_chrome, exe) + exe_module = os.path.join(dir_module, exe) + if os.path.isfile(exe_chrome) and not os.path.isfile(exe_module): + self._options.build_dir = dir_chrome + elif os.path.isfile(exe_module) and not os.path.isfile(exe_chrome): + self._options.build_dir = dir_module + elif os.stat(exe_module)[stat.ST_MTIME] > os.stat(exe_chrome)[stat.ST_MTIME]: + self._options.build_dir = dir_module + else: + self._options.build_dir = dir_chrome + else: + if os.path.isdir(dir_chrome) and not os.path.isdir(dir_module): + self._options.build_dir = dir_chrome + elif os.path.isdir(dir_module) and not os.path.isdir(dir_chrome): + self._options.build_dir = dir_module + elif os.stat(dir_module)[stat.ST_MTIME] > os.stat(dir_chrome)[stat.ST_MTIME]: + self._options.build_dir = dir_module + else: + self._options.build_dir = dir_chrome + + cmd = self._command_preamble + cmd.append("--data_dir=%s" % self._data_dir) + if self._options.baseline: + cmd.append("--baseline") + if self._options.verbose: + cmd.append("--verbose") + if exe: + cmd.append(os.path.join(self._options.build_dir, exe)) + return cmd + + def Run(self): + ''' Runs the test specified by command-line argument --test ''' + logging.info("running test %s" % (self._test)) + return self._test_list[self._test]() + + def _ReadGtestFilterFile(self, name, cmd): + '''Read a file which is a list of tests to filter out with --gtest_filter + and append the command-line option to cmd. + ''' + filters = [] + filename = os.path.join(self._data_dir, name + ".gtest.txt") + if os.path.exists(filename): + f = open(filename, 'r') + for line in f.readlines(): + if line.startswith("#") or line.startswith("//") or line.isspace(): + continue + line = line.rstrip() + filters.append(line) + gtest_filter = self._options.gtest_filter + if len(filters): + if gtest_filter: + gtest_filter += ":" + if gtest_filter.find("-") < 0: + gtest_filter += "-" + else: + gtest_filter = "-" + gtest_filter += ":".join(filters) + if gtest_filter: + cmd.append("--gtest_filter=%s" % gtest_filter) + + def SimpleTest(self, module, name): + cmd = self._DefaultCommand(module, name) + self._ReadGtestFilterFile(name, cmd) + return common.RunSubprocess(cmd, 0) + + def ScriptedTest(self, module, exe, name, script, multi=False, cmd_args=None): + '''Purify a target exe, which will be executed one or more times via a + script or driver program. + Args: + module - which top level component this test is from (webkit, base, etc.) 
+ exe - the name of the exe (it's assumed to exist in build_dir) + name - the name of this test (used to name output files) + script - the driver program or script. If it's python.exe, we use + search-path behavior to execute, otherwise we assume that it is in + build_dir. + multi - a boolean hint that the exe will be run multiple times, generating + multiple output files (without this option, only the last run will be + recorded and analyzed) + cmd_args - extra arguments to pass to the purify_test.py script + ''' + cmd = self._DefaultCommand(module) + exe = os.path.join(self._options.build_dir, exe) + cmd.append("--exe=%s" % exe) + cmd.append("--name=%s" % name) + if multi: + out = os.path.join(google.path_utils.ScriptDir(), + "latest", "%s%%5d.txt" % name) + cmd.append("--out_file=%s" % out) + if cmd_args: + cmd.extend(cmd_args) + if script[0] != "python.exe" and not os.path.exists(script[0]): + script[0] = os.path.join(self._options.build_dir, script[0]) + cmd.extend(script) + self._ReadGtestFilterFile(name, cmd) + return common.RunSubprocess(cmd, 0) + + def TestBase(self): + return self.SimpleTest("base", "base_unittests.exe") + + def TestIpc(self): + return self.SimpleTest("chrome", "ipc_tests.exe") + + def TestNet(self): + return self.SimpleTest("net", "net_unittests.exe") + + def TestTestShell(self): + return self.SimpleTest("webkit", "test_shell_tests.exe") + + def TestUnit(self): + return self.SimpleTest("chrome", "unit_tests.exe") + + def TestLayout(self): + script = os.path.join(self._source_dir, "webkit", "tools", "layout_tests", + "run_webkit_tests.py") + script_cmd = ["python.exe", script, "--run-singly", "-v", + "--noshow-results", "--time-out-ms=200000"] + if len(self._args): + # if the arg is a txt file, then treat it as a list of tests + if os.path.isfile(self._args[0]) and self._args[0][-4:] == ".txt": + script_cmd.append("--test-list=%s" % self._args[0]) + else: + script_cmd.extend(self._args) + self.ScriptedTest("webkit", "test_shell.exe", "layout", + script_cmd, multi=True, cmd_args=["--timeout=0"]) + # since layout tests take so long to run, having the test red on buildbot + # isn't very useful + return 0 + + def TestUI(self): + return self.ScriptedTest("chrome", "chrome.exe", "ui_tests", + ["ui_tests.exe", "--single-process", "--test-timeout=100000000"], multi=True) + +def _main(argv): + parser = optparse.OptionParser("usage: %prog -b <dir> -t <test> " + "[-t <test> ...]") + parser.disable_interspersed_args() + parser.add_option("-b", "--build_dir", + help="the location of the output of the compiler output") + parser.add_option("-t", "--test", action="append", + help="which test to run") + parser.add_option("", "--baseline", action="store_true", default=False, + help="generate baseline data instead of validating") + parser.add_option("", "--gtest_filter", + help="additional arguments to --gtest_filter") + parser.add_option("-v", "--verbose", action="store_true", default=False, + help="verbose output - enable debug log messages") + (options, args) = parser.parse_args() + + if options.verbose: + google.logging_utils.config_root(logging.DEBUG) + else: + google.logging_utils.config_root() + + if not options.test or not len(options.test): + parser.error("--test not specified") + + for t in options.test: + tests = ChromeTests(options, args, t) + ret = tests.Run() + if ret: return ret + return 0 + +if __name__ == "__main__": + ret = _main(sys.argv) + sys.exit(ret) diff --git a/tools/purify/chrome_tests.sh b/tools/purify/chrome_tests.sh new file mode 100755 index 
0000000..3c9c7da --- /dev/null +++ b/tools/purify/chrome_tests.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +system_root=`cygpath "$SYSTEMROOT"` +export PATH="/usr/bin:$system_root/system32:$system_root:$system_root/system32/WBEM" + +exec_dir=$(dirname $0) + +"$exec_dir/../../third_party/python_24/python.exe" \ + "$exec_dir/chrome_tests.py" "$@" diff --git a/tools/purify/common.py b/tools/purify/common.py new file mode 100644 index 0000000..3dd72af --- /dev/null +++ b/tools/purify/common.py @@ -0,0 +1,293 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# common.py + +""" Common code used by purify_test.py and quantify_test.py in order to automate +running of Rational Purify and Quantify in a consistent manner. +""" + +# Purify and Quantify have a front-end (e.g. quantifyw.exe) which talks to a +# back-end engine (e.g. quantifye.exe). The back-end seems to handle +# instrumentation, while the front-end controls program execution and +# measurement. The front-end will dynamically launch the back-end if +# instrumentation is needed (sometimes in the middle of a run if a dll is +# loaded dynamically). +# In an ideal world, this script would simply execute the front-end and check +# the output. However, purify is not the most reliable or well-documented app +# on the planet, and my attempts to get it to run this way led to the back-end +# engine hanging during instrumentation. The workaround to this was to run two +# passes, first running the engine to do instrumentation rather than letting +# the front-end do it for you, then running the front-end to actually do the +# run. Each time through we're deleting all of the instrumented files in the +# cache to ensure that we're testing that instrumentation works from scratch. 
+# (although this can be changed with an option) + +import datetime +import logging +import optparse +import os +import subprocess +import sys +import tempfile +import time + +import google.logging_utils + +# hard-coded location of Rational files and directories +RATIONAL_PATH = os.path.join("C:\\", "Program Files", "Rational") +COMMON_PATH = os.path.join(RATIONAL_PATH, "common") +PPLUS_PATH = os.path.join(RATIONAL_PATH, "PurifyPlus") +PURIFY_PATH = os.path.join(COMMON_PATH, "purify.exe") +PURIFYW_PATH = os.path.join(PPLUS_PATH, "purifyW.exe") +PURIFYE_PATH = os.path.join(PPLUS_PATH, "purifye.exe") +QUANTIFYE_PATH = os.path.join(PPLUS_PATH, "quantifye.exe") +QUANTIFYW_PATH = os.path.join(PPLUS_PATH, "quantifyw.exe") + +class TimeoutError(Exception): pass + +def RunSubprocess(proc, timeout=0, detach=False): + """ Runs a subprocess, polling every .2 seconds until it finishes or until + timeout is reached. Then kills the process with taskkill. A timeout <= 0 + means no timeout. + + Args: + proc: list of process components (exe + args) + timeout: how long to wait before killing, <= 0 means wait forever + detach: Whether to pass the DETACHED_PROCESS argument to CreateProcess + on Windows. This is used by Purify subprocesses on buildbot which + seem to get confused by the parent console that buildbot sets up. + """ + logging.info("running %s" % (" ".join(proc))) + if detach: + # see MSDN docs for "Process Creation Flags" + DETACHED_PROCESS = 0x8 + p = subprocess.Popen(proc, creationflags=DETACHED_PROCESS) + else: + p = subprocess.Popen(proc) + if timeout <= 0: + while p.poll() is None: + time.sleep(0.2) + else: + wait_until = time.time() + timeout + while p.poll() is None and time.time() < wait_until: + time.sleep(0.2) + result = p.poll() + if result is None: + subprocess.call(["taskkill", "/T", "/F", "/PID", str(p.pid)]) + logging.error("KILLED %d" % (p.pid)) + # give the process a chance to actually die before continuing + # so that cleanup can happen safely + time.sleep(1.0) + logging.error("TIMEOUT waiting for %s" % (proc[0])) + raise TimeoutError(proc[0]) + if result: + logging.error("%s exited with non-zero result code %d" % (proc[0], result)) + return result + +def FixPath(path): + """We pass computed paths to Rational as arguments, so these paths must be + valid windows paths. When running in cygwin's python, computed paths + wind up looking like /cygdrive/c/..., so we need to call out to cygpath + to fix them up. + """ + if sys.platform != "cygwin": + return path + p = subprocess.Popen(["cygpath", "-a", "-m", path], stdout=subprocess.PIPE) + return p.communicate()[0].rstrip() + +class Rational(object): + ''' Common superclass for Purify and Quantify automation objects. Handles + common argument parsing as well as the general program flow of Instrument, + Execute, Analyze. 
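A hedged sketch of the two-pass pattern described in the comment above, using flags that appear in the scripts in this commit (/Run=no to instrument only, /SaveTextData to capture output); the target exe name and cache path are illustrative:

    import subprocess

    purifyw = r"C:\Program Files\Rational\PurifyPlus\purifyW.exe"  # PURIFYW_PATH above
    # pass 1: instrument only; the front-end exits without running the target
    subprocess.call([purifyw, "/Run=no",
                     "/CacheDir=C:\\purify_cache", "my_tests.exe"])
    # pass 2: execute the already-instrumented target and save text output
    subprocess.call([purifyw, "/CacheDir=C:\\purify_cache",
                     "/SaveTextData=out.txt", "my_tests.exe"])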
+ ''' + + def __init__(self): + google.logging_utils.config_root() + self._out_file = None + + def Run(self): + '''Call this to run through the whole process: + Setup, Instrument, Execute, Analyze''' + start = datetime.datetime.now() + retcode = -1 + if self.Setup(): + if self.Instrument(): + if self.Execute(): + retcode = self.Analyze() + if not retcode: + logging.info("instrumentation and execution completed successfully.") + else: + logging.error("Analyze failed") + else: + logging.error("Execute failed") + else: + logging.error("Instrument failed") + self.Cleanup() + else: + logging.error("Setup failed") + end = datetime.datetime.now() + seconds = (end - start).seconds + hours = seconds / 3600 + seconds = seconds % 3600 + minutes = seconds / 60 + seconds = seconds % 60 + logging.info("elapsed time: %02d:%02d:%02d" % (hours, minutes, seconds)) + return retcode + + def CreateOptionParser(self): + '''Creates OptionParser with shared arguments. Overridden by subclassers + to add custom arguments.''' + parser = optparse.OptionParser("usage: %prog [options] <program to test>") + # since the trailing program likely has command-line args of itself + # we need to stop parsing when we reach the first positional arg + parser.disable_interspersed_args() + parser.add_option("-o", "--out_file", dest="out_file", metavar="OUTFILE", + default="", + help="output data is written to OUTFILE") + parser.add_option("-s", "--save_cache", + dest="save_cache", action="store_true", default=False, + help="don't delete instrumentation cache") + parser.add_option("-c", "--cache_dir", dest="cache_dir", metavar="CACHEDIR", + default="", + help="location of instrumentation cache is CACHEDIR") + parser.add_option("-m", "--manual", + dest="manual_run", action="store_true", default=False, + help="target app is being run manually, don't timeout") + parser.add_option("-t", "--timeout", + dest="timeout", metavar="TIMEOUT", default=10000, + help="timeout in seconds for the run (default 10000)") + parser.add_option("-v", "--verbose", action="store_true", default=False, + help="verbose output - enable debug log messages") + self._parser = parser + + def Setup(self): + if self.ParseArgv(): + logging.info("instrumentation cache in %s" % self._cache_dir) + logging.info("output saving to %s" % self._out_file) + # Ensure that Rational's common dir and cache dir are in the front of the + # path. The common dir is required for purify to run in any case, and + # the cache_dir is required when using the /Replace=yes option. + os.environ["PATH"] = (COMMON_PATH + ";" + self._cache_dir + ";" + + os.environ["PATH"]) + # clear the cache to make sure we're starting clean + self.__ClearInstrumentationCache() + return True + return False + + def Instrument(self, proc): + '''Instrument the app to be tested. Full instrumentation command-line + provided by subclassers via proc.''' + logging.info("starting instrumentation...") + if RunSubprocess(proc, self._timeout, detach=True) == 0: + if "/Replace=yes" in proc: + if os.path.exists(self._exe + ".Original"): + return True + elif os.path.isdir(self._cache_dir): + for cfile in os.listdir(self._cache_dir): + # TODO(erikkay): look for the actual munged purify filename + ext = os.path.splitext(cfile)[1] + if ext == ".exe": + return True + logging.error("no instrumentation data generated") + return False + + def Execute(self, proc): + ''' Execute the app to be tested after successful instrumentation. 
+ Full execution command-line provided by subclassers via proc.''' + logging.info("starting execution...") + # note that self._args begins with the exe to be run + proc += self._args + if RunSubprocess(proc, self._timeout) == 0: + return True + return False + + def Analyze(self): + '''Analyze step after a successful Execution. Should be overridden + by the subclasser if instrumentation is desired. + Returns 0 for success, -88 for warning (see ReturnCodeCommand) and anything + else for error + ''' + return -1 + + def ParseArgv(self): + '''Parses arguments according to CreateOptionParser + Subclassers must override if they have extra arguments.''' + self.CreateOptionParser() + (self._options, self._args) = self._parser.parse_args() + if self._options.verbose: + google.logging_utils.config_root(logging.DEBUG) + self._save_cache = self._options.save_cache + self._manual_run = self._options.manual_run + if self._manual_run: + logging.info("manual run - timeout disabled") + self._timeout = 0 + else: + self._timeout = int(self._options.timeout) + logging.info("timeout set to %ds" % (self._timeout)) + if self._save_cache: + logging.info("saving instrumentation cache") + if not self._options.cache_dir: + try: + temp_dir = os.environ["TEMP"] + except KeyError: + temp_dir = tempfile.mkdtemp() + self._cache_dir = os.path.join(FixPath(temp_dir), + "instrumentation_cache") + else: + self._cache_dir = FixPath(os.path.abspath(self._options.cache_dir)) + if self._options.out_file: + self._out_file = FixPath(os.path.abspath(self._options.out_file)) + if len(self._args) == 0: + self._parser.error("missing program to %s" % (self.__class__.__name__,)) + return False + self._exe = self._args[0] + self._exe_dir = FixPath(os.path.abspath(os.path.dirname(self._exe))) + return True + + def Cleanup(self): + # delete the cache to avoid filling up the hard drive when we're using + # temporary directory names + self.__ClearInstrumentationCache() + + def __ClearInstrumentationCache(self): + if not self._save_cache: + logging.info("clearing instrumentation cache %s" % self._cache_dir) + if os.path.isdir(self._cache_dir): + for cfile in os.listdir(self._cache_dir): + file = os.path.join(self._cache_dir, cfile); + if os.path.isfile(file): + try: + os.remove(file) + except: + logging.warning("unable to delete file %s: %s" % (file, + sys.exc_info()[0])) + + diff --git a/tools/purify/data/filters.pft b/tools/purify/data/filters.pft Binary files differnew file mode 100644 index 0000000..a353c12 --- /dev/null +++ b/tools/purify/data/filters.pft diff --git a/tools/purify/data/ignore.txt b/tools/purify/data/ignore.txt new file mode 100644 index 0000000..16a11b2 --- /dev/null +++ b/tools/purify/data/ignore.txt @@ -0,0 +1,12 @@ +# See bug 1157381 +Pure: Trap bits found in live chunk + +# See bugs 1151263 and 1164562 +Memory leak .+ allocated in InitSecurityInterfaceA \[SECUR32\.DLL\] + +# See bug 1163766 +# Ugly regexps are trying to deal with Purify's demangling bugs. 
+Memory leak .+ allocated in \?NewRunnableMethod.+ExpireHistoryBackend.+ScopedRunnableMethodFactory +Memory leak .+ allocated in RevocableStore::RevokeAll\(void\) +Memory leak .+ allocated in \?NewRunnableMethod.+CommitLaterTask.+CancelableTask.+CommitLaterTask +Memory leak .+ allocated in history::HistoryBackend::ScheduleCommit\(void\) diff --git a/tools/purify/purify_analyze.py b/tools/purify/purify_analyze.py new file mode 100644 index 0000000..4625a70 --- /dev/null +++ b/tools/purify/purify_analyze.py @@ -0,0 +1,874 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_analyze.py + +''' Given a Purify text file, parses messages, normalizes and uniques them. +If there's an existing baseline of this data, it can compare against that +baseline and return an error code if there are any new errors not in the +baseline. ''' + +import logging +import optparse +import os +import re +import sys + +import google.logging_utils +import google.path_utils + +import purify_message + +class MemoryTreeNode(object): + ''' A node in a tree representing stack traces of memory allocation. + Essentially, each node in the tree is a hashtable mapping a child + function name to a child node. Each node contains the total number + of bytes of all of its descendants. + See also: PurifyAnalyze.PrintMemoryInUse() + ''' + + pat_initializer = re.compile('(.*)\`dynamic initializer for \'(.*)\'\'') + + @classmethod + def CreateTree(cls, message_list): + '''Creates a tree from message_list. All of the Message objects are built + into a tree with a default "ROOT" root node that is then returned. + Args: + message_list: a MessageList object. 
+    '''
+    root = MemoryTreeNode("ROOT", 0, 0)
+    msgs = message_list.AllMessages()
+    for msg in msgs:
+      bytes = msg._bytes
+      blocks = msg._blocks
+      stack = msg.GetAllocStack()
+      stack_lines = stack.GetLines()
+      size = len(stack_lines)
+      node = root
+      node._AddAlloc(bytes, blocks)
+      counted = False
+      # process stack lines from the bottom up to build a call-stack tree
+      functions = [line["function"] for line in stack_lines]
+      functions.reverse()
+      for func in functions:
+        if node == root:
+          m = MemoryTreeNode.pat_initializer.match(func)
+          if m:
+            node = node._AddChild("INITIALIZERS", bytes, blocks)
+            func = m.group(1) + m.group(2)
+        # don't process elided or truncated stack lines
+        if func:
+          node = node._AddChild(func, bytes, blocks)
+          counted = True
+      if not counted:
+        # Nodes with no stack frames in our code wind up not being counted
+        # above.  These seem to be attributable to Windows DLL
+        # initialization, so just throw them into that bucket.
+        node._AddChild("WINDOWS", bytes, blocks)
+    return root
+
+  def __init__(self, function, bytes, blocks):
+    '''
+    Args:
+      function: A string representing a unique method or function.
+      bytes: initial number of bytes allocated in this node
+      blocks: initial number of blocks allocated in this node
+    '''
+    self._function = function
+    self._bytes = bytes
+    self._blocks = blocks
+    self._allocs = 1
+    self._children = {}
+
+  def _AddAlloc(self, bytes, blocks):
+    '''Adds bytes and blocks to this node's allocation totals
+    '''
+    self._allocs += 1
+    self._bytes += bytes
+    self._blocks += blocks
+
+  def _AddChild(self, function, bytes, blocks):
+    '''Adds a child node if not present.  Otherwise, adds
+    bytes and blocks to its allocation total.
+    '''
+    if function not in self._children:
+      self._children[function] = MemoryTreeNode(function, bytes, blocks)
+    else:
+      self._children[function]._AddAlloc(bytes, blocks)
+    return self._children[function]
+
+  def __cmp__(self, other):
+    # sort by size, then blocks, then function name
+    return cmp((self._bytes, self._blocks, self._function),
+               (other._bytes, other._blocks, other._function))
+
+  def __str__(self):
+    return "(%d bytes, %d blocks, %d allocs) %s" % (
+        self._bytes, self._blocks, self._allocs, self._function)
+
+  def PrintRecursive(self, padding="", byte_filter=0):
+    '''Print the tree and all of its children recursively (depth-first).  All
+    nodes at a given level of the tree are sorted in descending order by size.
+
+    Args:
+      padding: Printed at the front of the line.  Each recursive call adds a
+        single space character.
+      byte_filter: a number of bytes below which we'll prune the tree
+    '''
+    print "%s%s" % (padding, self)
+    padding = padding + " "
+    # sort the children in descending order (see __cmp__)
+    swapped = self._children.values()
+    swapped.sort(reverse=True)
+    rest_bytes = 0
+    rest_blocks = 0
+    rest_allocs = 0
+    for node in swapped:
+      if node._bytes < byte_filter:
+        rest_bytes += node._bytes
+        rest_blocks += node._blocks
+        rest_allocs += node._allocs
+      else:
+        node.PrintRecursive(padding, byte_filter)
+    if rest_bytes:
+      print "%s(%d bytes, %d blocks, %d allocs) PRUNED" % (padding,
+          rest_bytes, rest_blocks, rest_allocs)
+
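A minimal, self-contained sketch of the bottom-up tree building that CreateTree performs above; plain dicts stand in for the real Message/MessageList classes, and the frame names are invented:

    def build_tree(allocs):
      # allocs: list of (bytes, [frames ordered bottom-of-stack first])
      root = {"bytes": 0, "children": {}}
      for bytes, frames in allocs:
        root["bytes"] += bytes
        node = root
        for func in frames:
          node = node["children"].setdefault(func, {"bytes": 0, "children": {}})
          node["bytes"] += bytes  # every ancestor accumulates the allocation
      return root

    tree = build_tree([(100, ["main", "Foo::Bar"]), (40, ["main", "Foo::Baz"])])
    assert tree["children"]["main"]["bytes"] == 140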
+class PurifyAnalyze:
+  ''' Given a Purify text file, parses all of the messages inside of it and
+  normalizes them.  Provides a mechanism for comparing this normalized set
+  against a baseline and detecting if new errors have been introduced.
+  '''
+
+  # a line which is the start of a new message
+  pat_msg_start = re.compile('^\[([A-Z])\] (.*)$')
+  # a message with a specific type
+  pat_msg_type = re.compile('^([A-Z]{3}): (.*)$')
+  pat_leak_summary = re.compile("Summary of ... memory leaks")
+  pat_miu_summary = re.compile("Summary of ... memory in use")
+  pat_starting = re.compile("Starting Purify'd ([^\\s]+\\\\[^\\s]+)")
+  pat_arguments = re.compile("\s+Command line arguments:\s+([^\s].*)")
+  pat_terminate = re.compile('Message: TerminateProcess called with code')
+  # Purify treats this as a warning, but for us it's a fatal error.
+  pat_instrumentation_failed = re.compile('^.* file not instrumented')
+  # misc to ignore
+  pat_ignore = (re.compile('^(Start|Exit)ing'),
+                re.compile('^Program terminated'),
+                re.compile('^Terminating thread'),
+                re.compile('^Message: CryptVerifySignature'))
+  # message types that aren't analyzed
+  # handled, ignored and continued exceptions will likely never be interesting
+  # TODO(erikkay): MPK ("potential" memory leaks) may be worth turning on
+  types_excluded = ("EXH", "EXI", "EXC", "MPK")
+
+
+  def __init__(self, files, echo, name=None, source_dir=None, data_dir=None):
+    # The input file we're analyzing.
+    self._files = files
+    # Whether the input file contents should be echoed to stdout.
+    self._echo = echo
+    # A symbolic name for the run being analyzed, often the name of the
+    # exe which was purified.
+    self._name = name
+    # The top of the source code tree of the code we're analyzing.
+    # This prefix is stripped from all filenames in stacks for normalization.
+    if source_dir:
+      purify_message.Stack.SetSourceDir(source_dir)
+    if data_dir:
+      self._data_dir = data_dir
+    else:
+      self._data_dir = os.path.join(google.path_utils.ScriptDir(), "data")
+    # A map of message_type to a MessageList of that type.
+    self._message_lists = {}
+    self._ReadIgnoreFile()
+
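To make the message grammar concrete, here is an invented sample line run through the two class-level regexes above (the text is made up, but its shape matches what the patterns expect):

    import re
    pat_msg_start = re.compile('^\[([A-Z])\] (.*)$')
    pat_msg_type = re.compile('^([A-Z]{3}): (.*)$')
    m = pat_msg_start.match("[E] ABR: Array bounds read in foo.dll")
    t = pat_msg_type.match(m.group(2))
    assert (m.group(1), t.group(1)) == ("E", "ABR")  # severity, message type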
+  def _ReadIgnoreFile(self):
+    '''Read a file which is a list of regexps for either the title or the
+    top-most visible stack line.
+    '''
+    self._pat_ignore = []
+    filenames = [os.path.join(self._data_dir, "ignore.txt"),
+        os.path.join(google.path_utils.ScriptDir(), "data", "ignore.txt")]
+    for filename in filenames:
+      if os.path.exists(filename):
+        f = open(filename, 'r')
+        for line in f.readlines():
+          if line.startswith("#") or line.startswith("//") or line.isspace():
+            continue
+          line = line.rstrip()
+          pat = re.compile(line)
+          if pat:
+            self._pat_ignore.append(pat)
+
+  def ShouldIgnore(self, msg):
+    '''Should the message be ignored as irrelevant to analysis '''
+    # never ignore memory in use
+    if msg.Type() == "MIU":
+      return False
+
+    # check ignore patterns against title and top-most visible stack frames
+    strings = [msg._title]
+    err = msg.GetErrorStack()
+    if err:
+      line = err.GetTopVisibleStackLine().get('function', None)
+      if line:
+        strings.append(line)
+    alloc = msg.GetAllocStack()
+    if alloc:
+      line = alloc.GetTopVisibleStackLine().get('function', None)
+      if line:
+        strings.append(line)
+    for pat in self._pat_ignore:
+      for str in strings:
+        if pat.match(str):
+          logging.debug("Ignoring message based on ignore.txt")
+          logging.debug(msg.NormalizedStr(verbose=True))
+          return True
+
+    # unless it's explicitly in the ignore file, never ignore these
+    if msg.Type() == purify_message.FATAL:
+      return False
+
+    # certain message types aren't that interesting
+    if msg.Type() in PurifyAnalyze.types_excluded:
+      logging.debug("Ignoring message because type is excluded")
+      logging.debug(msg.NormalizedStr(verbose=True))
+      return True
+    # if the message stacks have no local stack frames, we can ignore them
+    if msg.StacksAllExternal():
+      logging.debug("Ignoring message because stacks are all external")
+      logging.debug(msg.NormalizedStr(verbose=True))
+      return True
+
+    # Microsoft's STL has a bunch of non-harmful UMRs in it.  Most of them
+    # are filtered out by Purify's default filters and by our explicit ignore
+    # list.  This code notices ones that have made it through so we can add
+    # them to the ignore list later.
+    if msg.Type() == "UMR":
+      if err.GetTopStackLine()['file'].endswith('.'):
+        logging.debug("non-ignored UMR in STL: %s" % msg._title)
+
+    return False
+
+  def AddMessage(self, msg):
+    ''' Append the message to an array for its type.  Returns boolean
+    indicating whether the message was actually added or was ignored.'''
+    if msg:
+      if self.ShouldIgnore(msg):
+        return False
+      if msg.Type() not in self._message_lists:
+        self._message_lists[msg.Type()] = purify_message.MessageList(msg.Type())
+      self._message_lists[msg.Type()].AddMessage(msg)
+      return True
+    return False
+
+  def _BeginNewSublist(self, key):
+    '''See MessageList.BeginNewSublist for details.
+    '''
+    if key in self._message_lists:
+      self._message_lists[key].BeginNewSublist()
+
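The ignore-list check in ShouldIgnore above reduces to matching each ignore.txt regex against the title and the top visible stack frames. A self-contained sketch with invented sample strings:

    import re
    pat_ignore = [re.compile(r"Memory leak .+ allocated in InitSecurityInterfaceA")]
    candidates = ["Memory leak of 24 bytes allocated in InitSecurityInterfaceA",
                  "SChannelFoo::Init"]  # title, then top visible frame (made up)
    assert any(p.match(s) for p in pat_ignore for s in candidates)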
+  def ReadFile(self):
+    ''' Reads a Purify ASCII file and parses and normalizes the messages in
+    the file.
+    Returns False if a fatal error was detected, True otherwise.
+    '''
+    # Purify files consist of a series of "messages".  These messages have a
+    # type (designated as a three letter code - see message_type), a severity
+    # (designated by a one letter code - see message_severity) and some
+    # textual details.  It will often also have a stack trace of the error
+    # location, and (for memory errors) may also have a stack trace of the
+    # allocation location.
+
+    fatal_errors = 0
+    fatal_exe = ""
+
+    for file in self._files:
+      exe = ""
+      error = None
+      message = None
+      for line in open(file, mode='rb'):
+        line = line.rstrip()
+        m = PurifyAnalyze.pat_msg_start.match(line)
+        if m:
+          if exe == fatal_exe:
+            # since we hit a fatal error in this program, ignore all messages
+            # until the program changes
+            continue
+          # we matched a new message, so if there's an existing one, it's time
+          # to finish processing it
+          if message:
+            message.SetProgram(exe)
+            if not self.AddMessage(message):
+              # error is only set if the message we just tried to add would
+              # otherwise be considered a fatal error.  Since AddMessage failed
+              # (presumably the messages matched the ignore list), we reset
+              # error to None
+              error = None
+            message = None
+          if error:
+            if error.Type() == "EXU":
+              # Don't treat EXU as fatal, since unhandled exceptions
+              # in other threads don't necessarily lead the app to exit.
+              # TODO(erikkay): verify that we still do trap exceptions that
+              # lead to early termination.
+              logging.warning(error.NormalizedStr(verbose=True))
+              error = None
+            else:
+              if len(self._files) > 1:
+                logging.error("Fatal error in program: %s" % error.Program())
+              logging.error(error.NormalizedStr(verbose=True))
+              fatal_errors += 1
+              error = None
+              fatal_exe = exe
+              continue
+          severity = m.group(1)
+          line = m.group(2)
+          m = PurifyAnalyze.pat_msg_type.match(line)
+          if m:
+            type = m.group(1)
+            message = purify_message.Message(severity, type, m.group(2))
+            if type == "EXU":
+              error = message
+          elif severity == "O":
+            message = purify_message.Message(severity, purify_message.FATAL,
+                                             line)
+            # This is an internal Purify error, and it means that this run
+            # can't be trusted and analysis should be aborted.
+            error = message
+          elif PurifyAnalyze.pat_instrumentation_failed.match(line):
+            message = purify_message.Message(severity, purify_message.FATAL,
+                                             line)
+            error = message
+          elif PurifyAnalyze.pat_terminate.match(line):
+            message = purify_message.Message(severity, purify_message.FATAL,
+                                             line)
+            error = message
+          elif PurifyAnalyze.pat_leak_summary.match(line):
+            # TODO(erikkay): should we do sublists for MLK and MPK too?
+            # Maybe that means we need to handle "new" and "all" messages
+            # separately.
+            #self._BeginNewSublist("MLK")
+            #self._BeginNewSublist("MPK")
+            pass
+          elif PurifyAnalyze.pat_miu_summary.match(line):
+            # Each time Purify is asked to generate a list of all memory in
+            # use or new memory in use, it first emits this summary line.
+            # Since the different lists can overlap, we need to tell
+            # MessageList to begin a new sublist.
+            # TODO(erikkay): should we tag "new" and "all" sublists
+            # explicitly somehow?
+            self._BeginNewSublist("MIU")
+          elif PurifyAnalyze.pat_starting.match(line):
+            m = PurifyAnalyze.pat_starting.match(line)
+            exe = m.group(1)
+            last_slash = exe.rfind("\\")
+            if not purify_message.Stack.source_dir:
+              path = os.path.abspath(os.path.join(exe[:last_slash],
+                                                  "..", ".."))
+              purify_message.Stack.SetSourceDir(path)
+            if not self._name:
+              self._name = exe[(last_slash+1):]
+          else:
+            unknown = True
+            for pat in PurifyAnalyze.pat_ignore:
+              if pat.match(line):
+                unknown = False
+                break
+            if unknown:
+              logging.error("unknown line " + line)
+        else:
+          if message:
+            message.AddLine(line)
+          elif PurifyAnalyze.pat_arguments.match(line):
+            m = PurifyAnalyze.pat_arguments.match(line)
+            exe += " " + m.group(1)
+
+      # Purify output should never end with a real message
+      if message:
+        logging.error("Unexpected message at end of file %s" % file)
+
+    return fatal_errors == 0
+
+  def GetMessageList(self, key):
+    if key in self._message_lists:
+      return self._message_lists[key]
+    else:
+      return None
+
+  def PrintSummary(self, echo=None):
+    ''' Print a summary of how many messages of each type were found. '''
+    # make sure everyone else is done first
+    sys.stderr.flush()
+    sys.stdout.flush()
+    if echo == None:
+      echo = self._echo
+    logging.info("summary of Purify messages:")
+    for key in self._message_lists:
+      list = self._message_lists[key]
+      unique = list.UniqueMessages()
+      all = list.AllMessages()
+      count = 0
+      for msg in all:
+        count += msg._count
+      logging.info("%s(%s) unique:%d total:%d" % (self._name,
+          purify_message.GetMessageType(key), len(unique), count))
+      if key not in ["MIU"]:
+        ignore_file = "%s_%s_ignore.txt" % (self._name, key)
+        ignore_hashes = self._MessageHashesFromFile(ignore_file)
+        ignored = 0
+
+        groups = list.UniqueMessageGroups()
+        group_keys = groups.keys()
+        group_keys.sort(cmp=lambda x,y: len(groups[y]) - len(groups[x]))
+        for group in group_keys:
+          # filter out ignored messages
+          kept_msgs = [x for x in groups[group] if hash(x) not in ignore_hashes]
+          ignored += len(groups[group]) - len(kept_msgs)
+          groups[group] = kept_msgs
+        if ignored:
+          logging.info("%s(%s) ignored:%d" % (self._name,
+              purify_message.GetMessageType(key), ignored))
+        total = reduce(lambda x, y: x + len(groups[y]), group_keys, 0)
+        if total:
+          print "%s(%s) group summary:" % (self._name,
+              purify_message.GetMessageType(key))
+          print "  TOTAL: %d" % total
+          for group in group_keys:
+            if len(groups[group]):
+              print "  %s: %d" % (group, len(groups[group]))
+        if echo:
+          for group in group_keys:
+            msgs = groups[group]
+            if len(msgs) == 0:
+              continue
+            print "messages from %s (%d)" % (group, len(msgs))
+            print "="*79
+            for msg in msgs:
+              # for the summary output, line numbers are useful
+              print msg.NormalizedStr(verbose=True)
+    # make sure stdout is flushed to avoid weird overlaps with logging
+    sys.stdout.flush()
+
+  def PrintMemoryInUse(self, byte_filter=16384):
+    ''' Print one or more trees showing a hierarchy of memory allocations.
+ Args: + byte_filter: a number of bytes below which we'll prune the tree + ''' + list = self.GetMessageList("MIU") + sublists = list.GetSublists() + if not sublists: + sublists = [list] + trees = [] + summaries = [] + # create the trees and summaries + for sublist in sublists: + tree = MemoryTreeNode.CreateTree(sublist) + trees.append(tree) + + # while the tree is a hierarchical assignment from the root/bottom of the + # stack down, the summary is simply adding the total of the top-most + # stack item from our code + summary = {} + total = 0 + summaries.append(summary) + for msg in sublist.AllMessages(): + total += msg._bytes + stack = msg.GetAllocStack() + if stack._all_external: + alloc_caller = "WINDOWS" + else: + lines = stack.GetLines() + for line in lines: + alloc_caller = line["function"] + if alloc_caller: + break + summary[alloc_caller] = summary.get(alloc_caller, 0) + msg._bytes + summary["TOTAL"] = total + + # print out the summaries and trees. + # TODO(erikkay): perhaps we should be writing this output to a file + # instead? + tree_number = 1 + num_trees = len(trees) + for tree, summary in zip(trees, summaries): + print "MEMORY SNAPSHOT %d of %d" % (tree_number, num_trees) + lines = summary.keys() + lines.sort(cmp=lambda x,y: summary[y] - summary[x]) + rest = 0 + for line in lines: + bytes = summary[line] + if bytes < byte_filter: + rest += bytes + else: + print "%d: %s" % (bytes, line) + print "%d: REST" % rest + print + print "BEGIN TREE" + tree.PrintRecursive(byte_filter=byte_filter) + tree_number += 1 + + # make sure stdout is flushed to avoid weird overlaps with logging + sys.stdout.flush() + + def PrintBugReport(self): + ''' Print a summary of how many messages of each type were found. ''' + # make sure everyone else is done first + sys.stderr.flush() + sys.stdout.flush() + logging.info("summary of Purify bugs:") + # This is a specialized set of counters for layout tests, with some + # unfortunate hard-coded knowledge. 
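# Illustrative example of the summarization below (paths invented): a repro
# command ending in ".../layout_tests/fast/css/foo.html" is counted under
# the bucket "fast/css", and a URL such as "127.0.0.1:8000/fast/js/x.html"
# under "http: fast/js"; the trailing filename is dropped by the final join.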
+    layout_test_counts = {}
+    for key in self._message_lists:
+      bug = {}
+      list = self._message_lists[key]
+      unique = list.UniqueMessages()
+      all = list.AllMessages()
+      count = 0
+      for msg in all:
+        if msg._title not in bug:
+          # use a single sample message to represent all messages
+          # that match this title
+          bug[msg._title] = {"message":msg,
+                             "total":0,
+                             "count":0,
+                             "programs":set()}
+        this_bug = bug[msg._title]
+        this_bug["total"] += msg._count
+        this_bug["count"] += 1
+        this_bug["programs"].add(msg.Program())
+        # try to summarize the problem areas for layout tests
+        if self._name == "layout":
+          prog = msg.Program()
+          prog_args = prog.split(" ")
+          if len(prog_args):
+            path = prog_args[-1].replace('\\', '/')
+            index = path.rfind("layout_tests/")
+            if index >= 0:
+              path = path[(index + len("layout_tests/")):]
+            else:
+              index = path.rfind("127.0.0.1:")
+              if index >= 0:
+                # the port number is 8000 or 9000, but length is the same
+                path = "http: " + path[(index + len("127.0.0.1:8000/")):]
+            path = "/".join(path.split('/')[0:-1])
+            count = 1 + layout_test_counts.get(path, 0)
+            layout_test_counts[path] = count
+      for title in bug:
+        b = bug[title]
+        print "[%s] %s" % (key, title)
+        print "%d tests, %d stacks, %d instances" % (len(b["programs"]),
+            b["count"], b["total"])
+        print "Reproducible with:"
+        for program in b["programs"]:
+          print "   %s" % program
+        print "Sample error details:"
+        print "====================="
+        print b["message"].NormalizedStr(verbose=True)
+    if len(layout_test_counts):
+      print
+      print "Layout test error counts"
+      print "========================"
+      paths = layout_test_counts.keys()
+      paths.sort()
+      for path in paths:
+        print "%s: %d" % (path, layout_test_counts[path])
+    # make sure stdout is flushed to avoid weird overlaps with logging
+    sys.stdout.flush()
+
+  def SaveLatestStrings(self, string_list, key, fname_extra=""):
+    '''Output a list of strings to a file in the "latest" dir.
+    '''
+    script_dir = google.path_utils.ScriptDir()
+    path = os.path.join(script_dir, "latest")
+    out = os.path.join(path, "%s_%s%s.txt" % (self._name, key, fname_extra))
+    logging.info("saving %s" % (out))
+    f = None
+    try:
+      f = open(out, "w+")
+      f.write('\n'.join(string_list))
+    except IOError, (errno, strerror):
+      logging.error("error writing to file %s (%d, %s)" % (out, errno,
+                                                           strerror))
+    if f:
+      f.close()
+    return True
+
+  def SaveResults(self, path=None, verbose=False):
+    ''' Output normalized data to baseline files for future comparison runs.
+    Messages are saved in sorted order into a separate file for each message
+    type.  See Message.NormalizedStr() for details of what's written.
+    '''
+    if not path:
+      path = self._data_dir
+    for key in self._message_lists:
+      out = os.path.join(path, "%s_%s.txt" % (self._name, key))
+      logging.info("saving %s" % (out))
+      f = open(out, "w+")
+      list = self._message_lists[key].UniqueMessages()
+      # TODO(erikkay): should the count of each message be a diff factor?
+      # (i.e. the same error shows up, but more frequently)
+      for message in list:
+        f.write(message.NormalizedStr(verbose=verbose))
+        f.write("\n")
+      f.close()
+    return True
+
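The baseline files consumed by _MessageHashesFromFile below are blocks separated by blank lines: a title line followed by body lines, where only the body feeds the hash. A self-contained sketch of that parse, with invented sample content:

    text = "MLK: leak of 24 bytes\n  foo.dll!Alloc\n  foo.dll!Init\n\n"
    msgs = {}
    title, msg = None, ""
    for line in text.splitlines(True) + ["\n"]:  # extra blank flushes the tail
      if not title:
        if not line.isspace():
          title = line        # first line of a block is the title
      elif not line.isspace():
        msg += line           # body lines accumulate
      else:
        msgs[hash(msg)] = title + msg  # the hash covers the body only
        title, msg = None, ""
    assert len(msgs) == 1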
+  def _MessageHashesFromFile(self, filename):
+    ''' Reads a file of normalized messages (see SaveResults) and creates a
+    dictionary mapping the hash of each message to its text.
+    '''
+    # NOTE: this uses the same hashing algorithm as Message.__hash__.
+    # Unfortunately, we can't use the same code easily since Message is based
+    # on parsing an original Purify output file and this code is reading a
+    # file of already normalized messages.  This means that these two bits of
+    # code need to be kept in sync.
+    msgs = {}
+    if not os.path.isabs(filename):
+      filename = os.path.join(self._data_dir, filename)
+    if os.path.exists(filename):
+      logging.info("reading messages from %s" % filename)
+      file = open(filename, "r")
+      msg = ""
+      title = None
+      lines = file.readlines()
+      # in case the file doesn't end in a blank line
+      lines.append("\n")
+      for line in lines:
+        # allow these files to have comments in them
+        if line.startswith('#') or line.startswith('//'):
+          continue
+        if not title:
+          if not line.isspace():
+            # first line of each message is a title
+            title = line
+          continue
+        elif not line.isspace():
+          msg += line
+        else:
+          # note that the hash doesn't include the title, see Message.__hash__
+          h = hash(msg)
+          msgs[h] = title + msg
+          title = None
+          msg = ""
+      logging.info("%s: %d msgs" % (filename, len(msgs)))
+    return msgs
+
+  def _SaveLatestGroupSummary(self, message_list):
+    '''Save a summary of message groups and their counts to a file in "latest"
+    '''
+    string_list = []
+    groups = message_list.UniqueMessageGroups()
+    group_keys = groups.keys()
+
+    group_keys.sort(cmp=lambda x,y: len(groups[y]) - len(groups[x]))
+    for group in group_keys:
+      string_list.append("%s: %d" % (group, len(groups[group])))
+
+    self.SaveLatestStrings(string_list, message_list.GetType(), "_GROUPS")
+
+  def CompareResults(self):
+    ''' Compares the results from the current run with the baseline data
+    stored in data/<name>_<key>.txt returning False if it finds new errors
+    that are not in the baseline.  See ReadFile() and SaveResults() for
+    details of what's in the original file and what's in the baseline.
+    Errors that show up in the baseline but not the current run are not
+    considered errors (they're considered "fixed"), but they do suggest
+    that the baseline file could be re-generated.'''
+    errors = 0
+    fixes = 0
+    for type in purify_message.message_type:
+      if type in ["MIU"]:
+        continue
+      # number of new errors for this message type
+      type_errors = []
+      # number of new unexpected fixes for this message type
+      type_fixes = []
+      # the messages from the current run that are in the baseline
+      new_baseline = []
+      # a common prefix used to describe the program being analyzed and the
+      # type of message which is used to generate filenames and descriptive
+      # error messages
+      type_name = "%s_%s" % (self._name, type)
+
+      # open the baseline file to compare against
+      baseline_file = "%s.txt" % type_name
+      baseline_hashes = self._MessageHashesFromFile(baseline_file)
+
+      # read the flakey file if it exists
+      flakey_file = "%s_flakey.txt" % type_name
+      flakey_hashes = self._MessageHashesFromFile(flakey_file)
+
+      # read the ignore file if it exists
+      ignore_file = "%s_ignore.txt" % type_name
+      ignore_hashes = self._MessageHashesFromFile(ignore_file)
+
+      # messages from the current run
+      current_list = self.GetMessageList(type)
+      if current_list:
+        # Since we're looking at the list of unique messages, if the number
+        # of occurrences of a given unique message changes, it won't show up
+        # as an error.
+ current_messages = current_list.UniqueMessages() + else: + current_messages = [] + current_hashes = {} + # compute errors and new baseline + for message in current_messages: + msg_hash = hash(message) + current_hashes[msg_hash] = message + if msg_hash in ignore_hashes or msg_hash in flakey_hashes: + continue + if msg_hash in baseline_hashes: + new_baseline.append(msg_hash) + continue + type_errors.append(msg_hash) + # compute unexpected fixes + for msg_hash in baseline_hashes: + if (msg_hash not in current_hashes and + msg_hash not in ignore_hashes and + msg_hash not in flakey_hashes): + type_fixes.append(baseline_hashes[msg_hash]) + + if len(current_messages) or len(type_errors) or len(type_fixes): + logging.info("%d '%s(%s)' messages " + "(%d new, %d unexpectedly fixed)" % (len(current_messages), + purify_message.GetMessageType(type), type, + len(type_errors), len(type_fixes))) + + if len(type_errors): + strs = [current_hashes[x].NormalizedStr(verbose=True) + for x in type_errors] + logging.error("%d new '%s(%s)' errors found\n%s" % (len(type_errors), + purify_message.GetMessageType(type), type, + '\n'.join(strs))) + strs = [current_hashes[x].NormalizedStr() for x in type_errors] + self.SaveLatestStrings(strs, type, "_NEW") + errors += len(type_errors) + + if len(type_fixes): + # we don't have access to the original message, so all we can do is log + # the non-verbose normalized text + logging.warning("%d new '%s(%s)' unexpected fixes found\n%s" % ( + len(type_fixes), purify_message.GetMessageType(type), + type, '\n'.join(type_fixes))) + self.SaveLatestStrings(type_fixes, type, "_FIXED") + fixes += len(type_fixes) + if len(current_messages) == 0: + logging.warning("all errors fixed in %s" % baseline_file) + + if len(type_fixes) or len(type_errors): + strs = [baseline_hashes[x] for x in new_baseline] + self.SaveLatestStrings(strs, type, "_BASELINE") + + if current_list: + self._SaveLatestGroupSummary(current_list) + + if errors: + logging.error("%d total new errors found" % errors) + return -1 + else: + logging.info("no new errors found - yay!") + if fixes: + logging.warning("%d total errors unexpectedly fixed" % fixes) + # magic return code to turn the builder orange (via ReturnCodeCommand) + return -88 + return 0 + + +# The following code is here for testing and development purposes. + +def _main(): + retcode = 0 + + parser = optparse.OptionParser("usage: %prog [options] <files to analyze>") + parser.add_option("-b", "--baseline", action="store_true", default=False, + help="save output to baseline files") + parser.add_option("-m", "--memory_in_use", + action="store_true", default=False, + help="print memory in use summary") + parser.add_option("", "--validate", + action="store_true", default=False, + help="validate results vs. 
baseline") + parser.add_option("-e", "--echo_to_stdout", + action="store_true", default=False, + help="echo purify output to standard output") + parser.add_option("", "--source_dir", + help="path to top of source tree for this build" + "(used to normalize source paths in output)") + parser.add_option("", "--byte_filter", default=16384, + help="prune the tree below this number of bytes") + parser.add_option("-n", "--name", + help="name of the test being run " + "(used for output filenames)") + parser.add_option("", "--data_dir", + help="path to where purify data files live") + parser.add_option("", "--bug_report", default=False, + action="store_true", + help="print output as an attempted summary of bugs") + parser.add_option("-v", "--verbose", action="store_true", default=False, + help="verbose output - enable debug log messages") + + (options, args) = parser.parse_args() + if not len(args) >= 1: + parser.error("no filename specified") + filenames = args + + if options.verbose: + google.logging_utils.config_root(level=logging.DEBUG) + else: + google.logging_utils.config_root(level=logging.INFO) + pa = PurifyAnalyze(filenames, options.echo_to_stdout, options.name, + options.source_dir, options.data_dir) + execute_crash = not pa.ReadFile() + if options.bug_report: + pa.PrintBugReport() + pa.PrintSummary(False) + elif options.memory_in_use: + pa.PrintMemoryInUse(int(options.byte_filter)) + elif execute_crash: + retcode = -1 + logging.error("Fatal error during test execution. Analysis skipped.") + elif options.validate: + if pa.CompareResults() != 0: + retcode = -1 + script_dir = google.path_utils.ScriptDir() + latest_dir = os.path.join(script_dir, "latest") + pa.SaveResults(latest_dir) + pa.PrintSummary() + elif options.baseline: + if not pa.SaveResults(verbose=True): + retcode = -1 + pa.PrintSummary(False) + else: + pa.PrintSummary(False) + + sys.exit(retcode) + +if __name__ == "__main__": + _main() + diff --git a/tools/purify/purify_coverage.py b/tools/purify/purify_coverage.py new file mode 100644 index 0000000..0828e22 --- /dev/null +++ b/tools/purify/purify_coverage.py @@ -0,0 +1,111 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_coverage.py + +import logging +import optparse +import os +import re +import sys + +import google.path_utils + +# local modules +import common +import purify_analyze +import purify_message + + +class PurifyCoverage(common.Rational): + def __init__(self): + common.Rational.__init__(self) + script_dir = google.path_utils.ScriptDir() + self._latest_dir = os.path.join(script_dir, "latest") + + def CreateOptionParser(self): + common.Rational.CreateOptionParser(self) + self._parser.description = __doc__ + self._parser.add_option("-n", "--name", + dest="name", default=None, + help="name of the test being run " + "(used for output filenames)") + self._parser.add_option("", "--source_dir", + help="path to top of source tree for this build" + "(used to normalize source paths in baseline)") + + def ParseArgv(self): + if common.Rational.ParseArgv(self): + self._name = self._options.name + if not self._name: + self._name = os.path.basename(self._exe) + # _out_file can be set in common.Rational.ParseArgv + if not self._out_file: + self._out_file = os.path.join(self._latest_dir, + "%s_coverage.txt" % (self._name)) + self._source_dir = self._options.source_dir + return True + return False + + def _PurifyCommand(self): + cmd = [common.PURIFYW_PATH, "/CacheDir=" + self._cache_dir, + "/ShowInstrumentationProgress=no", "/ShowLoadLibraryProgress=no", + "/AllocCallStackLength=30", "/Coverage", + "/CoverageDefaultInstrumentationType=line"] + return cmd + + def Instrument(self): + cmd = self._PurifyCommand() + # /Run=no means instrument only + cmd.append("/Run=no") + cmd.append(os.path.abspath(self._exe)) + return common.Rational.Instrument(self, cmd) + + def Execute(self): + cmd = self._PurifyCommand() + cmd.append("/SaveTextData=" + self._out_file) + # TODO(erikkay): should we also do /SaveMergeTextData? + return common.Rational.Execute(self, cmd) + + def Analyze(self): + if not os.path.isfile(self._out_file): + logging.info("no output file %s" % self._out_file) + return -1 + # TODO(erikkay): parse the output into a form we could use on the buildbots + return 0 + +if __name__ == "__main__": + rational = PurifyCoverage() + if rational.Run(): + retcode = 0 + else: + retcode = -1 + sys.exit(retcode) + diff --git a/tools/purify/purify_inuse.py b/tools/purify/purify_inuse.py new file mode 100644 index 0000000..d527189 --- /dev/null +++ b/tools/purify/purify_inuse.py @@ -0,0 +1,116 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_inuse.py + +import logging +import optparse +import os +import re +import sys + +import google.path_utils + +# local modules +import common +import purify_analyze +import purify_message + + +class PurifyInUse(common.Rational): + def __init__(self): + common.Rational.__init__(self) + script_dir = google.path_utils.ScriptDir() + self._latest_dir = os.path.join(script_dir, "latest") + + def CreateOptionParser(self): + common.Rational.CreateOptionParser(self) + self._parser.description = __doc__ + self._parser.add_option("-n", "--name", + dest="name", default=None, + help="name of the test being run " + "(used for output filenames)") + self._parser.add_option("", "--source_dir", + help="path to top of source tree for this build" + "(used to normalize source paths in baseline)") + self._parser.add_option("", "--byte_filter", default=16384, + help="prune the tree below this number of bytes") + + def ParseArgv(self): + if common.Rational.ParseArgv(self): + self._name = self._options.name + if not self._name: + self._name = os.path.basename(self._exe) + # _out_file can be set in common.Rational.ParseArgv + if not self._out_file: + self._out_file = os.path.join(self._latest_dir, "%s.txt" % (self._name)) + self._source_dir = self._options.source_dir + self._byte_filter = int(self._options.byte_filter) + return True + return False + + def _PurifyCommand(self): + cmd = [common.PURIFYW_PATH, "/CacheDir=" + self._cache_dir, + "/ShowInstrumentationProgress=no", "/ShowLoadLibraryProgress=no", + "/AllocCallStackLength=30", "/ErrorCallStackLength=30", + "/LeaksAtExit=no", "/InUseAtExit=yes"] + return cmd + + def Instrument(self): + cmd = self._PurifyCommand() + # /Run=no means instrument only + cmd.append("/Run=no") + cmd.append(os.path.abspath(self._exe)) + return common.Rational.Instrument(self, cmd) + + def Execute(self): + cmd = self._PurifyCommand() + cmd.append("/SaveTextData=" + self._out_file) + return common.Rational.Execute(self, cmd) + + def Analyze(self): + if not os.path.isfile(self._out_file): + logging.info("no output file %s" % self._out_file) + return -1 + pa = purify_analyze.PurifyAnalyze(self._out_file, False, + self._name, self._source_dir) + if not pa.ReadFile(): + logging.warning("inuse summary suspect due to fatal error during run") + pa.PrintMemoryInUse(byte_filter=self._byte_filter) + return 0 + +if __name__ == "__main__": + rational = PurifyInUse() + if rational.Run(): + retcode = 0 + else: + retcode = -1 + sys.exit(retcode) + diff --git a/tools/purify/purify_message.py b/tools/purify/purify_message.py new 
file mode 100644 index 0000000..d093461 --- /dev/null +++ b/tools/purify/purify_message.py @@ -0,0 +1,634 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_message.py + +''' Utility objects and functions to parse and unique Purify messages ''' + +import cStringIO +import logging +import re +import sys + +import google.logging_utils + +# used to represent one or more elided frames +ELIDE = "..." 
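+# An illustrative (hypothetical) normalized stack as rendered by
+# Stack.NormalizedStr() below: frames outside the source tree collapse to the
+# elide marker above, and frames below a known entry point collapse to the
+# truncation marker defined next (file and function names are made up):
+#   chrome/browser/foo.cc Foo::Bar(void)
+#   ...
+#   base/message_loop.cc MessageLoop::RunTask(Task *)
+#   ^^^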
+# used to represent stack truncation at a known entry point +TRUNCATE = "^^^" +# a file that's outside of our source directory +EXTERNAL_FILE = "EXTERNAL_FILE" + +# mapping of purify message types to descriptions +message_type = { + "ABR": "Array Bounds Read", + "ABW": "Array Bounds Write", + "ABWL": "Array Bounds Write (late detect)", + "BSR": "Beyond Stack Read", + "BSW": "Beyond Stack Write", + "COM": "COM API/Interface Failure", + "EXC": "Continued Exception", + "EXH": "Handled Exception", + "EXI": "Ignored Exception", + "EXU": "Unhandled Exception", + "FFM": "Freeing Freed Memory", + "FIM": "Freeing Invalid Memory", + "FMM": "Freeing Mismatched Memory", + "FMR": "Free Memory Read", + "FMW": "Free Memory Write", + "FMWL": "Free Memory Write (late detect)", + "HAN": "Invalid Handle", + "HIU": "Handle In Use", + "ILK": "COM Interface Leak", + "IPR": "Invalid Pointer Read", + "IPW": "Invalid Pointer Write", + "MAF": "Memory Allocation Failure", + "MIU": "Memory In Use", + "MLK": "Memory Leak", + "MPK": "Potential Memory Leak", + "NPR": "Null Pointer Read", + "NPW": "Null Pointer Write", + "PAR": "Bad Parameter", + "UMC": "Uninitialized Memory Copy", + "UMR": "Uninitialized Memory Read", +} + +# a magic message type which is not enumerated with the normal message type dict +FATAL = "FATAL" + +def GetMessageType(key): + if key in message_type: + return message_type[key] + elif key == FATAL: + return key + logging.warn("unknown message type %s" % key) + return "UNKNOWN" + +# currently unused, but here for documentation purposes +message_severity = { + "I": "Informational", + "E": "Error", + "W": "Warning", + "O": "Internal Purify Error", +} + + +class Stack: + ''' A normalized Purify Stack. The stack is constructed by adding one line + at a time from a stack in a Purify text file via AddLine. + Supports cmp and hash so that stacks which normalize the same can be sorted + and uniqued. + The original stack contents are preserved so that it's possible to drill + down into the full details if necessary. ''' + + # The top of the source tree. This is stripped from the filename as part + # of normalization. + source_dir = "" + + @classmethod + def SetSourceDir(cls, dir): + # normalize the dir + cls.source_dir = dir.replace("\\", "/").lower() + logging.debug("Stack.source_dir = %s" % cls.source_dir) + + # a line in a stack trace + pat_stack_line = re.compile('(.*)\[(\w:)?([^\:\s]*)(:\d+)?(\s+.*)?]') + + # Known stack entry points that allow us to truncate the rest of the stack + # below that point. 
+  pat_known_entries = (
+    re.compile('RunnableMethod::Run\(void\)'),
+    re.compile('ChromeMain'),
+    re.compile('BrowserMain'),
+    re.compile('wWinMain'),
+    re.compile('TimerManager::ProcessPendingTimer\(void\)'),
+    re.compile('RunnableMethod::RunnableMethod\(.*\)'),
+    re.compile('RenderViewHost::OnMessageReceived\(Message::IPC const&\)'),
+    re.compile('testing::Test::Run\(void\)'),
+    re.compile('testing::TestInfoImpl::Run\(void\)'),
+    re.compile('Thread::ThreadFunc\(void \*\)'),
+    re.compile('TimerTask::Run\(void\)'),
+    re.compile('MessageLoop::RunTask\(Task \*\)'),
+    re.compile('.DispatchToMethod\@.*'),
+  )
+
+  # if functions match the following, elide them from the stack
+  pat_func_elide = (re.compile('^std::'), re.compile('^new\('))
+  # if files match the following, elide them from the stack
+  pat_file_elide = (re.compile('.*platformsdk_vista.*'),
+                    re.compile('.*\.(dll|DLL)$'),
+                    # bug 1069902
+                    re.compile('webkit/pending/wtf/fastmalloc\.h'),
+                    # When we leak sqlite stuff, we leak a lot, and the stacks
+                    # are all over the place. For now, let's assume that
+                    # sqlite itself is leak free and focus on our calling code.
+                    re.compile('chrome/third_party/sqlite/.*'),
+                    )
+
+  pat_unit_test = re.compile('^([a-zA-Z0-9]+)_(\w+)_Test::.*')
+
+  def __init__(self, title):
+    self._title = title.lstrip()
+    self._stack = []
+    self._orig = ""
+    # are we currently in an eliding block
+    self._eliding = False
+    # have we truncated the stack?
+    self._truncated = False
+    # is the stack made up completely of external code? (i.e. elided)
+    self._all_external = True
+    # a logical group that this stack belongs to
+    self._group = None
+    # top stack line (preserved even if elided)
+    self._top_stack_line = None
+
+  def GetLines(self):
+    return self._stack
+
+  def GetTopStackLine(self):
+    return self._top_stack_line
+
+  def GetTopVisibleStackLine(self):
+    for line in self._stack:
+      if line['function']:
+        return line
+    return {}
+
+  def GetGroup(self):
+    '''A logical grouping for this stack, allowing related stacks to be grouped
+    together. Subgroups within a group are separated by ".".
+    (e.g.
group.subgroup.subsubgroup) + ''' + return self._group; + + def _ComputeStackLine(self, line): + line = line.lstrip() + m = Stack.pat_stack_line.match(line) + if m: + func = m.group(1).rstrip() + func = self._DemangleSymbol(func) + func = self._DetemplatizeSymbol(func) + if m.group(2): + file = m.group(2) + m.group(3) + else: + file = m.group(3) + # paths are normalized to use / and be lower case + file = file.replace("\\", "/").lower() + if not file.startswith(Stack.source_dir): + file = EXTERNAL_FILE + else: + file = file[len(Stack.source_dir):] + # trim leading / if present + if file[0] == "/": + file = file[1:] + loc = m.group(4) + if loc: + loc = int(loc[1:]) + else: + loc = 0 + return {'function': func, 'file': file, 'line_number': loc} + return None + + def _ShouldElide(self, stack_line): + func = stack_line['function'] + file = stack_line['file'] + # elide certain common functions from the stack such as the STL + for pat in Stack.pat_func_elide: + if pat.match(func): + logging.debug("eliding due to func pat match: %s" % func) + return True + if file == EXTERNAL_FILE: + # if it's not in our source tree, then elide + logging.debug("eliding due to external file: %s" % file) + return True + # elide certain common file sources from the stack, usually this + # involves system libraries + for pat in Stack.pat_file_elide: + if pat.match(file): + logging.debug("eliding due to file pat match: %s" % file) + return True + + return False + + def AddLine(self, line): + ''' Add one line from a stack in a Purify text file. Lines must be + added in order (top down). Lines are added to two internal structures: + an original string copy and an array of normalized lines, split into + (function, file, line number). + Stack normalization does several things: + * elides sections of the stack that are in external code + * truncates the stack at so called "known entry points" + * removes template type information from symbols + Returns False if the line was elided or otherwise omitted. + ''' + self._orig += line + "\n" + stack_line = self._ComputeStackLine(line) + if stack_line: + if not self._top_stack_line: + self._top_stack_line = stack_line + # Unit test entry points are good groupings. Even if we already have a + # group set, a later unit-test stack line will override. + # Note that we also do this even if the stack has already been truncated + # since this is useful information. + # TODO(erikkay): Maybe in this case, the truncation should be overridden? + test_match = Stack.pat_unit_test.match(stack_line["function"]) + if test_match: + self._group = test_match.group(1) + "." + test_match.group(2) + + if self._truncated: + return False + + if self._ShouldElide(stack_line): + if not self._eliding: + self._eliding = True + self._stack.append({'function': "", 'file': ELIDE, 'line_number': 0}) + return False + else: + self._stack.append(stack_line) + self._eliding = False + self._all_external = False + + # when we reach one of the known common stack entry points, truncate + # the stack to avoid printing overly redundant information + if len(self._stack) > 1: + for f in Stack.pat_known_entries: + if f.match(stack_line["function"]): + if not self._group: + # we're at the end of the stack, so use the path to the file + # as the group if we don't already have one + # This won't be incredibly reliable, but might still be useful. 
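+            # e.g. if the frame just above the entry point is in
+            # "chrome/browser/foo.cc" (hypothetical path), the group becomes
+            # "chrome.browser"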
+ prev = self._stack[-2] + if prev['file']: + self._group = '.'.join(prev['file'].split('/')[:-1]) + self._stack.append({'function': "", 'file': TRUNCATE, + 'line_number': 0}) + self._truncated = True + return False + return True + else: + # skip these lines + logging.debug(">>>" + line) + return False + + def _DemangleSymbol(self, symbol): + # TODO(erikkay) - I'm not sure why Purify prepends an address on the + # front of some of these as if it were a namespace (?A<addr>::). From an + # analysis standpoint, it seems meaningless and can change from machine to + # machine, so it's best if it's thrown away + if symbol.startswith("?A0x"): + skipto = symbol.find("::") + if skipto >= 0: + symbol = symbol[(skipto+2):] + else: + logging.warn("unable to strip address off of symbol (%s)" % symbol) + # TODO(erikkay) there are more symbols not being properly demangled + # in Purify's output. Some of these look like template-related issues. + return symbol + + def _DetemplatizeSymbol(self, symbol): + ''' remove all of the template arguments and return values from the + symbol, normalizing it, making it more readable, and less precise ''' + ret = "" + nested = 0 + for i in range(len(symbol)): + if nested > 0: + if symbol[i] == '>': + nested -= 1 + elif symbol[i] == '<': + nested += 1 + elif symbol[i] == '<': + nested += 1 + else: + ret += symbol[i] + return ret + + def __hash__(self): + return hash(self.NormalizedStr()) + + def __cmp__(self, other): + if not other: + return 1 + len_self = len(self._stack) + len_other = len(other._stack) + min_len = min(len_self, len_other) + # sort stacks from the bottom up + for i in range(-1, -(min_len + 1), -1): + # compare file, then func, but omit line number + ret = cmp((self._stack[i]['file'], self._stack[i]['function']), + (other._stack[i]['file'], other._stack[i]['function'])) + if ret: + return ret + return cmp(len_self, len_other) + + def NormalizedStr(self, verbose=False): + ''' String version of the normalized stack. See AddLine for normalization + details. ''' + # use cStringIO for more efficient string building + out = cStringIO.StringIO() + for line in self._stack: + out.write(" ") + out.write(line['file']) + if verbose and line['line_number'] > 0: + out.write(":%d" % line['line_number']) + out.write(" ") + out.write(line['function']) + out.write("\n") + ret = out.getvalue() + out.close() + return ret + + def __str__(self): + return self._orig + + +class Message: + '''A normalized message from a Purify text file. Messages all have a + severity, most have a type, and many have an error stack and/or an + allocation stack. + Supports cmp and hash so that messages which normalize the same can be + sorted and uniqued.''' + + pat_count = re.compile('^(.*) \{(\d+) occurrences?\}') + pat_leak = re.compile('(Potential )?[Mm]emory leak of (\d+) bytes? ' + 'from (\d+) blocks? allocated in (.+)') + pat_miu = re.compile('Memory use of (\d+) bytes? ' + '(\((\d+)% initialized\) )?from (\d+) blocks? ' + 'allocated .. 
(.+)') + # these are headings to different types of stack traces + pat_loc_error = re.compile('\s*(Exception|Error|Call) location') + pat_loc_alloc = re.compile('\s*Allocation location') + pat_loc_free = re.compile('\s*Free location') + pat_loc_free2 = re.compile('\s*Location of free attempt') + + def __init__(self, severity, type, title): + self._severity = severity + self._type = type + self._program = None + self._head = "" + self._loc_alloc = None + self._loc_error = None + self._loc_free = None + self._stack = None + self._count = 1 + self._bytes = 0 + self._blocks = 0 + m = Message.pat_count.match(title) + if m: + self._title = m.group(1) + self._count = int(m.group(2)) + else: + m = Message.pat_leak.match(title) + if m: + self._title = m.group(4) + self._bytes = int(m.group(2)) + self._blocks = int(m.group(3)) + else: + m = Message.pat_miu.match(title) + if m: + self._title = m.group(5) + self._bytes = int(m.group(1)) + self._blocks = int(m.group(4)) + #print "%d/%d - %s" % (self._bytes, self._blocks, title[0:60]) + elif type == "MIU": + logging.error("%s didn't match" % title) + sys.exit(-1) + else: + self._title = title + + def GetAllocStack(self): + return self._loc_alloc + + def GetErrorStack(self): + return self._loc_error + + def GetGroup(self): + '''An attempted logical grouping for this Message computed by the contained + Stack objects. + ''' + group = None + if self._loc_alloc: + group = self._loc_alloc.GetGroup() + if not group and self._loc_error: + group = self._loc_error.GetGroup() + if not group and self._loc_free: + group = self._loc_free.GetGroup() + if not group: + group = "UNKNOWN" + return group + + def AddLine(self, line): + '''Add a line one at a time (in order from the Purify text file) to + build up the message and its associated stacks. ''' + + if Message.pat_loc_error.match(line): + self._stack = Stack(line) + self._loc_error = self._stack + elif Message.pat_loc_alloc.match(line): + self._stack = Stack(line) + self._loc_alloc = self._stack + elif Message.pat_loc_free.match(line) or Message.pat_loc_free2.match(line): + self._stack = Stack(line) + self._loc_free = self._stack + elif self._stack: + if not line.startswith(" "): + logging.debug("*** " + line) + self._stack.AddLine(line) + else: + self._head += line.lstrip() + + def Type(self): + return self._type + + def Program(self): + return self._program + + def SetProgram(self, program): + self._program = program + + def StacksAllExternal(self): + '''Returns True if the stacks it contains are made up completely of + external (elided) symbols''' + return ((not self._loc_error or self._loc_error._all_external) and + (not self._loc_alloc or self._loc_alloc._all_external) and + (not self._loc_free or self._loc_free._all_external)) + + def __hash__(self): + # NOTE: see also _MessageHashesFromFile. If this method changes, then + # _MessageHashesFromFile must be updated to match. + s = "" + if self._loc_error: + s += "Error Location\n" + self._loc_error.NormalizedStr() + if self._loc_alloc: + s += "Alloc Location\n" + self._loc_alloc.NormalizedStr() + if self._loc_free: + s += "Free Location\n" + self._loc_free.NormalizedStr() + return hash(s) + + def NormalizedStr(self, verbose=False): + '''String version of the normalized message. Only includes title + and normalized versions of error and allocation stacks if present. 
+    Example:
+      Uninitialized Memory Read in Foo::Bar()
+      Error Location
+        foo/Foo.cc Foo::Bar(void)
+        foo/main.cc start(void)
+        foo/main.cc main(void)
+      Alloc Location
+        foo/Foo.cc Foo::Foo(void)
+        foo/main.cc start(void)
+        foo/main.cc main(void)
+    '''
+    ret = ""
+    # some of the message types are more verbose than others and we
+    # don't need to indicate their type
+    if verbose and self._type not in ["UMR", "IPR", "IPW"]:
+      ret += GetMessageType(self._type) + ": "
+    if verbose and self._bytes > 0:
+      ret += "(%d bytes, %d blocks) " % (self._bytes, self._blocks)
+    ret += "%s\n" % self._title
+    if self._loc_error:
+      ret += "Error Location\n" + self._loc_error.NormalizedStr(verbose)
+    if self._loc_alloc:
+      ret += "Alloc Location\n" + self._loc_alloc.NormalizedStr(verbose)
+    if self._loc_free:
+      ret += "Free Location\n" + self._loc_free.NormalizedStr(verbose)
+    return ret
+
+  def __str__(self):
+    ret = self._title + "\n" + self._head
+    if self._loc_error:
+      ret += "Error Location\n" + str(self._loc_error)
+    if self._loc_alloc:
+      ret += "Alloc Location\n" + str(self._loc_alloc)
+    if self._loc_free:
+      ret += "Free Location\n" + str(self._loc_free)
+    return ret
+
+  def __cmp__(self, other):
+    if not other:
+      return 1
+    ret = 0
+    if self._loc_error:
+      ret = cmp(self._loc_error, other._loc_error)
+    if ret == 0 and self._loc_alloc:
+      ret = cmp(self._loc_alloc, other._loc_alloc)
+    if ret == 0 and self._loc_free:
+      ret = cmp(self._loc_free, other._loc_free)
+    # since title is often not very interesting, we sort against that last
+    if ret == 0:
+      ret = cmp(self._title, other._title)
+    return ret
+
+
+class MessageList:
+  '''A collection of Message objects of a given message type.'''
+  def __init__(self, type):
+    self._type = type
+    self._messages = []
+    self._unique_messages = None
+    self._sublists = None
+    self._bytes = 0
+
+  def GetType(self):
+    return self._type
+
+  def BeginNewSublist(self):
+    '''Some message types are logically grouped into sets of messages which
+    should not be mixed in the same list. Specifically, Memory In Use (MIU),
+    Memory Leak (MLK) and Potential Memory Leak (MPK) are generated in a set
+    all at once, but this generation can happen at multiple distinct times,
+    either via the Purify UI or through Purify API calls. For example, if
+    Purify is told to dump a list of all memory leaks once, and then again a
+    few minutes later, the two lists will certainly overlap, so they should
+    be kept in separate lists.
+    In order to accommodate this, MessageList supports the notion of sublists.
+    When the caller determines that one list of messages of a type has ended
+    and a new list has begun, it calls BeginNewSublist() which takes the
+    current set of messages, puts them into a new MessageList and puts that
+    into the sublists array. Later, when the caller needs to get at these
+    messages, GetSublists() should be called.
+    '''
+    if len(self._messages):
+      # only create a sublist when there are messages to move into it
+      sublist = MessageList(self._type)
+      sublist._messages = self._messages
+      if not self._sublists:
+        self._sublists = [sublist]
+      else:
+        self._sublists.append(sublist)
+      self._messages = []
+      logging.info("total size: %d" % self._bytes)
+      self._bytes = 0
+
+  def GetSublists(self):
+    '''Returns the current list of sublists. If there are currently sublists
+    and there are any messages that aren't in a sublist, BeginNewSublist() is
+    called implicitly by this method to force those ungrouped messages into
+    their own sublist.
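+
+    A hypothetical sketch of the intended call pattern (names illustrative):
+      leaks = MessageList("MLK")
+      leaks.AddMessage(msg_from_first_dump)
+      leaks.BeginNewSublist()    # first leak dump has ended
+      leaks.AddMessage(msg_from_second_dump)
+      sublists = leaks.GetSublists()    # two sublists, one per dump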
+ ''' + if self._sublists and len(self._sublists) and len(self._messages): + self.BeginNewSublist() + return self._sublists + + def AddMessage(self, msg): + '''Adds a message to this MessageList.''' + # TODO(erikkay): assert if _unique_messages exists + self._messages.append(msg) + self._bytes += msg._bytes + + def AllMessages(self): + '''Returns an array of all Message objects in this MessageList. ''' + # TODO(erikkay): handle case with sublists + return self._messages + + def UniqueMessages(self): + '''Returns an array of the unique normalized Message objects in this + MessageList. + ''' + # the list is lazily computed since we have to create a sorted list, + # which is only valid once all messages have been added + # TODO(erikkay): handle case with sublists + if not self._unique_messages: + self._unique_messages = list(set(self._messages)) + self._unique_messages.sort() + return self._unique_messages + + def UniqueMessageGroups(self): + '''Returns a dictionary mapping Message group names to arrays of uniqued + normalized Message objects in this MessageList. + ''' + unique = self.UniqueMessages() + groups = {} + for msg in unique: + group = msg.GetGroup() + if not group in groups: + groups[group] = [] + groups[group].append(msg) + return groups diff --git a/tools/purify/purify_test.py b/tools/purify/purify_test.py new file mode 100644 index 0000000..c2c1b76 --- /dev/null +++ b/tools/purify/purify_test.py @@ -0,0 +1,249 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# purify_test.py + +'''Runs an exe through Purify and verifies that Purify was +able to successfully instrument and run it. The original purpose was +to be able to identify when a change to our code breaks our ability to Purify +the app. This can happen with seemingly innocuous changes to code due to bugs +in Purify, and is notoriously difficult to track down when it does happen. +Perhaps more importantly in the long run, this can also automate detection of +leaks and other memory bugs. 
It also may be useful to allow people to run
+Purify in a consistent manner without having to worry about broken PATHs,
+corrupt instrumentation, or other per-machine flakiness that Purify is
+sometimes subject to.
+'''
+
+import glob
+import logging
+import optparse
+import os
+import re
+import shutil
+import sys
+import time
+
+import google.path_utils
+
+# local modules
+import common
+import purify_analyze
+
+class Purify(common.Rational):
+  def __init__(self):
+    common.Rational.__init__(self)
+    script_dir = google.path_utils.ScriptDir()
+    self._latest_dir = os.path.join(script_dir, "latest")
+    self._data_dir = None
+
+  def CreateOptionParser(self):
+    common.Rational.CreateOptionParser(self)
+    self._parser.description = __doc__
+    self._parser.add_option("-e", "--echo_to_stdout",
+                            dest="echo_to_stdout", action="store_true",
+                            default=False,
+                            help="echo purify output to standard output")
+    self._parser.add_option("-b", "--baseline",
+                            dest="baseline", action="store_true",
+                            default=False,
+                            help="create baseline error files")
+    self._parser.add_option("-n", "--name",
+                            dest="name", default=None,
+                            help="name of the test being run "
+                                 "(used for output filenames)")
+    self._parser.add_option("", "--source_dir",
+                            help="path to top of source tree for this build "
+                                 "(used to normalize source paths in baseline)")
+    self._parser.add_option("", "--exe",
+                            help="The actual exe to instrument which is "
+                                 "different from the program being run. This "
+                                 "is useful when the exe you want to purify is "
+                                 "run by another script or program.")
+    self._parser.add_option("", "--data_dir",
+                            help="path to where purify data files live")
+
+  def ParseArgv(self):
+    if common.Rational.ParseArgv(self):
+      if self._options.exe:
+        self._exe = self._options.exe
+        if not os.path.isfile(self._exe):
+          logging.error("file doesn't exist " + self._exe)
+          return False
+        self._exe_dir = common.FixPath(os.path.abspath(os.path.dirname(self._exe)))
+      self._echo_to_stdout = self._options.echo_to_stdout
+      self._baseline = self._options.baseline
+      self._name = self._options.name
+      if not self._name:
+        self._name = os.path.basename(self._exe)
+      # _out_file can be set in common.Rational.ParseArgv
+      if not self._out_file:
+        self._out_file = os.path.join(self._latest_dir, "%s.txt" % self._name)
+      self._source_dir = self._options.source_dir
+      self._data_dir = self._options.data_dir
+      if not self._data_dir:
+        script_dir = google.path_utils.ScriptDir()
+        self._data_dir = os.path.join(script_dir, "data")
+      return True
+    return False
+
+  def _PurifyCommand(self):
+    cmd = [common.PURIFY_PATH, "/CacheDir=" + self._cache_dir]
+    return cmd
+
+  def Setup(self):
+    script_dir = google.path_utils.ScriptDir()
+    if common.Rational.Setup(self):
+      pft_file = os.path.join(script_dir, "data", "filters.pft")
+      shutil.copyfile(pft_file, self._exe.replace(".exe", "_exe.pft"))
+      string_list = [
+        "[Purify]",
+        "option -cache-dir=\"%s\"" % (self._cache_dir),
+        "option -save-text-data=\"%s\"" % (common.FixPath(self._out_file)),
+        "option -alloc-call-stack-length=30",
+        "option -error-call-stack-length=30",
+        "option -free-call-stack-length=30",
+        "option -leaks-at-exit=yes",
+        "option -in-use-at-exit=no"
+      ]
+      ini_file = self._exe.replace(".exe", "_pure.ini")
+      if os.path.isfile(ini_file):
+        ini_file_orig = ini_file + ".Original"
+        if not os.path.isfile(ini_file_orig):
+          os.rename(ini_file, ini_file_orig)
+      try:
+        f = open(ini_file, "w+")
+        f.write('\n'.join(string_list))
+      except IOError, (errno, strerror):
+        logging.error("error writing to file %s (%d, %s)" %
+                      (ini_file, errno, strerror))
+        return False
+      if f:
+        f.close()
+      return True
+
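+    # common.Rational.Setup failed (e.g. a stale or corrupt instrumentation
+    # cache), so there is nothing more for us to set up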
return False + + def Instrument(self): + if not os.path.isfile(self._exe): + logging.error("file doesn't exist " + self._exe) + return False + cmd = self._PurifyCommand() + # /Run=no means instrument only, /Replace=yes means replace the exe in place + cmd.extend(["/Run=no", "/Replace=yes"]) + cmd.append(os.path.abspath(self._exe)) + return common.Rational.Instrument(self, cmd) + + def _ExistingOutputFiles(self): + pat_multi = re.compile('(.*)%[0-9]+d(.*)') + m = pat_multi.match(self._out_file) + if m: + g = m.group(1) + '[0-9]*' + m.group(2) + out = glob.glob(g) + if os.path.isfile(m.group(1) + m.group(2)): + out.append(m.group(1) + m.group(2)) + return out + if not os.path.isfile(self._out_file): + return [] + return [self._out_file] + + def Execute(self): + # delete the old file(s) to make sure that this run actually generated + # something new + out_files = self._ExistingOutputFiles() + for f in out_files: + os.remove(f) + common.Rational.Execute(self, []) + # Unfortunately, when we replace the exe, there's no way here to figure out + # if purify is actually going to output a file or if the exe just crashed + # badly. The reason is that it takes some small amount of time for purify + # to dump out the file. + count = 60 + while count > 0 and not os.path.isfile(self._out_file): + time.sleep(0.2) + count -= 1 + # Always return true, even if Execute failed - we'll depend on Analyze to + # determine if the run was valid. + return True + + def Analyze(self): + out_files = self._ExistingOutputFiles() + if not len(out_files): + logging.info("no output files matching %s" % self._out_file) + return -1 + pa = purify_analyze.PurifyAnalyze(out_files, self._echo_to_stdout, + self._name, self._source_dir, + self._data_dir) + if not pa.ReadFile(): + # even though there was a fatal error during Purify, it's still useful + # to see the normalized output + pa.PrintSummary() + if self._baseline: + logging.warning("baseline not generated due to fatal error") + else: + logging.warning("baseline comparison skipped due to fatal error") + return -1 + if self._baseline: + pa.PrintSummary(False) + if pa.SaveResults(): + return 0 + return -1 + else: + retcode = pa.CompareResults() + if retcode != 0: + pa.SaveResults(self._latest_dir) + pa.PrintSummary() + # with more than one output file, it's also important to emit the bug + # report which includes info on the arguments that generated each stack + if len(out_files) > 1: + pa.PrintBugReport() + return retcode + + def Cleanup(self): + common.Rational.Cleanup(self); + cmd = self._PurifyCommand() + # undo the /Replace=yes that was done in Instrument(), which means to + # remove the instrumented exe, and then rename exe.Original back to exe. 
+ cmd.append("/UndoReplace") + cmd.append(os.path.abspath(self._exe)) + common.RunSubprocess(cmd, self._timeout, detach=True) + # if we overwrote an existing ini file, restore it + ini_file = self._exe.replace(".exe", "_pure.ini") + if os.path.isfile(ini_file): + os.remove(ini_file) + ini_file_orig = ini_file + ".Original" + if os.path.isfile(ini_file_orig): + os.rename(ini_file_orig, ini_file) + # remove the pft file we wrote out + pft_file = self._exe.replace(".exe", "_exe.pft") + if os.path.isfile(pft_file): + os.remove(pft_file) + + +if __name__ == "__main__": + rational = Purify() + retcode = rational.Run() + sys.exit(retcode) + diff --git a/tools/purify/quantify_test.py b/tools/purify/quantify_test.py new file mode 100644 index 0000000..054af5d --- /dev/null +++ b/tools/purify/quantify_test.py @@ -0,0 +1,85 @@ +#!/bin/env python +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# quantify_test.py + +'''Runs an app through Quantify and verifies that Quantify was able to +successfully instrument and run it. The original purpose was to allow people +to run Quantify in a consistent manner without having to worry about broken +PATHs, corrupt instrumentation, or other per-machine flakiness that Quantify is +sometimes subject to. Unlike purify_test, the output from quantify_test is +a binary file, which is much more useful in manual analysis. As such, this +tool is not particularly interesting for automated analysis yet. 
+'''
+
+import os
+import sys
+
+# local modules
+import common
+
+class Quantify(common.Rational):
+  def __init__(self):
+    common.Rational.__init__(self)
+
+  def CreateOptionParser(self):
+    common.Rational.CreateOptionParser(self)
+    self._parser.description = __doc__
+
+  def ParseArgv(self):
+    if common.Rational.ParseArgv(self):
+      if not self._out_file:
+        self._out_file = os.path.join(self._cache_dir,
+                                      "%s.qfy" % (os.path.basename(self._exe)))
+      return True
+    return False
+
+  def Instrument(self):
+    proc = [common.QUANTIFYE_PATH, "-quantify",
+            '-quantify_home="' + common.PPLUS_PATH + '"',
+            "/CacheDir=" + self._cache_dir,
+            "-first-search-dir=" + self._exe_dir, self._exe]
+    return common.Rational.Instrument(self, proc)
+
+  def Execute(self):
+    # TODO(erikkay): add an option to also do /SaveTextData and add an
+    # Analyze method for automated analysis of that data.
+    proc = [common.QUANTIFYW_PATH, "/CacheDir=" + self._cache_dir,
+            "/ShowInstrumentationProgress=no", "/ShowLoadLibraryProgress=no",
+            "/SaveData=" + self._out_file]
+    return common.Rational.Execute(self, proc)
+
+if __name__ == "__main__":
+  retcode = -1
+  rational = Quantify()
+  if rational.Run():
+    retcode = 0
+  sys.exit(retcode)