diff options
author | jrg@chromium.org <jrg@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-06 01:19:17 +0000 |
---|---|---|
committer | jrg@chromium.org <jrg@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-06 01:19:17 +0000 |
commit | 116e89a8ea60cba6ae26e7aabcbb445244c58037 (patch) | |
tree | 53f95e4904a20c759f6d3e0d70e07f676b71c1f9 /tools/code_coverage | |
parent | e22a7d3e8d27c68b52ed144baf7d8238c7a75893 (diff) | |
download | chromium_src-116e89a8ea60cba6ae26e7aabcbb445244c58037.zip chromium_src-116e89a8ea60cba6ae26e7aabcbb445244c58037.tar.gz chromium_src-116e89a8ea60cba6ae26e7aabcbb445244c58037.tar.bz2 |
Kill subprocesses (e.g. unit tests) which appear to be hanging. Use a
mechanism similar to buildbot (e.g. give up if no output from a
subprocess within a few minutes).
When SIGTERMed (e.g. by buildbot), make an effort to SIGTERM any
running subprocess. Should make the coverage bots more resiliant
against getting "stuck" (e.g. the "clean" stage won't fail because a
running process prevents a directory from being empty).
Added some unit tests for the coverage script.
BUG=None
TEST=Run the coverage script.
Review URL: http://codereview.chromium.org/1957004
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46538 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/code_coverage')
-rwxr-xr-x | tools/code_coverage/coverage_posix.py | 145 | ||||
-rwxr-xr-x | tools/code_coverage/coverage_posix_unittest.py | 72 |
2 files changed, 210 insertions, 7 deletions
diff --git a/tools/code_coverage/coverage_posix.py b/tools/code_coverage/coverage_posix.py index 3015ca9..340199d 100755 --- a/tools/code_coverage/coverage_posix.py +++ b/tools/code_coverage/coverage_posix.py @@ -45,6 +45,9 @@ Linux: RunPythonCommandInBuildDir() command to avoid the need for this step. +--timeout=SECS: if a subprocess doesn't have output within SECS, + assume it's a hang. Kill it and give up. + Strings after all options are considered tests to run. Test names have all text before a ':' stripped to help with gyp compatibility. For example, ../base/base.gyp:base_unittests is interpreted as a test @@ -55,13 +58,113 @@ import glob import logging import optparse import os +import Queue import shutil +import signal import subprocess import sys +import threading import time import traceback +"""Global list of child PIDs to kill when we die.""" +gChildPIDs = [] + +def TerminateSignalHandler(sig, stack): + """When killed, try and kill our child processes.""" + signal.signal(sig, signal.SIG_DFL) + for pid in gChildPIDs: + if 'kill' in os.__all__: # POSIX + os.kill(pid, sig) + else: + subprocess.call(['taskkill.exe', '/PID', pid]) + sys.exit(0) + + +class RunTooLongException(Exception): + """Thrown when a command runs too long without output.""" + pass + + +class RunProgramThread(threading.Thread): + """A thread to run a subprocess. + + We want to print the output of our subprocess in real time, but also + want a timeout if there has been no output for a certain amount of + time. Normal techniques (e.g. loop in select()) aren't cross + platform enough. + """ + # Constants in our queue + LINE = 0 + DIED = 1 + + def __init__(self, cmd): + super(RunProgramThread, self).__init__() + self._cmd = cmd + self._process = None + self._queue = Queue.Queue() + self._retcode = None + + def run(self): + self._process = subprocess.Popen(self._cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + gChildPIDs.append(self._process.pid) + try: + while True: + line = self._process.stdout.readline() + if not line: # EOF + break + print line, + self._queue.put(RunProgramThread.LINE, True) + except IOError: + pass + # If we get here the process is dead. + gChildPIDs.remove(self._process.pid) + self._queue.put(RunProgramThread.DIED) + + def stop(self): + self.kill() + + def kill(self): + """Kill our running process if needed. Wait for kill to complete. + + Should be called in the PARENT thread; we do not self-kill. + Returns the return code of the process. + Safe to call even if the process is dead.""" + if not self._process: + return self._retcode + if 'kill' in os.__all__: # POSIX + os.kill(self._process.pid, signal.SIGTERM) + else: + subprocess.call(['taskkill.exe', '/PID', self._process.pid]) + self._retcode = self._process.wait() + return self._retcode + + def retcode(self): + """Return the return value of the subprocess. + + Kill it if needed.""" + return self.kill() + + def RunUntilCompletion(self, timeout): + """Run thread until completion or timeout (in seconds). + + Start the thread. Let it run until completion, or until we've + spent TIMEOUT without seeing output. On timeout throw + RunTooLongException.""" + self.start() + while True: + try: + x = self._queue.get(True, timeout) + if x == RunProgramThread.DIED: + return self.retcode() + except Queue.Empty, e: # timed out + self.kill() + raise RunTooLongException() + + class Coverage(object): """Doitall class for code coverage.""" @@ -189,9 +292,22 @@ class Coverage(object): def Run(self, cmdlist, ignore_error=False, ignore_retcode=None, explanation=None): - """Run the command list; exit fatally on error.""" + """Run the command list; exit fatally on error. + + Args: + cmdlist: a list of commands (e.g. to pass to subprocess.call) + ignore_error: if True log an error; if False then exit. + ignore_retcode: if retcode is non-zero, exit unless we ignore. + + Returns: process return code. + Throws: RunTooLongException if the process does not produce output + within TIMEOUT seconds; timeout is specified as a command line + option to the Coverage class and is set on init. + """ logging.info('Running ' + str(cmdlist)) - retcode = subprocess.call(cmdlist) + t = RunProgramThread(cmdlist) + retcode = t.RunUntilCompletion(self.options.timeout) + if retcode: if ignore_error or retcode == ignore_retcode: logging.warning('COVERAGE: %s unhappy but errors ignored %s' % @@ -200,7 +316,7 @@ class Coverage(object): logging.fatal('COVERAGE: %s failed; return code: %d' % (str(cmdlist), retcode)) sys.exit(retcode) - + return retcode def IsPosix(self): """Return True if we are POSIX.""" @@ -417,10 +533,8 @@ class Coverage(object): if self.options.strict: sys.exit(retcode) -def main(): - # Print out the args to help someone do it by hand if needed - print >>sys.stderr, sys.argv - +def CoverageOptionParser(): + """Return an optparse.OptionParser() suitable for Coverage object creation.""" parser = optparse.OptionParser() parser.add_option('-d', '--directory', @@ -462,6 +576,23 @@ def main(): dest='xvfb', default=True, help='Use Xvfb for tests? Default True.') + parser.add_option('-T', + '--timeout', + dest='timeout', + default=9.9 * 60.0, + help='Timeout before bailing if a subprocess has no output.' + ' Default is a hair under 10min (Buildbot is 10min.)') + return parser + + +def main(): + # Print out the args to help someone do it by hand if needed + print >>sys.stderr, sys.argv + + # Try and clean up nice if we're killed by buildbot + signal.signal(signal.SIGTERM, TerminateSignalHandler) + + parser = CoverageOptionParser() (options, args) = parser.parse_args() if not options.directory: parser.error('Directory not specified') diff --git a/tools/code_coverage/coverage_posix_unittest.py b/tools/code_coverage/coverage_posix_unittest.py new file mode 100755 index 0000000..e7ae155 --- /dev/null +++ b/tools/code_coverage/coverage_posix_unittest.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# Copyright (c) 2010 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Unit tests for coverage_posix.py.""" + +import coverage_posix as coverage +import sys +import unittest + +class CoveragePosixTest(unittest.TestCase): + + def setUp(self): + self.parseArgs() + + def parseArgs(self): + """Setup and process arg parsing.""" + self.parser = coverage.CoverageOptionParser() + (self.options, self.args) = self.parser.parse_args() + + def testSanity(self): + """Sanity check we're able to actually run the tests. + + Simply creating a Coverage instance checks a few things (e.g. on + Windows that the coverage tools can be found).""" + c = coverage.Coverage('.', self.options, self.args) + + def testRunBasicProcess(self): + """Test a simple run of a subprocess.""" + c = coverage.Coverage('.', self.options, self.args) + for code in range(2): + retcode = c.Run([sys.executable, '-u', '-c', + 'import sys; sys.exit(%d)' % code], + ignore_error=True) + self.assertEqual(code, retcode) + + def testRunSlowProcess(self): + """Test program which prints slowly but doesn't hit our timeout. + + Overall runtime is longer than the timeout but output lines + trickle in keeping things alive. + """ + self.options.timeout = 2.5 + c = coverage.Coverage('.', self.options, self.args) + slowscript = ('import sys, time\n' + 'for x in range(10):\n' + ' time.sleep(0.5)\n' + ' print "hi mom"\n' + 'sys.exit(0)\n') + retcode = c.Run([sys.executable, '-u', '-c', slowscript]) + self.assertEqual(0, retcode) + + def testRunExcessivelySlowProcess(self): + """Test program which DOES hit our timeout. + + Initial lines should print but quickly it takes too long and + should be killed. + """ + self.options.timeout = 2.5 + c = coverage.Coverage('.', self.options, self.args) + slowscript = ('import time\n' + 'for x in range(1,10):\n' + ' print "sleeping for %d" % x\n' + ' time.sleep(x)\n') + self.assertRaises(Exception, + c.Run, + [sys.executable, '-u', '-c', slowscript]) + + +if __name__ == '__main__': + unittest.main() |