diff options
author | charleslee@chromium.org <charleslee@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-18 03:30:25 +0000 |
---|---|---|
committer | charleslee@chromium.org <charleslee@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-18 03:30:25 +0000 |
commit | f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec (patch) | |
tree | 76a3b7809c8dd98feeae4495c1462d8877abd0ef /tools | |
parent | d0cf438c4d63c5127a35d76316a1b28e1e788376 (diff) | |
download | chromium_src-f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec.zip chromium_src-f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec.tar.gz chromium_src-f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec.tar.bz2 |
Retry failed and optional prefix for sharding_supervisor
Complements the changes just made to buildbot
BUG=91709, 93091
Review URL: http://codereview.chromium.org/7670002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@97275 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/sharding_supervisor/sharding_supervisor.py | 180 |
1 files changed, 130 insertions, 50 deletions
```diff
diff --git a/tools/sharding_supervisor/sharding_supervisor.py b/tools/sharding_supervisor/sharding_supervisor.py
index 114b9fd..21bbab5 100755
--- a/tools/sharding_supervisor/sharding_supervisor.py
+++ b/tools/sharding_supervisor/sharding_supervisor.py
@@ -14,21 +14,33 @@ is started for that shard and the output is identical to gtest's output.
 """
 
 
-from cStringIO import StringIO
+import cStringIO
+import itertools
 import optparse
 import os
 import Queue
 import random
 import re
-import subprocess
 import sys
 import threading
 
+# Add tools/ to path
+BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.join(BASE_PATH, ".."))
+try:
+  import find_depot_tools
+  # Fixes a bug in Windows where some shards die upon starting
+  import subprocess2 as subprocess
+except ImportError:
+  # Unable to find depot_tools, so just use standard subprocess
+  import subprocess
+
 
 SS_USAGE = "python %prog [options] path/to/test [gtest_args]"
 SS_DEFAULT_NUM_CORES = 4
 SS_DEFAULT_SHARDS_PER_CORE = 5  # num_shards = cores * SHARDS_PER_CORE
 SS_DEFAULT_RUNS_PER_CORE = 1  # num_workers = cores * RUNS_PER_CORE
+SS_DEFAULT_RETRY_PERCENT = 5  # --retry-failed ignored if more than 5% fail
 
 
 def DetectNumCores():
@@ -91,16 +103,17 @@ class ShardRunner(threading.Thread):
     self.test_fail = test_fail
     self.current_test = ""
 
-  def ReportFailure(self, description, prefix, test_name):
-    log_line = "%s: %s%s\n" % (description, prefix, test_name)
-    self.supervisor.LogLineFailure(log_line)
+  def ReportFailure(self, description, test_name):
+    log_line = "%s: %s\n" % (description, test_name)
+    self.supervisor.LogTestFailure(log_line)
 
-  def ProcessLine(self, prefix, line):
+  def ProcessLine(self, line):
     results = self.test_start.search(line)
     if results:
       if self.current_test:
-        self.ReportFailure("INCOMPLETE", prefix, self.current_test)
+        self.ReportFailure("INCOMPLETE", self.current_test)
       self.current_test = results.group(1)
+      self.supervisor.IncrementTestCount()
       return
 
     results = self.test_ok.search(line)
@@ -110,7 +123,7 @@ class ShardRunner(threading.Thread):
 
     results = self.test_fail.search(line)
     if results:
-      self.ReportFailure("FAILED", prefix, results.group(1))
+      self.ReportFailure("FAILED", results.group(1))
       self.current_test = ""
 
   def run(self):
@@ -125,8 +138,7 @@ class ShardRunner(threading.Thread):
         index = self.counter.get_nowait()
       except Queue.Empty:
         break
-      prefix = "%i>" % index
-      chars = StringIO()
+      chars = cStringIO.StringIO()
       shard_running = True
       shard = RunShard(
           self.supervisor.test, self.supervisor.num_shards, index,
@@ -140,11 +152,12 @@ class ShardRunner(threading.Thread):
           line = chars.getvalue()
           if not line and not shard_running:
             break
-          self.ProcessLine(prefix, line)
-          line = prefix + line
+          self.ProcessLine(line)
           self.supervisor.LogOutputLine(index, line)
           chars.close()
-          chars = StringIO()
+          chars = cStringIO.StringIO()
+      if self.current_test:
+        self.ReportFailure("INCOMPLETE", prefix, self.current_test)
       self.supervisor.ShardIndexCompleted(index)
       if shard.returncode != 0:
         self.supervisor.LogShardFailure(index)
@@ -159,25 +172,28 @@ class ShardingSupervisor(object):
     num_runs: Total number of worker threads to create for running shards.
     color: Indicates which coloring mode to use in the output.
     gtest_args: The options to pass to gtest.
-    failure_log: List of statements from shard output indicating a failure.
+    failed_tests: List of statements from shard output indicating a failure.
     failed_shards: List of shards that contained failing tests.
   """
 
   SHARD_COMPLETED = object()
 
-  def __init__(
-      self, test, num_shards, num_runs, color, reorder, gtest_args):
+  def __init__(self, test, num_shards, num_runs, color, original_order,
+               prefix, retry_percent, gtest_args):
     """Inits ShardingSupervisor with given options and gtest arguments."""
     self.test = test
     self.num_shards = num_shards
     self.num_runs = num_runs
     self.color = color
-    self.reorder = reorder
+    self.original_order = original_order
+    self.prefix = prefix
+    self.retry_percent = retry_percent
     self.gtest_args = gtest_args
-    self.failure_log = []
+    self.failed_tests = []
     self.failed_shards = []
     self.shards_completed = [False] * num_shards
     self.shard_output = [Queue.Queue() for _ in range(num_shards)]
+    self.test_counter = itertools.count()
 
   def ShardTest(self):
     """Runs the test and manages the worker threads.
@@ -185,10 +201,10 @@ class ShardingSupervisor(object):
     Runs the test and outputs a summary at the end. All the tests in the
     suite are run by creating (cores * runs_per_core) threads and
     (cores * shards_per_core) shards. When all the worker threads have
-    finished, the lines saved in the failure_log are printed again.
+    finished, the lines saved in failed_tests are printed again.
 
     Returns:
-      The number of shards that had failing tests.
+      1 if some unexpected (not FLAKY or FAILS) tests failed, 0 otherwise.
     """
 
     # Regular expressions for parsing GTest logs. Test names look like
@@ -218,22 +234,38 @@ class ShardingSupervisor(object):
           self, counter, test_start, test_ok, test_fail)
       worker.start()
       workers.append(worker)
-    if self.reorder:
-      self.WaitForShards()
-    else:
+    if self.original_order:
       for worker in workers:
         worker.join()
+    else:
+      self.WaitForShards()
 
-    return self.PrintSummary(self.failure_log)
-
-  def LogLineFailure(self, line):
+    num_failed = len(self.failed_shards)
+    if num_failed > 0:
+      self.failed_shards.sort()
+      self.WriteText(sys.stderr,
+                     "\nFAILED SHARDS: %s\n" % str(self.failed_shards),
+                     "\x1b[1;5;31m")
+    else:
+      self.WriteText(sys.stderr, "\nALL SHARDS PASSED!\n", "\x1b[1;5;32m")
+    self.PrintSummary(self.failed_tests)
+
+    self.failed_tests = [x for x in self.failed_tests if x.find("FAILS_") < 0]
+    self.failed_tests = [x for x in self.failed_tests if x.find("FLAKY_") < 0]
+    if not self.failed_tests:
+      return 0
+    if self.retry_percent < 0:
+      return len(self.failed_shards) > 0
+    return self.RetryFailedTests()
+
+  def LogTestFailure(self, line):
     """Saves a line in the failure log to be printed at the end.
 
     Args:
-      line: The line to save in the failure_log.
+      line: The line to save in the failed_tests list.
     """
-    if line not in self.failure_log:
-      self.failure_log.append(line)
+    if line not in self.failed_tests:
+      self.failed_tests.append(line)
 
   def LogShardFailure(self, index):
     """Records that a test in the given shard has failed.
@@ -252,39 +284,54 @@ class ShardingSupervisor(object):
         sys.stdout.write(line)
 
   def LogOutputLine(self, index, line):
-    if self.reorder:
-      self.shard_output[index].put(line)
-    else:
+    if self.prefix:
+      line = "%i>%s" % (index, line)
+    if self.original_order:
       sys.stdout.write(line)
+    else:
+      self.shard_output[index].put(line)
+
+  def IncrementTestCount(self):
+    self.test_counter.next()
 
   def ShardIndexCompleted(self, index):
     self.shard_output[index].put(self.SHARD_COMPLETED)
 
+  def RetryFailedTests(self):
+    num_tests_run = self.test_counter.next()
+    if len(self.failed_tests) > self.retry_percent * num_tests_run:
+      sys.stderr.write("\nNOT RETRYING FAILED TESTS (too many failed)\n")
+      return 1
+    self.WriteText(sys.stderr, "\nRETRYING FAILED TESTS:\n", "\x1b[1;5;33m")
+    sharded_description = re.compile(r": (?:\d+>)?(.*)")
+    gtest_filters = [sharded_description.search(line).group(1)
+                     for line in self.failed_tests]
+    failed_retries = []
+
+    for test_filter in gtest_filters:
+      args = [self.test, "--gtest_filter=" + test_filter]
+      args.extend(self.gtest_args)
+      rerun = subprocess.Popen(args)
+      rerun.wait()
+      if rerun.returncode != 0:
+        failed_retries.append(test_filter)
+
+    self.WriteText(sys.stderr, "RETRY RESULTS:\n", "\x1b[1;5;33m")
+    self.PrintSummary(failed_retries)
+    return len(failed_retries) > 0
+
   def PrintSummary(self, failed_tests):
     """Prints a summary of the test results.
 
     If any shards had failing tests, the list is sorted and printed. Then all
     the lines that indicate a test failure are reproduced.
-
-    Returns:
-      The number of shards that had failing tests.
     """
-    sys.stderr.write("\n")
-    num_failed = len(self.failed_shards)
-    if num_failed > 0:
-      self.failed_shards.sort()
-      self.WriteText(sys.stderr,
-                     "FAILED SHARDS: %s\n" % str(self.failed_shards),
-                     "\x1b[1;5;31m")
-    else:
-      self.WriteText(sys.stderr, "ALL SHARDS PASSED!\n", "\x1b[1;5;32m")
     if failed_tests:
       self.WriteText(sys.stderr, "FAILED TESTS:\n", "\x1b[1;5;31m")
       for line in failed_tests:
         sys.stderr.write(line)
-    if self.color:
-      sys.stderr.write("\x1b[m")
-    return num_failed
+    else:
+      self.WriteText(sys.stderr, "ALL TESTS PASSED!\n", "\x1b[1;5;32m")
 
   def WriteText(self, pipe, text, ansi):
     if self.color:
@@ -316,8 +363,26 @@ def main():
       "--reorder", action="store_true",
       help="ensure that all output from an earlier shard is printed before"
       " output from a later shard")
-  parser.add_option("--random-seed", action="store_true",
+  # TODO(charleslee): for backwards compatibility with master.cfg file
+  parser.add_option(
+      "--original-order", action="store_true",
+      help="print shard output in its orginal jumbled order of execution"
+      " (useful for debugging flaky tests)")
+  parser.add_option(
+      "--prefix", action="store_true",
+      help="prefix each line of shard output with 'N>', where N is the shard"
+      " index (forced True when --original-order is True)")
+  parser.add_option(
+      "--random-seed", action="store_true",
       help="shuffle the tests with a random seed value")
+  parser.add_option(
+      "--retry-failed", action="store_true",
+      help="retry tests that did not pass serially")
+  parser.add_option(
+      "--retry-percent", type="int",
+      default=SS_DEFAULT_RETRY_PERCENT,
+      help="ignore --retry-failed if more than this percent fail [0, 100]"
+      " (default = %i)" % SS_DEFAULT_RETRY_PERCENT)
   parser.disable_interspersed_args()
   (options, args) = parser.parse_args()
 
@@ -339,10 +404,24 @@ def main():
   gtest_args = ["--gtest_color=%s" % {
       True: "yes", False: "no"}[options.color]] + args[1:]
 
+  if options.original_order:
+    options.prefix = True
+
+  # TODO(charleslee): for backwards compatibility with buildbot's log_parser
+  if options.reorder:
+    options.original_order = False
+    options.prefix = True
+
   if options.random_seed:
     seed = random.randint(1, 99999)
     gtest_args.extend(["--gtest_shuffle", "--gtest_random_seed=%i" % seed])
 
+  if options.retry_failed:
+    if options.retry_percent < 0 or options.retry_percent > 100:
+      parser.error("Retry percent must be an integer [0, 100]!")
+  else:
+    options.retry_percent = -1
+
   if options.runshard != None:
     # run a single shard and exit
     if (options.runshard < 0 or options.runshard >= num_shards):
@@ -353,8 +432,9 @@ def main():
     return shard.poll()
 
   # shard and run the whole test
-  ss = ShardingSupervisor(args[0], num_shards, num_runs, options.color,
-                          options.reorder, gtest_args)
+  ss = ShardingSupervisor(
+      args[0], num_shards, num_runs, options.color, options.original_order,
+      options.prefix, options.retry_percent, gtest_args)
   return ss.ShardTest()
 
 
```