summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: charleslee@chromium.org <charleslee@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-08-18 03:30:25 +0000
committer: charleslee@chromium.org <charleslee@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-08-18 03:30:25 +0000
commit: f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec (patch)
tree: 76a3b7809c8dd98feeae4495c1462d8877abd0ef /tools
parent: d0cf438c4d63c5127a35d76316a1b28e1e788376 (diff)
download: chromium_src-f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec.zip
download: chromium_src-f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec.tar.gz
download: chromium_src-f25b8f41b1dcc4581ce0d46e1d45515d5c9f59ec.tar.bz2
Retry failed and optional prefix for sharding_supervisor

Complements the changes just made to buildbot.

BUG=91709, 93091
Review URL: http://codereview.chromium.org/7670002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@97275 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools')
-rwxr-xr-x tools/sharding_supervisor/sharding_supervisor.py 180
1 files changed, 130 insertions, 50 deletions
diff --git a/tools/sharding_supervisor/sharding_supervisor.py b/tools/sharding_supervisor/sharding_supervisor.py
index 114b9fd..21bbab5 100755
--- a/tools/sharding_supervisor/sharding_supervisor.py
+++ b/tools/sharding_supervisor/sharding_supervisor.py
@@ -14,21 +14,33 @@ is started for that shard and the output is identical to gtest's output.
"""
-from cStringIO import StringIO
+import cStringIO
+import itertools
import optparse
import os
import Queue
import random
import re
-import subprocess
import sys
import threading
+# Add tools/ to path
+BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.join(BASE_PATH, ".."))
+try:
+ import find_depot_tools
+ # Fixes a bug in Windows where some shards die upon starting
+ import subprocess2 as subprocess
+except ImportError:
+ # Unable to find depot_tools, so just use standard subprocess
+ import subprocess
+
SS_USAGE = "python %prog [options] path/to/test [gtest_args]"
SS_DEFAULT_NUM_CORES = 4
SS_DEFAULT_SHARDS_PER_CORE = 5 # num_shards = cores * SHARDS_PER_CORE
SS_DEFAULT_RUNS_PER_CORE = 1 # num_workers = cores * RUNS_PER_CORE
+SS_DEFAULT_RETRY_PERCENT = 5 # --retry-failed ignored if more than 5% fail
def DetectNumCores():
@@ -91,16 +103,17 @@ class ShardRunner(threading.Thread):
self.test_fail = test_fail
self.current_test = ""
- def ReportFailure(self, description, prefix, test_name):
- log_line = "%s: %s%s\n" % (description, prefix, test_name)
- self.supervisor.LogLineFailure(log_line)
+ def ReportFailure(self, description, test_name):
+ log_line = "%s: %s\n" % (description, test_name)
+ self.supervisor.LogTestFailure(log_line)
- def ProcessLine(self, prefix, line):
+ def ProcessLine(self, line):
results = self.test_start.search(line)
if results:
if self.current_test:
- self.ReportFailure("INCOMPLETE", prefix, self.current_test)
+ self.ReportFailure("INCOMPLETE", self.current_test)
self.current_test = results.group(1)
+ self.supervisor.IncrementTestCount()
return
results = self.test_ok.search(line)
@@ -110,7 +123,7 @@ class ShardRunner(threading.Thread):
results = self.test_fail.search(line)
if results:
- self.ReportFailure("FAILED", prefix, results.group(1))
+ self.ReportFailure("FAILED", results.group(1))
self.current_test = ""
def run(self):
@@ -125,8 +138,7 @@ class ShardRunner(threading.Thread):
index = self.counter.get_nowait()
except Queue.Empty:
break
- prefix = "%i>" % index
- chars = StringIO()
+ chars = cStringIO.StringIO()
shard_running = True
shard = RunShard(
self.supervisor.test, self.supervisor.num_shards, index,
@@ -140,11 +152,12 @@ class ShardRunner(threading.Thread):
line = chars.getvalue()
if not line and not shard_running:
break
- self.ProcessLine(prefix, line)
- line = prefix + line
+ self.ProcessLine(line)
self.supervisor.LogOutputLine(index, line)
chars.close()
- chars = StringIO()
+ chars = cStringIO.StringIO()
+ if self.current_test:
+ self.ReportFailure("INCOMPLETE", prefix, self.current_test)
self.supervisor.ShardIndexCompleted(index)
if shard.returncode != 0:
self.supervisor.LogShardFailure(index)
@@ -159,25 +172,28 @@ class ShardingSupervisor(object):
num_runs: Total number of worker threads to create for running shards.
color: Indicates which coloring mode to use in the output.
gtest_args: The options to pass to gtest.
- failure_log: List of statements from shard output indicating a failure.
+ failed_tests: List of statements from shard output indicating a failure.
failed_shards: List of shards that contained failing tests.
"""
SHARD_COMPLETED = object()
- def __init__(
- self, test, num_shards, num_runs, color, reorder, gtest_args):
+ def __init__(self, test, num_shards, num_runs, color, original_order,
+ prefix, retry_percent, gtest_args):
"""Inits ShardingSupervisor with given options and gtest arguments."""
self.test = test
self.num_shards = num_shards
self.num_runs = num_runs
self.color = color
- self.reorder = reorder
+ self.original_order = original_order
+ self.prefix = prefix
+ self.retry_percent = retry_percent
self.gtest_args = gtest_args
- self.failure_log = []
+ self.failed_tests = []
self.failed_shards = []
self.shards_completed = [False] * num_shards
self.shard_output = [Queue.Queue() for _ in range(num_shards)]
+ self.test_counter = itertools.count()
def ShardTest(self):
"""Runs the test and manages the worker threads.
@@ -185,10 +201,10 @@ class ShardingSupervisor(object):
Runs the test and outputs a summary at the end. All the tests in the
suite are run by creating (cores * runs_per_core) threads and
(cores * shards_per_core) shards. When all the worker threads have
- finished, the lines saved in the failure_log are printed again.
+ finished, the lines saved in failed_tests are printed again.
Returns:
- The number of shards that had failing tests.
+ 1 if some unexpected (not FLAKY or FAILS) tests failed, 0 otherwise.
"""
# Regular expressions for parsing GTest logs. Test names look like
@@ -218,22 +234,38 @@ class ShardingSupervisor(object):
self, counter, test_start, test_ok, test_fail)
worker.start()
workers.append(worker)
- if self.reorder:
- self.WaitForShards()
- else:
+ if self.original_order:
for worker in workers:
worker.join()
+ else:
+ self.WaitForShards()
- return self.PrintSummary(self.failure_log)
-
- def LogLineFailure(self, line):
+ num_failed = len(self.failed_shards)
+ if num_failed > 0:
+ self.failed_shards.sort()
+ self.WriteText(sys.stderr,
+ "\nFAILED SHARDS: %s\n" % str(self.failed_shards),
+ "\x1b[1;5;31m")
+ else:
+ self.WriteText(sys.stderr, "\nALL SHARDS PASSED!\n", "\x1b[1;5;32m")
+ self.PrintSummary(self.failed_tests)
+
+ self.failed_tests = [x for x in self.failed_tests if x.find("FAILS_") < 0]
+ self.failed_tests = [x for x in self.failed_tests if x.find("FLAKY_") < 0]
+ if not self.failed_tests:
+ return 0
+ if self.retry_percent < 0:
+ return len(self.failed_shards) > 0
+ return self.RetryFailedTests()
+
+ def LogTestFailure(self, line):
"""Saves a line in the failure log to be printed at the end.
Args:
- line: The line to save in the failure_log.
+ line: The line to save in the failed_tests list.
"""
- if line not in self.failure_log:
- self.failure_log.append(line)
+ if line not in self.failed_tests:
+ self.failed_tests.append(line)
def LogShardFailure(self, index):
"""Records that a test in the given shard has failed.
@@ -252,39 +284,54 @@ class ShardingSupervisor(object):
sys.stdout.write(line)
def LogOutputLine(self, index, line):
- if self.reorder:
- self.shard_output[index].put(line)
- else:
+ if self.prefix:
+ line = "%i>%s" % (index, line)
+ if self.original_order:
sys.stdout.write(line)
+ else:
+ self.shard_output[index].put(line)
+
+ def IncrementTestCount(self):
+ self.test_counter.next()
def ShardIndexCompleted(self, index):
self.shard_output[index].put(self.SHARD_COMPLETED)
+ def RetryFailedTests(self):
+ num_tests_run = self.test_counter.next()
+ if len(self.failed_tests) > self.retry_percent * num_tests_run:
+ sys.stderr.write("\nNOT RETRYING FAILED TESTS (too many failed)\n")
+ return 1
+ self.WriteText(sys.stderr, "\nRETRYING FAILED TESTS:\n", "\x1b[1;5;33m")
+ sharded_description = re.compile(r": (?:\d+>)?(.*)")
+ gtest_filters = [sharded_description.search(line).group(1)
+ for line in self.failed_tests]
+ failed_retries = []
+
+ for test_filter in gtest_filters:
+ args = [self.test, "--gtest_filter=" + test_filter]
+ args.extend(self.gtest_args)
+ rerun = subprocess.Popen(args)
+ rerun.wait()
+ if rerun.returncode != 0:
+ failed_retries.append(test_filter)
+
+ self.WriteText(sys.stderr, "RETRY RESULTS:\n", "\x1b[1;5;33m")
+ self.PrintSummary(failed_retries)
+ return len(failed_retries) > 0
+
def PrintSummary(self, failed_tests):
"""Prints a summary of the test results.
If any shards had failing tests, the list is sorted and printed. Then all
the lines that indicate a test failure are reproduced.
-
- Returns:
- The number of shards that had failing tests.
"""
- sys.stderr.write("\n")
- num_failed = len(self.failed_shards)
- if num_failed > 0:
- self.failed_shards.sort()
- self.WriteText(sys.stderr,
- "FAILED SHARDS: %s\n" % str(self.failed_shards),
- "\x1b[1;5;31m")
- else:
- self.WriteText(sys.stderr, "ALL SHARDS PASSED!\n", "\x1b[1;5;32m")
if failed_tests:
self.WriteText(sys.stderr, "FAILED TESTS:\n", "\x1b[1;5;31m")
for line in failed_tests:
sys.stderr.write(line)
- if self.color:
- sys.stderr.write("\x1b[m")
- return num_failed
+ else:
+ self.WriteText(sys.stderr, "ALL TESTS PASSED!\n", "\x1b[1;5;32m")
def WriteText(self, pipe, text, ansi):
if self.color:
@@ -316,8 +363,26 @@ def main():
"--reorder", action="store_true",
help="ensure that all output from an earlier shard is printed before"
" output from a later shard")
- parser.add_option("--random-seed", action="store_true",
+ # TODO(charleslee): for backwards compatibility with master.cfg file
+ parser.add_option(
+ "--original-order", action="store_true",
+ help="print shard output in its orginal jumbled order of execution"
+ " (useful for debugging flaky tests)")
+ parser.add_option(
+ "--prefix", action="store_true",
+ help="prefix each line of shard output with 'N>', where N is the shard"
+ " index (forced True when --original-order is True)")
+ parser.add_option(
+ "--random-seed", action="store_true",
help="shuffle the tests with a random seed value")
+ parser.add_option(
+ "--retry-failed", action="store_true",
+ help="retry tests that did not pass serially")
+ parser.add_option(
+ "--retry-percent", type="int",
+ default=SS_DEFAULT_RETRY_PERCENT,
+ help="ignore --retry-failed if more than this percent fail [0, 100]"
+ " (default = %i)" % SS_DEFAULT_RETRY_PERCENT)
parser.disable_interspersed_args()
(options, args) = parser.parse_args()
@@ -339,10 +404,24 @@ def main():
gtest_args = ["--gtest_color=%s" % {
True: "yes", False: "no"}[options.color]] + args[1:]
+ if options.original_order:
+ options.prefix = True
+
+ # TODO(charleslee): for backwards compatibility with buildbot's log_parser
+ if options.reorder:
+ options.original_order = False
+ options.prefix = True
+
if options.random_seed:
seed = random.randint(1, 99999)
gtest_args.extend(["--gtest_shuffle", "--gtest_random_seed=%i" % seed])
+ if options.retry_failed:
+ if options.retry_percent < 0 or options.retry_percent > 100:
+ parser.error("Retry percent must be an integer [0, 100]!")
+ else:
+ options.retry_percent = -1
+
if options.runshard != None:
# run a single shard and exit
if (options.runshard < 0 or options.runshard >= num_shards):
@@ -353,8 +432,9 @@ def main():
return shard.poll()
# shard and run the whole test
- ss = ShardingSupervisor(args[0], num_shards, num_runs, options.color,
- options.reorder, gtest_args)
+ ss = ShardingSupervisor(
+ args[0], num_shards, num_runs, options.color, options.original_order,
+ options.prefix, options.retry_percent, gtest_args)
return ss.ShardTest()