author     ojan@chromium.org <ojan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2009-11-19 00:43:45 +0000
committer  ojan@chromium.org <ojan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2009-11-19 00:43:45 +0000
commit     5abbf4aa3f7f85fc16569f45324779ecddac51a7 (patch)
tree       5683a15ee7c1758dd1636f7daca0cf813b939e9b
parent     14c42626863dc927b1dd0048b27502e67fef6904 (diff)
Retry failing tests. Keep the original list of failing tests for the
purposes of computing statistics at the end and for the sake of the
layout test dashboard.
Flaky tests are listed as follows:
Flaky: Unexpected image failures (1):
LayoutTests/media/video-transformed.html = IMAGE PASS
For now, they won't turn the bot red or yellow. I'll submit a follow-up
change to the log parser to turn the bot yellow when there is unexpected
flakiness.
Also removed an extraneous command-line flag and a couple of extraneous
logging statements.
Review URL: http://codereview.chromium.org/402058
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@32452 0039d316-1c4b-4281-b951-d872f2087c98
4 files changed, 223 insertions, 150 deletions
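The retry flow this patch adds (see run_webkit_tests.py in the diff below) boils down to: run everything once, compute the unexpected failures, rerun only those up to three times, and treat whatever stops failing as flaky. A minimal, self-contained Python sketch of that loop follows; the names here (run_with_retries, run_tests, compare_failures, MAX_RETRIES) are illustrative stand-ins, not the actual TestRunner methods from the patch.

MAX_RETRIES = 3  # mirrors NUM_RETRY_ON_UNEXPECTED_FAILURE in the patch


def run_with_retries(run_tests, compare_failures, all_tests):
  """Run tests once, then rerun unexpected failures up to MAX_RETRIES times.

  run_tests(tests) -> dict mapping test name to failure info.
  compare_failures(failures) -> set of tests whose failure was unexpected.
  """
  original_failures = run_tests(all_tests)
  original_regressions = compare_failures(original_failures)

  final_failures = original_failures
  regressions = set(original_regressions)
  retries = 0
  while retries < MAX_RETRIES and regressions:
    retries += 1
    # Only the tests that failed unexpectedly get rerun.
    final_failures = run_tests(sorted(regressions))
    regressions = compare_failures(final_failures)

  # A test is flaky if it regressed on the first run but no longer fails.
  flaky = set(t for t in original_regressions if t not in final_failures)

  # Only persistent regressions should turn the bot red.
  return regressions, flaky

The patch itself also keeps the first-run failures around, so the end-of-run statistics and the layout test dashboard still see the flaky tests even though they are excluded from the bot-red count.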
diff --git a/webkit/tools/layout_tests/layout_package/compare_failures.py b/webkit/tools/layout_tests/layout_package/compare_failures.py
index d6e3dd0..010a584 100644
--- a/webkit/tools/layout_tests/layout_package/compare_failures.py
+++ b/webkit/tools/layout_tests/layout_package/compare_failures.py
@@ -12,33 +12,6 @@
 import path_utils
 import test_failures
 import test_expectations
-
-def PrintFilesFromSet(filenames, header_text, output, opt_expectations=None,
-                      opt_relativizeFilenames=True):
-  """A helper method to print a list of files to output.
-
-  Args:
-    filenames: a list of absolute filenames
-    header_text: a string to display before the list of filenames
-    output: file descriptor to write the results to.
-    opt_expectations: expectations that failed for this test
-  """
-  if not len(filenames):
-    return
-
-  filenames = list(filenames)
-  filenames.sort()
-  output.write("\n")
-  output.write("%s (%d):\n" % (header_text, len(filenames)))
-  output_string = "  %s"
-  if opt_expectations:
-    output_string += " = %s" % opt_expectations
-  output_string += "\n"
-  for filename in filenames:
-    if opt_relativizeFilenames:
-      filename = path_utils.RelativeTestFilename(filename)
-    output.write(output_string % filename)
-
 class CompareFailures:
   # A list of which TestFailure classes count as a failure vs a crash.
   FAILURE_TYPES = (test_failures.FailureTextMismatch,
@@ -52,7 +25,7 @@ class CompareFailures:
                    test_failures.FailureMissingImageHash)

-  def __init__(self, test_files, test_failures, expectations):
+  def __init__(self, test_files, test_failures, expectations, is_flaky):
     """Calculate the regressions in this test run.

     Args:
@@ -61,66 +34,103 @@ class CompareFailures:
           TestFailure objects if the test failed
       expectations is a TestExpectations object representing the current
           test status
+      is_flaky is whether this set of failures represents tests that failed
+          the first time around, but passed on a subsequent run
     """
     self._test_files = test_files
     self._test_failures = test_failures
     self._expectations = expectations
+    self._is_flaky = is_flaky

     self._CalculateRegressions()

+  def PrintFilesFromSet(self, filenames, header_text, output,
+                        opt_expectations=None):
+    """A helper method to print a list of files to output.
+
+    Args:
+      filenames: a list of absolute filenames
+      header_text: a string to display before the list of filenames
+      output: file descriptor to write the results to.
+      opt_expectations: expectations that failed for this test
+    """
+    if not len(filenames):
+      return
+
+    filenames = list(filenames)
+    filenames.sort()
+
+    # Print real regressions.
+    if opt_expectations:
+      if self._is_flaky:
+        header_text = "Unexpected flakiness: " + header_text
+      else:
+        header_text = "Regressions: Unexpected " + header_text
+
+    output.write("\n%s (%d):\n" % (header_text, len(filenames)))
+    for filename in filenames:
+      output_string = "  %s" % path_utils.RelativeTestFilename(filename)
+      if opt_expectations:
+        if self._is_flaky:
+          opt_expectations += (" " +
+              self._expectations.GetExpectationsString(filename))
+        output_string += " = %s" % opt_expectations
+
+      output.write(output_string + "\n")
+
   def PrintRegressions(self, output):
     """Write the regressions computed by _CalculateRegressions() to output.
     """

     # Print unexpected passes by category.
     passes = self._regressed_passes
-    PrintFilesFromSet(passes & self._expectations.GetFixableFailures(),
-                      "Expected to fail, but passed",
-                      output)
-    PrintFilesFromSet(passes & self._expectations.GetFixableTimeouts(),
-                      "Expected to timeout, but passed",
-                      output)
-    PrintFilesFromSet(passes & self._expectations.GetFixableCrashes(),
-                      "Expected to crash, but passed",
-                      output)
-
-    PrintFilesFromSet(passes & self._expectations.GetWontFixFailures(),
-                      "Expected to fail (ignored), but passed",
-                      output)
-    PrintFilesFromSet(passes & self._expectations.GetWontFixTimeouts(),
-                      "Expected to timeout (ignored), but passed",
-                      output)
+    self.PrintFilesFromSet(passes & self._expectations.GetFixableFailures(),
+                           "Expected to fail, but passed",
+                           output)
+    self.PrintFilesFromSet(passes & self._expectations.GetFixableTimeouts(),
+                           "Expected to timeout, but passed",
+                           output)
+    self.PrintFilesFromSet(passes & self._expectations.GetFixableCrashes(),
+                           "Expected to crash, but passed",
+                           output)
+
+    self.PrintFilesFromSet(passes & self._expectations.GetWontFixFailures(),
+                           "Expected to fail (ignored), but passed",
+                           output)
+    self.PrintFilesFromSet(passes & self._expectations.GetWontFixTimeouts(),
+                           "Expected to timeout (ignored), but passed",
+                           output)
     # Crashes should never be deferred.
-    PrintFilesFromSet(passes & self._expectations.GetDeferredFailures(),
-                      "Expected to fail (deferred), but passed",
-                      output)
-    PrintFilesFromSet(passes & self._expectations.GetDeferredTimeouts(),
-                      "Expected to timeout (deferred), but passed",
-                      output)
-    # Print real regressions.
-    PrintFilesFromSet(self._regressed_text_failures,
-                      "Regressions: Unexpected text failures",
-                      output,
-                      'TEXT')
-    PrintFilesFromSet(self._regressed_image_failures,
-                      "Regressions: Unexpected image failures",
-                      output,
-                      'IMAGE')
-    PrintFilesFromSet(self._regressed_image_plus_text_failures,
-                      "Regressions: Unexpected image + text failures",
-                      output,
-                      'IMAGE+TEXT')
-    PrintFilesFromSet(self._regressed_hangs,
-                      "Regressions: Unexpected timeouts",
-                      output,
-                      'TIMEOUT')
-    PrintFilesFromSet(self._regressed_crashes,
-                      "Regressions: Unexpected crashes",
-                      output,
-                      'CRASH')
-    PrintFilesFromSet(self._missing,
-                      "Missing expected results",
-                      output,
-                      'MISSING')
+    self.PrintFilesFromSet(passes & self._expectations.GetDeferredFailures(),
+                           "Expected to fail (deferred), but passed",
+                           output)
+    self.PrintFilesFromSet(passes & self._expectations.GetDeferredTimeouts(),
+                           "Expected to timeout (deferred), but passed",
+                           output)
+
+    self.PrintFilesFromSet(self._regressed_text_failures,
+                           "text failures",
+                           output,
+                           'TEXT')
+    self.PrintFilesFromSet(self._regressed_image_failures,
+                           "image failures",
+                           output,
+                           'IMAGE')
+    self.PrintFilesFromSet(self._regressed_image_plus_text_failures,
+                           "image + text failures",
+                           output,
+                           'IMAGE+TEXT')
+    self.PrintFilesFromSet(self._regressed_hangs,
+                           "timeouts",
+                           output,
+                           'TIMEOUT')
+    self.PrintFilesFromSet(self._regressed_crashes,
+                           "crashes",
+                           output,
+                           'CRASH')
+    self.PrintFilesFromSet(self._missing,
+                           "missing expected results",
+                           output,
+                           'MISSING')

   def _CalculateRegressions(self):
     """Calculate regressions from this run through the layout tests."""
@@ -178,10 +188,14 @@ class CompareFailures:
           raise ValueError('unexpected failure type:' + f)
         worklist.remove(test)

-    for test in worklist:
-      # Check that all passing tests are expected to pass.
-      expectations = self._expectations.GetExpectations(test)
-      if not test_expectations.PASS in expectations: passes.add(test)
+    # When processing flaky failures, the list of actual failures is excluded,
+    # so don't look for unexpected passes.
+    if not self._is_flaky:
+      for test in worklist:
+        # Check that all passing tests are expected to pass.
+        expectations = self._expectations.GetExpectations(test)
+        if not test_expectations.PASS in expectations:
+          passes.add(test)

     self._regressed_passes = passes
     self._regressed_crashes = crashes
diff --git a/webkit/tools/layout_tests/layout_package/test_expectations.py b/webkit/tools/layout_tests/layout_package/test_expectations.py
index 80c0e7a..ca78647 100644
--- a/webkit/tools/layout_tests/layout_package/test_expectations.py
+++ b/webkit/tools/layout_tests/layout_package/test_expectations.py
@@ -134,6 +134,20 @@ class TestExpectations:
       # we expect it to pass (and nothing else).
       return set([PASS])

+  def GetExpectationsString(self, test):
+    """Returns the expectatons for the given test as an uppercase string.
+    If there are no expectations for the test, then "PASS" is returned."""
+    expectations = self.GetExpectations(test)
+    retval = []
+
+    for expectation in expectations:
+      for item in TestExpectationsFile.EXPECTATIONS.items():
+        if item[1] == expectation:
+          retval.append(item[0])
+          break
+
+    return " ".join(retval).upper()
+
   def GetModifiers(self, test):
     if self._expected_failures.Contains(test):
       return self._expected_failures.GetModifiers(test)
diff --git a/webkit/tools/layout_tests/layout_package/test_shell_thread.py b/webkit/tools/layout_tests/layout_package/test_shell_thread.py
index 865cae8..3c7e52f 100644
--- a/webkit/tools/layout_tests/layout_package/test_shell_thread.py
+++ b/webkit/tools/layout_tests/layout_package/test_shell_thread.py
@@ -312,7 +312,6 @@ class TestShellThread(threading.Thread):
           self._filename_list_queue.get_nowait()
       except Queue.Empty:
         self._KillTestShell()
-        logging.debug("queue empty, quitting test shell thread")
         tests_run_file.close()
         return
diff --git a/webkit/tools/layout_tests/run_webkit_tests.py b/webkit/tools/layout_tests/run_webkit_tests.py
index 16cc01c..196b8cc 100755
--- a/webkit/tools/layout_tests/run_webkit_tests.py
+++ b/webkit/tools/layout_tests/run_webkit_tests.py
@@ -120,6 +120,8 @@ class TestRunner:
   # test_shell.exe.
   DEFAULT_TEST_TIMEOUT_MS = 6 * 1000

+  NUM_RETRY_ON_UNEXPECTED_FAILURE = 3
+
   def __init__(self, options):
     """Initialize test runner data structures.
@@ -439,7 +441,7 @@ class TestRunner:
         return True
     return False

-  def _InstantiateTestShellThreads(self, test_shell_binary):
+  def _InstantiateTestShellThreads(self, test_shell_binary, test_files):
     """Instantitates and starts the TestShellThread(s).

     Return:
@@ -454,14 +456,8 @@ class TestRunner:
       # about it anyway.
       test_shell_command = self._options.wrapper.split() + test_shell_command

-    test_files = self._test_files_list
     filename_queue = self._GetTestFileQueue(test_files)

-    # If we have http tests, the first one will be an http test.
-    if ((test_files and test_files[0].find(self.HTTP_SUBDIR) >= 0)
-        or self._options.randomize_order):
-      self._http_server.Start()
-
     # Start Web Socket server.
     if (self._ContainWebSocketTest(test_files)):
       self._websocket_server.Start()
@@ -496,6 +492,56 @@ class TestRunner:
       proc.stdin.close()
       proc.wait()

+  def _RunTests(self, test_shell_binary, file_list):
+    """Runs the tests in the file_list.
+
+    Return: A tuple (failures, thread_timings, test_timings,
+        individual_test_timings)
+        failures is a map from test to list of failure types
+        thread_timings is a list of dicts with the total runtime of each thread
+          with 'name', 'num_tests', 'total_time' properties
+        test_timings is a list of timings for each sharded subdirectory of the
+          form [time, directory_name, num_tests]
+        individual_test_timings is a list of run times for each test in the form
+          {filename:filename, test_run_time:test_run_time}
+    """
+    threads = self._InstantiateTestShellThreads(test_shell_binary, file_list)
+
+    # Wait for the threads to finish and collect test failures.
+    failures = {}
+    test_timings = {}
+    individual_test_timings = []
+    thread_timings = []
+    try:
+      for thread in threads:
+        while thread.isAlive():
+          # Let it timeout occasionally so it can notice a KeyboardInterrupt
+          # Actually, the timeout doesn't really matter: apparently it
+          # suffices to not use an indefinite blocking join for it to
+          # be interruptible by KeyboardInterrupt.
+          thread.join(1.0)
+        failures.update(thread.GetFailures())
+        thread_timings.append({ 'name': thread.getName(),
+                                'num_tests': thread.GetNumTests(),
+                                'total_time': thread.GetTotalTime()});
+        test_timings.update(thread.GetDirectoryTimingStats())
+        individual_test_timings.extend(thread.GetIndividualTestStats())
+    except KeyboardInterrupt:
+      for thread in threads:
+        thread.Cancel()
+      self._StopLayoutTestHelper(layout_test_helper_proc)
+      raise
+    for thread in threads:
+      # Check whether a TestShellThread died before normal completion.
+      exception_info = thread.GetExceptionInfo()
+      if exception_info is not None:
+        # Re-raise the thread's exception here to make it clear that
+        # testing was aborted. Otherwise, the tests that did not run
+        # would be assumed to have passed.
+        raise exception_info[0], exception_info[1], exception_info[2]
+
+    return (failures, thread_timings, test_timings, individual_test_timings)
+
   def Run(self):
     """Run all our tests on all our test files.
@@ -533,43 +579,27 @@ class TestRunner:
               "To override, invoke with --nocheck-sys-deps")
         sys.exit(1)

-    logging.info("Starting tests")
+    # If we have http tests, the first one will be an http test.
+    if ((self._test_files_list and
+         self._test_files_list[0].find(self.HTTP_SUBDIR) >= 0)
+        or self._options.randomize_order):
+      self._http_server.Start()

-    threads = self._InstantiateTestShellThreads(test_shell_binary)
+    original_failures, thread_timings, test_timings, individual_test_timings = (
+        self._RunTests(test_shell_binary, self._test_files_list))
+
+    retries = 0
+    final_failures = original_failures
+    original_regressions = self._CompareFailures(final_failures)
+    regressions = original_regressions
+
+    while retries < self.NUM_RETRY_ON_UNEXPECTED_FAILURE and len(regressions):
+      logging.info("Retrying %d unexpected failure(s)" % len(regressions))
+      retries += 1
+      final_failures = self._RunTests(test_shell_binary, list(regressions))[0]
+      regressions = self._CompareFailures(final_failures)

-    # Wait for the threads to finish and collect test failures.
-    failures = {}
-    test_timings = {}
-    individual_test_timings = []
-    thread_timings = []
-    try:
-      for thread in threads:
-        while thread.isAlive():
-          # Let it timeout occasionally so it can notice a KeyboardInterrupt
-          # Actually, the timeout doesn't really matter: apparently it
-          # suffices to not use an indefinite blocking join for it to
-          # be interruptible by KeyboardInterrupt.
-          thread.join(1.0)
-        failures.update(thread.GetFailures())
-        thread_timings.append({ 'name': thread.getName(),
-                                'num_tests': thread.GetNumTests(),
-                                'total_time': thread.GetTotalTime()});
-        test_timings.update(thread.GetDirectoryTimingStats())
-        individual_test_timings.extend(thread.GetIndividualTestStats())
-    except KeyboardInterrupt:
-      for thread in threads:
-        thread.Cancel()
-      self._StopLayoutTestHelper(layout_test_helper_proc)
-      raise
     self._StopLayoutTestHelper(layout_test_helper_proc)
-    for thread in threads:
-      # Check whether a TestShellThread died before normal completion.
-      exception_info = thread.GetExceptionInfo()
-      if exception_info is not None:
-        # Re-raise the thread's exception here to make it clear that
-        # testing was aborted. Otherwise, the tests that did not run
-        # would be assumed to have passed.
-        raise exception_info[0], exception_info[1], exception_info[2]

     print
     end_time = time.time()
@@ -587,21 +617,21 @@ class TestRunner:
           (cuml_time, cuml_time / int(self._options.num_test_shells)))
     print

-    self._PrintTimingStatistics(test_timings, individual_test_timings, failures)
+    self._PrintTimingStatistics(test_timings, individual_test_timings,
+                                original_failures)

-    print "-" * 78
-
-    # Tests are done running. Compare failures with expected failures.
-    regressions = self._CompareFailures(failures)
-
-    print "-" * 78
+    self._PrintRegressions(original_failures, original_regressions,
+                           final_failures)

     # Write summaries to stdout.
-    result_summary = self._GetResultSummary(failures)
+    # The summary should include flaky tests, so use original_failures, not
+    # final_failures.
+    result_summary = self._GetResultSummary(original_failures)
     self._PrintResultSummary(result_summary, sys.stdout)

     if self._options.verbose:
-      self._WriteJSONFiles(failures, individual_test_timings, result_summary);
+      self._WriteJSONFiles(original_failures, individual_test_timings,
+                           result_summary);

     # Write the same data to a log file.
     out_filename = os.path.join(self._options.results_directory, "score.txt")
@@ -611,14 +641,39 @@ class TestRunner:

     # Write the summary to disk (results.html) and maybe open the test_shell
     # to this file.
-    wrote_results = self._WriteResultsHtmlFile(failures, regressions)
+    wrote_results = self._WriteResultsHtmlFile(original_failures,
+                                               original_regressions)
     if not self._options.noshow_results and wrote_results:
       self._ShowResultsHtmlFile()

     sys.stdout.flush()
     sys.stderr.flush()
+    # Ignore flaky failures so we don't turn the bot red for those.
     return len(regressions)

+  def _PrintRegressions(self, original_failures, original_regressions,
+                        final_failures):
+    """Prints the regressions from the test run.
+    Args:
+      original_failures: Failures from the first test run.
+      original_regressions: Regressions from the first test run.
+      final_failures: Failures after retrying the failures from the first run.
+    """
+    print "-" * 78
+
+    flaky_failures = {}
+    non_flaky_failures = {}
+    for failure in original_failures:
+      if failure not in original_regressions or failure in final_failures:
+        non_flaky_failures[failure] = original_failures[failure]
+      else:
+        flaky_failures[failure] = original_failures[failure]
+
+    self._CompareFailures(non_flaky_failures, print_regressions=True)
+    self._CompareFailures(flaky_failures, print_regressions=True, is_flaky=True)
+
+    print "-" * 78
+
   def _WriteJSONFiles(self, failures, individual_test_timings, result_summary):
     logging.debug("Writing JSON files in %s."
                   % self._options.results_directory)
@@ -905,30 +960,26 @@ class TestRunner:
           'percent'  : float(count) * 100 / total,
           'message'  : message }))

-  def _CompareFailures(self, failures):
-    """Determine how the test failures from this test run differ from the
-    previous test run and print results to stdout and a file.
+  def _CompareFailures(self, failures, print_regressions=False, is_flaky=False):
+    """Determine if the failures in this test run are unexpected.

     Args:
-      failures is a dictionary mapping the test filename to a list of
-      TestFailure objects if the test failed
+      failures: a dictionary mapping the test filename to a list of
+          TestFailure objects if the test failed
+      print_regressions: whether to print the regressions to stdout
+      is_flaky: whether this set of failures represents tests that failed
+          the first time around, but passed on a subsequent run

     Return:
       A set of regressions (unexpected failures, hangs, or crashes)
     """
     cf = compare_failures.CompareFailures(self._test_files, failures,
-                                          self._expectations)
-
-    if not self._options.nocompare_failures:
+                                          self._expectations,
+                                          is_flaky)
+    if print_regressions:
       cf.PrintRegressions(sys.stdout)

     return cf.GetRegressions()

   def _WriteResultsHtmlFile(self, failures, regressions):
@@ -1194,11 +1245,6 @@ if '__main__' == __name__:
                            default=False,
                            help="Run all tests, even those marked SKIP in the "
                                 "test list")
-  option_parser.add_option("", "--nocompare-failures", action="store_true",
-                           default=False,
-                           help="Disable comparison to the last test run. "
-                                "When enabled, show stats on how many tests "
-                                "newly pass or fail.")
   option_parser.add_option("", "--num-test-shells",
                            help="Number of testshells to run in parallel.")
   option_parser.add_option("", "--time-out-ms", default=None,