diff options
author | dpranke@google.com <dpranke@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-09 23:40:21 +0000 |
---|---|---|
committer | dpranke@google.com <dpranke@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-09 23:40:21 +0000 |
commit | 2ce33b377242a0b75645eb653c02a60f2a238f9b (patch) | |
tree | 8cdeefd1fd4bfbd03f010acbef1402cf06c6b4d2 | |
parent | d3a1b3c81b49ea4089ef19237c51508670acffca (diff) | |
download | chromium_src-2ce33b377242a0b75645eb653c02a60f2a238f9b.zip chromium_src-2ce33b377242a0b75645eb653c02a60f2a238f9b.tar.gz chromium_src-2ce33b377242a0b75645eb653c02a60f2a238f9b.tar.bz2 |
Update run_webkit_tests to generate an 'unexpected_results.json' file in the
layout-test-results directory instead of regressions.txt. The new file contains
more information, in a format that is easier to consume mechanically.
Also, refactor the print routines to use the unexpected-results dict, which replaces
the parallel results-by-test/results-by-type dicts and removes several print helpers.
R=ojan@chromium.org, pam@chromium.org
TEST=none
BUG=none
Review URL: http://codereview.chromium.org/465090
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@34199 0039d316-1c4b-4281-b951-d872f2087c98
-rwxr-xr-x | webkit/tools/layout_tests/run_webkit_tests.py | 400 |
1 files changed, 190 insertions, 210 deletions
diff --git a/webkit/tools/layout_tests/run_webkit_tests.py b/webkit/tools/layout_tests/run_webkit_tests.py index 0a0c09a..a030197 100755 --- a/webkit/tools/layout_tests/run_webkit_tests.py +++ b/webkit/tools/layout_tests/run_webkit_tests.py @@ -50,6 +50,9 @@ from test_types import image_diff from test_types import test_type_base from test_types import text_diff +sys.path.append(path_utils.PathFromBase('third_party')) +import simplejson + TestExpectationsFile = test_expectations.TestExpectationsFile class TestInfo: @@ -396,8 +399,7 @@ class TestRunner: tests_by_dir = {} for test_file in test_files: directory = self._GetDirForTestFile(test_file) - if directory not in tests_by_dir: - tests_by_dir[directory] = [] + tests_by_dir.setdefault(directory, []) tests_by_dir[directory].append(self._GetTestInfoForFile(test_file)) # Sort by the number of tests in the dir so that the ones with the most @@ -613,31 +615,18 @@ class TestRunner: thread_timings, test_timings, individual_test_timings = ( self._RunTests(test_shell_binary, self._test_files_list, result_summary)) - original_results_by_test, original_results_by_type = self._CompareResults( - result_summary) - final_results_by_test = original_results_by_test - final_results_by_type = original_results_by_type # We exclude the crashes from the list of results to retry, because # we want to treat even a potentially flaky crash as an error. 
- failed_results_by_test, failed_results_by_type = self._OnlyFailures( - original_results_by_test, include_crashes=False) - num_passes = len(original_results_by_test) - len(failed_results_by_test) - + failures = self._GetFailures(result_summary, include_crashes=False) retries = 0 - while (retries < self.NUM_RETRY_ON_UNEXPECTED_FAILURE and - len(failed_results_by_test)): - logging.info("Retrying %d unexpected failure(s)" % - len(failed_results_by_test)) + retry_summary = result_summary + while (retries < self.NUM_RETRY_ON_UNEXPECTED_FAILURE and len(failures)): + logging.info("Retrying %d unexpected failure(s)" % len(failures)) retries += 1 - retry_summary = ResultSummary(self._expectations, - failed_results_by_test.keys()) - self._RunTests(test_shell_binary, failed_results_by_test.keys(), - retry_summary) - final_results_by_test, final_results_by_type = self._CompareResults( - retry_summary) - failed_results_by_test, failed_results_by_type = self._OnlyFailures( - final_results_by_test, include_crashes=True) + retry_summary = ResultSummary(self._expectations, failures.keys()) + self._RunTests(test_shell_binary, failures.keys(), retry_summary) + failures = self._GetFailures(retry_summary, include_crashes=True) self._StopLayoutTestHelper(layout_test_helper_proc) @@ -659,18 +648,17 @@ class TestRunner: self._PrintTimingStatistics(test_timings, individual_test_timings, result_summary) - self._PrintUnexpectedResults(original_results_by_test, - original_results_by_type, - final_results_by_test, - final_results_by_type) + unexpected_results = self._SummarizeUnexpectedResults(result_summary, + retry_summary) # Write summaries to stdout. + self._PrintUnexpectedResults(unexpected_results) + # The summaries should include flaky tests, so use the original summary, # not the final one. 
self._PrintResultSummary(result_summary, sys.stdout) - - if self._options.verbose: - self._WriteJSONFiles(result_summary, individual_test_timings); + self._WriteJSONFiles(unexpected_results, result_summary, + individual_test_timings) # Write the same data to a log file. out_filename = os.path.join(self._options.results_directory, "score.txt") @@ -686,9 +674,10 @@ class TestRunner: sys.stdout.flush() sys.stderr.flush() + # Ignore flaky failures and unexpected passes so we don't turn the # bot red for those. - return len(failed_results_by_test) + return unexpected_results['num_regressions'] def _UpdateSummary(self, result_summary): """Update the summary while running tests.""" @@ -700,49 +689,124 @@ class TestRunner: except Queue.Empty: return - def _CompareResults(self, result_summary): - """Determine if the results in this test run are unexpected. - - Returns: - A dict of files -> results and a dict of results -> sets of files - """ - results_by_test = {} - results_by_type = {} - for result in TestExpectationsFile.EXPECTATIONS.values(): - results_by_type[result] = set() - for test, result in result_summary.unexpected_results.iteritems(): - results_by_test[test] = result - results_by_type[result].add(test) - return results_by_test, results_by_type - - def _OnlyFailures(self, results_by_test, include_crashes): + def _GetFailures(self, result_summary, include_crashes): """Filters a dict of results and returns only the failures. Args: - results_by_test: a dict of files -> results + result_summary: the results of the test run include_crashes: whether crashes are included in the output. We use False when finding the list of failures to retry to see if the results were flaky. Although the crashes may also be flaky, we treat them as if they aren't so that they're not ignored. Returns: - a dict of files -> results and results -> sets of files. 
+ a dict of files -> results """ - failed_results_by_test = {} - failed_results_by_type = {} - for test, result in results_by_test.iteritems(): + failed_results = {} + for test, result in result_summary.unexpected_results.iteritems(): if (result == test_expectations.PASS or result == test_expectations.CRASH and not include_crashes): continue + failed_results[test] = result + + return failed_results + + def _SummarizeUnexpectedResults(self, result_summary, retry_summary): + """Summarize any unexpected results as a dict. + + TODO(dpranke): split this data structure into a separate class? - failed_results_by_test[test] = result - if result in failed_results_by_type: - failed_results_by_type.add(test) + Args: + result_summary: summary object from initial test runs + retry_summary: summary object from final test run of retried tests + Returns: + A dictionary containing a summary of the unexpected results from the + run, with the following fields: + 'version': a version indicator (1 in this version) + 'fixable': # of fixable tests (NOW - PASS) + 'skipped': # of skipped tests (NOW & SKIPPED) + 'num_regressions': # of non-flaky failures + 'num_flaky': # of flaky failures + 'num_passes': # of unexpected passes + 'tests': a dict of tests -> { 'expected' : '...', 'actual' : '...' 
} + """ + results = {} + results['version'] = 1 + + tbe = result_summary.tests_by_expectation + tbt = result_summary.tests_by_timeline + results['fixable'] = len(tbt[test_expectations.NOW] - + tbe[test_expectations.PASS]) + results['skipped'] = len(tbt[test_expectations.NOW] & + tbe[test_expectations.SKIP]) + + num_passes = 0 + num_flaky = 0 + num_regressions = 0 + keywords = {} + for k, v in TestExpectationsFile.EXPECTATIONS.iteritems(): + keywords[v] = k.upper() + + tests = {} + for filename, result in result_summary.unexpected_results.iteritems(): + # Note that if a test crashed in the original run, we ignore whether or + # not it crashed when we retried it (if we retried it), and always + # consider the result not flaky. + test = path_utils.RelativeTestFilename(filename) + expected = self._expectations.GetExpectationsString(filename) + actual = [keywords[result]] + + if result == test_expectations.PASS: + num_passes += 1 + elif result == test_expectations.CRASH: + num_regressions += 1 else: - failed_results_by_type = set([test]) - return failed_results_by_test, failed_results_by_type + if filename not in retry_summary.unexpected_results: + actual.extend( + self._expectations.GetExpectationsString(filename).split(" ")) + num_flaky += 1 + else: + retry_result = retry_summary.unexpected_results[filename] + if result != retry_result: + actual.append(keywords[retry_result]) + num_flaky += 1 + else: + num_regressions += 1 + + tests[test] = {} + tests[test]['expected'] = expected + tests[test]['actual'] = " ".join(actual) + + results['tests'] = tests + results['num_passes'] = num_passes + results['num_flaky'] = num_flaky + results['num_regressions'] = num_regressions + + return results + + def _WriteJSONFiles(self, unexpected_results, result_summary, + individual_test_timings): + """Writes the results of the test run as JSON files into the results dir. 
+ + There are three different files written into the results dir: + unexpected_results.json: A short list of any unexpected results. This + is used by the buildbots to display results. + expectations.json: This is used by the flakiness dashboard. + results.json: A full list of the results - used by the flakiness + dashboard and the aggregate results dashboard. - def _WriteJSONFiles(self, result_summary, individual_test_timings): + Args: + unexpected_results: dict of unexpected results + result_summary: full summary object + individual_test_timings: list of test times (used by the flakiness + dashboard). + """ logging.debug("Writing JSON files in %s." % self._options.results_directory) + unexpected_file = open(os.path.join(self._options.results_directory, + "unexpected_results.json"), "w") + unexpected_file.write(simplejson.dumps(unexpected_results, sort_keys=True, + indent=2)) + unexpected_file.close() + # Write a json file of the test_expectations.txt file for the layout tests # dashboard. expectations_file = open(os.path.join(self._options.results_directory, @@ -962,6 +1026,7 @@ class TestRunner: len(result_summary.tests_by_expectation[test_expectations.PASS] & result_summary.tests_by_timeline[timeline])) output.write("=> %s (%d):\n" % (heading, not_passing)) + for result in TestExpectationsFile.EXPECTATION_ORDER: if result == test_expectations.PASS: continue @@ -975,154 +1040,68 @@ class TestRunner: (len(results), plural[len(results) != 1], pct, desc[len(results) != 1])) - def _PrintUnexpectedResults(self, original_results_by_test, - original_results_by_type, - final_results_by_test, - final_results_by_type): - """Print unexpected results (regressions) to stdout and a file. 
- - Args: - original_results_by_test: dict mapping tests -> results for the first - (original) test run - original_results_by_type: dict mapping results -> sets of tests for the - first test run - final_results_by_test: dict of tests->results after the retries - eliminated any flakiness - final_results_by_type: dict of results->tests after the retries - eliminated any flakiness - """ - print "-" * 78 - print - flaky_results_by_type = {} - non_flaky_results_by_type = {} - - for test, result in original_results_by_test.iteritems(): - # Note that if a test crashed in the original run, we ignore whether or - # not it crashed when we retried it (if we retried it), and always - # consider the result "not flaky". - if (result == test_expectations.PASS or - result == test_expectations.CRASH or - (test in final_results_by_test and - result == final_results_by_test[test])): - if result in non_flaky_results_by_type: - non_flaky_results_by_type[result].add(test) + def _PrintUnexpectedResults(self, unexpected_results): + """Prints any unexpected results in a human-readable form to stdout.""" + passes = {} + flaky = {} + regressions = {} + + for test, results in unexpected_results['tests'].iteritems(): + actual = results['actual'].split(" ") + expected = results['expected'].split(" ") + + if actual == ['PASS']: + if 'CRASH' in expected: + _AddToDictOfLists(passes, 'Expected to crash, but passed', test) + elif 'TIMEOUT' in expected: + _AddToDictOfLists(passes, 'Expected to timeout, but passed', test) else: - non_flaky_results_by_type[result] = set([test]) + _AddToDictOfLists(passes, 'Expected to fail, but passed', test) + elif len(actual) > 1: + # We group flaky tests by the first actual result we got. 
+ _AddToDictOfLists(flaky, actual[0], test) else: - if result in flaky_results_by_type: - flaky_results_by_type[result].add(test) - else: - flaky_results_by_type[result] = set([test]) - - self._PrintUnexpectedResultsByType(non_flaky_results_by_type, False, - sys.stdout) - self._PrintUnexpectedResultsByType(flaky_results_by_type, True, sys.stdout) - - out_filename = os.path.join(self._options.results_directory, - "regressions.txt") - output_file = open(out_filename, "w") - self._PrintUnexpectedResultsByType(non_flaky_results_by_type, False, - output_file) - output_file.close() - - print "-" * 78 - - def _PrintUnexpectedResultsByType(self, results_by_type, is_flaky, output): - """Helper method to print a set of unexpected results to an output stream - sorted by the result type. - - Args: - results_by_type: dict(result_type -> list of files) - is_flaky: where these results flaky or not (changes the output) - output: stream to write output to - """ - descriptions = TestExpectationsFile.EXPECTATION_DESCRIPTIONS - keywords = {} - for k, v in TestExpectationsFile.EXPECTATIONS.iteritems(): - keywords[v] = k.upper() - - for result in TestExpectationsFile.EXPECTATION_ORDER: - if result in results_by_type: - self._PrintUnexpectedResultSet(output, results_by_type[result], - is_flaky, descriptions[result][1], - keywords[result]) - - def _PrintUnexpectedResultSet(self, output, filenames, is_flaky, - header_text, keyword): - """A helper method to print one set of results (all of the same type) - to a stream. 
- - Args: - output: a stream or file object to write() to - filenames: a list of absolute filenames - header_text: a string to display before the list of filenames - keyword: expectation keyword - """ - filenames = list(filenames) - filenames.sort() - if not is_flaky and keyword == 'PASS': - self._PrintUnexpectedPasses(output, filenames) - return - - if is_flaky: - prefix = "Unexpected flakiness:" - else: - prefix = "Regressions: Unexpected" - output.write("%s %s (%d):\n" % (prefix, header_text, len(filenames))) - for filename in filenames: - flaky_text = "" - if is_flaky: - flaky_text = " " + self._expectations.GetExpectationsString(filename) - - filename = path_utils.RelativeTestFilename(filename) - output.write(" %s = %s%s\n" % (filename, keyword, flaky_text)) - output.write("\n") - - def _PrintUnexpectedPasses(self, output, filenames): - """Prints the list of files that passed unexpectedly. - - TODO(dpranke): This routine is a bit of a hack, since it's not clear - what the best way to output this is. Each unexpected pass might have - multiple expected results, and printing all the combinations would - be pretty clunky. For now we sort them into three buckets, crashes, - timeouts, and everything else. - - Args: - output: a stream to write to - filenames: list of files that passed - """ - crashes = [] - timeouts = [] - failures = [] - for filename in filenames: - expectations = self._expectations.GetExpectations(filename) - if test_expectations.CRASH in expectations: - crashes.append(filename) - elif test_expectations.TIMEOUT in expectations: - timeouts.append(filename) - else: - failures.append(filename) - - self._PrintPassSet(output, "crash", crashes) - self._PrintPassSet(output, "timeout", timeouts) - self._PrintPassSet(output, "fail", failures) - - def _PrintPassSet(self, output, expected_str, filenames): - """Print a set of unexpected passes. 
- - Args: - output: stream to write to - expected_str: worst expectation for the given file - filenames: list of files in that set - """ - if len(filenames): - filenames.sort() - output.write("Expected to %s, but passed: (%d)\n" % - (expected_str, len(filenames))) - for filename in filenames: - filename = path_utils.RelativeTestFilename(filename) - output.write(" %s\n" % filename) - output.write("\n") + _AddToDictOfLists(regressions, results['actual'], test) + + if len(passes): + for key, tests in passes.iteritems(): + print "%s: (%d)" % (key, len(tests)) + tests.sort() + for test in tests: + print " %s" % test + print + + if len(flaky): + descriptions = TestExpectationsFile.EXPECTATION_DESCRIPTIONS + for key, tests in flaky.iteritems(): + result = TestExpectationsFile.EXPECTATIONS[key.lower()] + print "Unexpected flakiness: %s (%d)" % ( + descriptions[result][1], len(tests)) + tests.sort() + + for test in tests: + actual = unexpected_results['tests'][test]['actual'].split(" ") + expected = unexpected_results['tests'][test]['expected'].split(" ") + result = TestExpectationsFile.EXPECTATIONS[key.lower()] + new_expectations_list = list(set(actual) | set(expected)) + print " %s = %s" % (test, " ".join(new_expectations_list)) + print + + if len(regressions): + descriptions = TestExpectationsFile.EXPECTATION_DESCRIPTIONS + for key, tests in regressions.iteritems(): + result = TestExpectationsFile.EXPECTATIONS[key.lower()] + print "Regressions: Unexpected %s : (%d)" % ( + descriptions[result][1], len(tests)) + tests.sort() + for test in tests: + print " %s = %s" % (test, key) + print + print + + if len(unexpected_results['tests']): + print "" + print "-" * 78 def _WriteResultsHtmlFile(self, result_summary): """Write results.html which is a summary of tests that failed. 
@@ -1134,13 +1113,12 @@ class TestRunner: True if any results were written (since expected failures may be omitted) """ # test failures - failures = result_summary.failures - failed_results_by_test, failed_results_by_type = self._OnlyFailures( - result_summary.unexpected_results, include_crashes=True) if self._options.full_results_html: - test_files = failures.keys() + test_files = result_summary.failures.keys() else: - test_files = failed_results_by_test.keys() + unexpected_failures = self._GetFailures(result_summary, + include_crashes=True) + test_files = unexpected_failures.keys() if not len(test_files): return False @@ -1158,8 +1136,7 @@ class TestRunner: test_files.sort() for test_file in test_files: - if test_file in failures: test_failures = failures[test_file] - else: test_failures = [] # unexpected passes + test_failures = result_summary.failures.get(test_file, []) out_file.write("<p><a href='%s'>%s</a><br />\n" % (path_utils.FilenameToUri(test_file), path_utils.RelativeTestFilename(test_file))) @@ -1181,6 +1158,9 @@ class TestRunner: path_utils.FilenameToUri(results_filename)]) +def _AddToDictOfLists(dict, key, value): + dict.setdefault(key, []).append(value) + def ReadTestFiles(files): tests = [] for file in files: |