| author | jparent@chromium.org <jparent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-18 23:59:34 +0000 |
|---|---|---|
| committer | jparent@chromium.org <jparent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-18 23:59:34 +0000 |
| commit | f950f9dd72164d51b03bf23fa51a49d23c04eaad (patch) | |
| tree | dd7b48035f26f68a7eddc20fcbe908c0932d556b | /webkit |
| parent | bac71bdd5c5f36acffbbca3112401ff3dd4d5ab1 (diff) | |
| download | chromium_src-f950f9dd72164d51b03bf23fa51a49d23c04eaad.zip chromium_src-f950f9dd72164d51b03bf23fa51a49d23c04eaad.tar.gz chromium_src-f950f9dd72164d51b03bf23fa51a49d23c04eaad.tar.bz2 | |
Add a % failure column to the flakiness dashboard.
This makes it easier to justify changing a test, because you have numeric evidence such as "this test was flaky 43% of the time before".
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/399093
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@32439 0039d316-1c4b-4281-b951-d872f2087c98
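For reference, here is a minimal standalone sketch of the arithmetic behind the new column. The helper name `computePercentFailed` and the sample data are illustrative only, not part of the patch; the real code (shown in the diff below) runs this calculation inside the dashboard's existing results loop and only inspects a bounded window of recent runs, which this sketch omits.

```js
// Illustrative sketch only; the actual logic lives in dashboard_base.js and
// flakiness_dashboard.html as shown in the diff below.

// Mirrors the patch: a result character counts as failing if it is one of
// the codes in 'FSTOCIZ'.
function isFailingResult(value) {
  return 'FSTOCIZ'.indexOf(value) != -1;
}

// rawResults is run-length encoded as [[numRuns, resultChar], ...].
// Hypothetical standalone helper applying the same arithmetic as the patch:
// percentFailed = round(failedCount / numResultsSeen * 100).
function computePercentFailed(rawResults) {
  var failedCount = 0;
  var numResultsSeen = 0;
  for (var i = 0; i < rawResults.length; i++) {
    var numResults = rawResults[i][0];
    numResultsSeen += numResults;
    if (isFailingResult(rawResults[i][1]))
      failedCount += numResults;
  }
  return Math.round(failedCount / numResultsSeen * 100);
}

// Example: 43 failing runs out of 100 total runs -> 43.
console.log(computePercentFailed([[43, 'F'], [57, 'P']]));
```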
Diffstat (limited to 'webkit')
| -rw-r--r-- | webkit/tools/layout_tests/dashboards/dashboard_base.js | 8 |
| -rw-r--r-- | webkit/tools/layout_tests/flakiness_dashboard.html | 29 |
2 files changed, 29 insertions, 8 deletions
diff --git a/webkit/tools/layout_tests/dashboards/dashboard_base.js b/webkit/tools/layout_tests/dashboards/dashboard_base.js
index 803b6b7..a8618de 100644
--- a/webkit/tools/layout_tests/dashboards/dashboard_base.js
+++ b/webkit/tools/layout_tests/dashboards/dashboard_base.js
@@ -60,6 +60,14 @@ var CHROME_REVISIONS_KEY = 'chromeRevision';
 var WEBKIT_REVISIONS_KEY = 'webkitRevision';
 
 /**
+ * @return {boolean} Whether the value represents a failing result.
+ */
+function isFailingResult(value) {
+  return 'FSTOCIZ'.indexOf(value) != -1;
+}
+
+
+/**
  * Takes a key and a value and sets the currentState[key] = value iff key is
  * a valid hash parameter and the value is a valid value for that key. Handles
  * cross-dashboard parameters then falls back to calling
diff --git a/webkit/tools/layout_tests/flakiness_dashboard.html b/webkit/tools/layout_tests/flakiness_dashboard.html
index fb5dcc6..6eddd54 100644
--- a/webkit/tools/layout_tests/flakiness_dashboard.html
+++ b/webkit/tools/layout_tests/flakiness_dashboard.html
@@ -278,7 +278,7 @@
 };
 var BUILD_TYPES = {'DEBUG': 'DBG', 'RELEASE': 'RELEASE'};
 var BASE_TABLE_HEADERS = ['bugs', 'modifiers', 'expectations', 'missing',
-    'extra', 'slowest run', 'flakiness (numbers are runtimes in seconds)'];
+    'extra', 'slowest run', '% fail', 'flakiness (numbers are runtimes in seconds)'];
 var MIN_SECONDS_FOR_SLOW_TEST = 4;
 var MIN_SECONDS_FOR_SLOW_TEST_DEBUG = 2 * MIN_SECONDS_FOR_SLOW_TEST;
 var FAIL_RESULTS = ['IMAGE', 'IMAGE+TEXT', 'TEXT', 'SIMPLIFIED', 'OTHER'];
@@ -436,7 +436,8 @@
     expectationsHTML: '',
     // HTML for modifiers for this test for all platforms
     modifiersHTML: '',
-    rawResults: ''
+    rawResults: '',
+    percentFailed: 0
   };
 }
 
@@ -924,7 +925,7 @@
   resultsForTest.rawResults = rawResults;
   resultsForTest.flips = rawResults.length - 1;
 
-  var times = resultsByBuilder[builderName].tests[test].times;
+  var times = resultsForTest.rawTimes;
   var numTimesSeen = 0;
   var numResultsSeen = 0;
   var resultsIndex = 0;
@@ -943,7 +944,7 @@
 
     if (rawResults && rawResults[resultsIndex])
       currentResult = rawResults[resultsIndex][1];
-    time = times[i][1]
+    var time = times[i][1]
 
     // Ignore times for crashing/timeout runs for the sake of seeing if
     // a test should be marked slow.
@@ -995,6 +996,7 @@
   var numResultsSeen = 0;
   var haveSeenNonFlakeResult = false;
   var numRealResults = 0;
+  var failedCount = 0;
   var seenResults = {};
 
   for (var i = 0;
@@ -1003,25 +1005,32 @@
     var numResults = rawResults[i][0];
     numResultsSeen += numResults;
 
+    var result = rawResults[i][1];
+    if (isFailingResult(result)) {
+      failedCount += numResults;
+    }
+
     var hasMinRuns = numResults >= MIN_RUNS_FOR_FLAKE;
     if (haveSeenNonFlakeResult && hasMinRuns) {
       continue;
     } else if (hasMinRuns) {
       haveSeenNonFlakeResult = true;
-    } else if (!seenResults[rawResults[i][1]]) {
+    } else if (!seenResults[result]) {
       // Only consider a short-lived result if we've seen it more than once.
       // Otherwise, we include lots of false-positives due to tests that fail
      // for a couple runs and then start passing.
-      seenResults[rawResults[i][1]] = true;
+      seenResults[result] = true;
       continue;
     }
 
-    var expectation = getExpectationsFileStringForResult(rawResults[i][1]);
+    var expectation = getExpectationsFileStringForResult(result);
     resultsMap[expectation] = true;
     numRealResults++;
   }
 
   resultsForTest.isFlaky = numRealResults > 1;
 
+  // Calculate the % of times the test failed - how flaky is it?
+  resultsForTest.percentFailed = Math.round(failedCount / numResultsSeen * 100);
   var expectationsArray = resultsForTest.expectations ?
       resultsForTest.expectations.split(' ') : [];
@@ -1333,6 +1342,7 @@
       '</td><td>' + test.missing +
       '</td><td>' + test.extra +
       '</td><td>' + (test.slowestTime ? test.slowestTime + 's' : '') +
+      '</td><td>' + test.percentFailed +
       '</td>' + getHtmlForTestResults(test) + '</tr>';
 }
 
@@ -1428,7 +1438,10 @@
   } else if (column == 'slowest') {
     sortFunctionGetter = getNumericSort;
     resultsProperty = 'slowestTime';
-  } else {
+  } else if (column == '%') {
+    sortFunctionGetter = getNumericSort;
+    resultsProperty = 'percentFailed';
+  } else {
     sortFunctionGetter = getAlphanumericCompare;
     resultsProperty = column;
   }