author:    robertocn <robertocn@chromium.org>  2014-10-17 18:27:26 -0700
committer: Commit bot <commit-bot@chromium.org>  2014-10-18 01:27:56 +0000
commit:    4436048a948a2010062d3c6a171775d9389fd732
tree:      c6e38d89231b1e833346db5d61583a9bb5f120da
parent:    7beb8e171e62517354c18a56230d0cffd71350a7
Requiring confidence in initial regression range before bisecting.
BUG=422727
Review URL: https://codereview.chromium.org/644323002
Cr-Commit-Position: refs/heads/master@{#300196}
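
For intuition, the gate this patch introduces can be sketched as follows. This is a
minimal, self-contained approximation: the real ConfidenceScore lives in
bisect_results and its exact statistics may differ; here it is modeled with a
two-sample Welch's t-test via scipy (an assumption, not part of the patch).

    # Sketch only: approximates ConfidenceScore with Welch's t-test.
    from scipy import stats

    REGRESSION_CONFIDENCE = 95  # same threshold the patch adds

    def ConfidenceScoreSketch(bad_values, good_values):
      """Returns 0-100: confidence that the two samples truly differ."""
      # Welch's t-test does not assume equal variance between the samples.
      _, p_value = stats.ttest_ind(bad_values, good_values, equal_var=False)
      return 100.0 * (1.0 - p_value)

    def ShouldBisect(bad_values, good_values):
      """Proceed only when the good/bad difference is unlikely to be noise."""
      return ConfidenceScoreSketch(bad_values, good_values) >= REGRESSION_CONFIDENCE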
Diffstat (limited to 'tools/auto_bisect/bisect_perf_regression.py')
-rwxr-xr-x  tools/auto_bisect/bisect_perf_regression.py  22
1 file changed, 22 insertions, 0 deletions
diff --git a/tools/auto_bisect/bisect_perf_regression.py b/tools/auto_bisect/bisect_perf_regression.py
index e80fd43..01b4fab 100755
--- a/tools/auto_bisect/bisect_perf_regression.py
+++ b/tools/auto_bisect/bisect_perf_regression.py
@@ -50,6 +50,7 @@ sys.path.append(os.path.join(
     os.path.dirname(__file__), os.path.pardir, 'telemetry'))
 
 from bisect_results import BisectResults
+from bisect_results import ConfidenceScore
 import bisect_utils
 import builder
 import math_utils
@@ -169,6 +170,9 @@ MAX_LINUX_BUILD_TIME = 14400
 # The percentage at which confidence is considered high.
 HIGH_CONFIDENCE = 95
 
+# The confidence percentage we require to consider the initial range a
+# regression based on the test results of the initial good and bad revisions.
+REGRESSION_CONFIDENCE = 95
 
 # Patch template to add a new file, DEPS.sha under src folder.
 # This file contains SHA1 value of the DEPS changes made while bisecting
@@ -2471,6 +2475,19 @@ class BisectPerformanceMetrics(object):
           return results
         print message, "Therefore we continue to bisect."
 
+      # Check how likely it is that the good and bad results are different
+      # beyond chance-induced variation.
+      if not self.opts.debug_ignore_regression_confidence:
+        regression_confidence = ConfidenceScore(known_bad_value['values'],
+                                                known_good_value['values'])
+        if regression_confidence < REGRESSION_CONFIDENCE:
+          results.error = ('We could not reproduce the regression with this '
+                           'test/metric/platform combination with enough '
+                           'confidence. There\'s still a chance that this is '
+                           'actually a regression, but you may need to bisect '
+                           'a different platform.')
+          return results
+
       # Can just mark the good and bad revisions explicitly here since we
       # already know the results.
       bad_revision_data = revision_data[revision_list[0]]
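
To see how the 95% threshold behaves, here is a worked example with hypothetical
numbers, run against the sketch above (illustrative only):

    # Heavily overlapping samples: confidence lands well below 95
    # (roughly 65 here), so the bisect stops instead of chasing noise.
    good = [10.1, 10.3, 9.9, 10.2, 10.0]
    bad = [10.2, 10.4, 10.0, 10.1, 10.3]
    print ShouldBisect(bad, good)   # -> False

    # Clearly separated samples: confidence is essentially 100, so the
    # initial range is accepted as a real regression and bisect proceeds.
    good = [10.0, 10.1, 9.9]
    bad = [12.0, 12.1, 11.9]
    print ShouldBisect(bad, good)   # -> True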
@@ -2968,6 +2985,7 @@ class BisectOptions(object):
     self.debug_ignore_build = None
     self.debug_ignore_sync = None
     self.debug_ignore_perf_test = None
+    self.debug_ignore_regression_confidence = None
     self.debug_fake_first_test_mean = 0
     self.gs_bucket = None
     self.target_arch = 'ia32'
@@ -3135,6 +3153,10 @@ class BisectOptions(object):
     group.add_option('--debug_ignore_perf_test',
                      action='store_true',
                      help='DEBUG: Don\'t perform performance tests.')
+    group.add_option('--debug_ignore_regression_confidence',
+                     action='store_true',
+                     help='DEBUG: Don\'t score the confidence of the initial '
+                          'good and bad revisions\' test results.')
     group.add_option('--debug_fake_first_test_mean',
                      type='int',
                      default='0',
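
Usage note: like the other --debug_ignore_* switches in this option group, the new
--debug_ignore_regression_confidence flag is a local-debugging bypass; when it is
passed, the bisect proceeds even if the initial good and bad samples are
statistically indistinguishable.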