author     nednguyen <nednguyen@google.com>       2015-05-22 12:26:59 -0700
committer  Commit bot <commit-bot@chromium.org>   2015-05-22 19:27:27 +0000
commit     cd42fc33c58a58b4067f5c3c578634205c214958 (patch)
tree       68f6f5640b4146dbc3fa2043267a743533fc2004
parent     7fcbd6df138bad8ade9eebe222a293cd577a134d (diff)
Enable more benchmark_smoke_unittest coverage (Reland)
Previously, benchmark_smoke_unittest used a heuristic that picked a benchmark for smoke testing only if the benchmark used a test in the measurements directory. This patch removes that check and instead provides a black list of benchmark modules that are excluded from benchmark_smoke_unittest to keep CQ time down. This increases benchmark smoke coverage from 14 benchmarks to 33 benchmarks (one benchmark per benchmark module). On my local Linux machine, it increases the cycle time of benchmark_smoke_unittest from 20s to 1m7s.

To review this: the first patch set is from issue 1151013002 at patchset 20001 (http://crrev.com/1151013002#ps20001); the second patch set adds image_decoding to the black list.

BUG=
CQ_EXTRA_TRYBOTS=tryserver.chromium.perf:linux_perf_bisect;tryserver.chromium.perf:mac_perf_bisect;tryserver.chromium.perf:win_perf_bisect;tryserver.chromium.perf:android_nexus5_perf_bisect

Review URL: https://codereview.chromium.org/1154913002

Cr-Commit-Position: refs/heads/master@{#331157}
-rw-r--r--  tools/perf/benchmarks/benchmark_smoke_unittest.py        | 41
-rw-r--r--  tools/telemetry/telemetry/user_story/user_story_set.py   |  2
2 files changed, 31 insertions, 12 deletions
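
The core of the change, visible in the diff below, is that load_tests now skips any discovered benchmark whose defining module appears in the black list, instead of checking whether the benchmark uses a measurement class. A minimal sketch of the same filtering idea follows; FilterBenchmarks and excluded_modules are hypothetical names introduced here for illustration and are not part of this patch.

import sys

def FilterBenchmarks(all_benchmarks, excluded_modules):
  # Hypothetical helper mirroring the check this patch adds to load_tests:
  # sys.modules maps a benchmark class back to the module object that
  # defines it, so membership in a set of module objects can be tested
  # directly for each discovered benchmark.
  kept = []
  for benchmark in all_benchmarks:
    if sys.modules[benchmark.__module__] in excluded_modules:
      continue
    kept.append(benchmark)
  return kept

Holding module objects (rather than module name strings) in the black list means a stale or misspelled entry fails at import time instead of silently matching nothing.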
diff --git a/tools/perf/benchmarks/benchmark_smoke_unittest.py b/tools/perf/benchmarks/benchmark_smoke_unittest.py
index 4630c5b..a25f713 100644
--- a/tools/perf/benchmarks/benchmark_smoke_unittest.py
+++ b/tools/perf/benchmarks/benchmark_smoke_unittest.py
@@ -10,14 +10,23 @@ of every benchmark would run impractically long.
"""
import os
+import sys
+import time
import unittest
from telemetry import benchmark as benchmark_module
from telemetry.core import discover
-from telemetry.page import page_test
from telemetry.unittest_util import options_for_unittests
from telemetry.unittest_util import progress_reporter
+from benchmarks import dom_perf
+from benchmarks import indexeddb_perf
+from benchmarks import image_decoding
+from benchmarks import rasterize_and_record_micro
+from benchmarks import spaceport
+from benchmarks import speedometer
+from benchmarks import jetstream
+
def SmokeTestGenerator(benchmark):
# NOTE TO SHERIFFS: DO NOT DISABLE THIS TEST.
@@ -56,34 +65,44 @@ def SmokeTestGenerator(benchmark):
benchmark.ProcessCommandLineArgs(None, options)
benchmark_module.ProcessCommandLineArgs(None, options)
- self.assertEqual(0, SinglePageBenchmark().Run(options),
- msg='Failed: %s' % benchmark)
+ current = time.time()
+ try:
+ self.assertEqual(0, SinglePageBenchmark().Run(options),
+ msg='Failed: %s' % benchmark)
+ finally:
+ print 'Benchmark %s run takes %i seconds' % (
+ benchmark.Name(), time.time() - current)
return BenchmarkSmokeTest
+# The list of benchmark modules to be excluded from our smoke tests.
+_BLACK_LIST_TEST_MODULES = {
+ dom_perf, # Always fails on cq bot.
+ image_decoding, # Always fails on Mac10.9 Tests builder.
+ indexeddb_perf, # Always fails on Win7 & Android Tests builder.
+ rasterize_and_record_micro, # Always fails on cq bot.
+ spaceport, # Takes 451 seconds.
+ speedometer, # Takes 101 seconds.
+ jetstream, # Take 206 seconds.
+}
+
+
def load_tests(loader, standard_tests, pattern):
del loader, standard_tests, pattern # unused
suite = progress_reporter.TestSuite()
benchmarks_dir = os.path.dirname(__file__)
top_level_dir = os.path.dirname(benchmarks_dir)
- measurements_dir = os.path.join(top_level_dir, 'measurements')
- all_measurements = discover.DiscoverClasses(
- measurements_dir, top_level_dir, page_test.PageTest).values()
# Using the default of |index_by_class_name=False| means that if a module
# has multiple benchmarks, only the last one is returned.
all_benchmarks = discover.DiscoverClasses(
benchmarks_dir, top_level_dir, benchmark_module.Benchmark,
index_by_class_name=False).values()
for benchmark in all_benchmarks:
- if hasattr(benchmark, 'test') and benchmark.test not in all_measurements:
- # If the benchmark does not have a measurement, then it is not composable.
- # Ideally we'd like to test these as well, but the non-composable
- # benchmarks are usually long-running benchmarks.
+ if sys.modules[benchmark.__module__] in _BLACK_LIST_TEST_MODULES:
continue
-
# TODO(tonyg): Smoke doesn't work with session_restore yet.
if (benchmark.Name().startswith('session_restore') or
benchmark.Name().startswith('skpicture_printer')):
diff --git a/tools/telemetry/telemetry/user_story/user_story_set.py b/tools/telemetry/telemetry/user_story/user_story_set.py
index a0473d7..d18941c 100644
--- a/tools/telemetry/telemetry/user_story/user_story_set.py
+++ b/tools/telemetry/telemetry/user_story/user_story_set.py
@@ -43,7 +43,7 @@ class UserStorySet(object):
self._cloud_storage_bucket = cloud_storage_bucket
if base_dir:
if not os.path.isdir(base_dir):
- raise ValueError('Must provide valid directory path for base_dir.')
+ raise ValueError('Invalid directory path of base_dir: %s' % base_dir)
self._base_dir = base_dir
else:
self._base_dir = os.path.dirname(inspect.getfile(self.__class__))