summaryrefslogtreecommitdiffstats
path: root/tools/bisect-builds.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/bisect-builds.py')
-rw-r--r--tools/bisect-builds.py446
1 files changed, 240 insertions, 206 deletions
diff --git a/tools/bisect-builds.py b/tools/bisect-builds.py
index 68ceb3c..299aade 100644
--- a/tools/bisect-builds.py
+++ b/tools/bisect-builds.py
@@ -14,7 +14,6 @@ it will ask you whether it is good or bad before continuing the search.
# The root URL for storage.
BASE_URL = 'http://commondatastorage.googleapis.com/chromium-browser-snapshots'
-BASE_URL_RECENT = 'http://build.chromium.org/f/chromium/snapshots'
# URL to the ViewVC commit page.
BUILD_VIEWVC_URL = 'http://src.chromium.org/viewvc/chrome?view=rev&revision=%d'
@@ -31,8 +30,10 @@ import os
import pipes
import re
import shutil
+import subprocess
import sys
import tempfile
+import threading
import urllib
from xml.etree import ElementTree
import zipfile
@@ -40,13 +41,12 @@ import zipfile
class PathContext(object):
"""A PathContext is used to carry the information used to construct URLs and
paths when dealing with the storage server and archives."""
- def __init__(self, platform, good_revision, bad_revision, use_recent):
+ def __init__(self, platform, good_revision, bad_revision):
super(PathContext, self).__init__()
# Store off the input parameters.
self.platform = platform # What's passed in to the '-a/--archive' option.
self.good_revision = good_revision
self.bad_revision = bad_revision
- self.use_recent = use_recent
# The name of the ZIP file in a revision directory on the server.
self.archive_name = None
@@ -74,7 +74,7 @@ class PathContext(object):
self._archive_extract_dir = 'chrome-win32'
self._binary_name = 'chrome.exe'
else:
- raise Exception("Invalid platform")
+ raise Exception('Invalid platform: %s' % self.platform)
def GetListingURL(self, marker=None):
"""Returns the URL for a directory listing, with an optional marker."""
@@ -84,19 +84,10 @@ class PathContext(object):
return BASE_URL + '/?delimiter=/&prefix=' + self._listing_platform_dir + \
marker_param
- def GetListingURLRecent(self):
- """Returns the URL for a directory listing of recent builds."""
- return BASE_URL_RECENT + '/' + self._listing_platform_dir
-
def GetDownloadURL(self, revision):
"""Gets the download URL for a build archive of a specific revision."""
- if self.use_recent:
- return "%s/%s%d/%s" % (
- BASE_URL_RECENT, self._listing_platform_dir, revision,
- self.archive_name)
- else:
- return "%s/%s%d/%s" % (
- BASE_URL, self._listing_platform_dir, revision, self.archive_name)
+ return "%s/%s%d/%s" % (
+ BASE_URL, self._listing_platform_dir, revision, self.archive_name)
def GetLastChangeURL(self):
"""Returns a URL to the LAST_CHANGE file."""
@@ -107,12 +98,82 @@ class PathContext(object):
that is used to run the executable."""
return os.path.join(self._archive_extract_dir, self._binary_name)
+ def ParseDirectoryIndex(self):
+ """Parses the Google Storage directory listing into a list of revision
+ numbers. The range starts with self.good_revision and goes until
+ self.bad_revision."""
+
+ def _FetchAndParse(url):
+ """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
+ next-marker is not None, then the listing is a partial listing and another
+ fetch should be performed with next-marker being the marker= GET
+ parameter."""
+ handle = urllib.urlopen(url)
+ document = ElementTree.parse(handle)
+
+ # All nodes in the tree are namespaced. Get the root's tag name to extract
+ # the namespace. Etree does namespaces as |{namespace}tag|.
+ root_tag = document.getroot().tag
+ end_ns_pos = root_tag.find('}')
+ if end_ns_pos == -1:
+ raise Exception("Could not locate end namespace for directory index")
+ namespace = root_tag[:end_ns_pos + 1]
+
+ # Find the prefix (_listing_platform_dir) and whether or not the list is
+ # truncated.
+ prefix_len = len(document.find(namespace + 'Prefix').text)
+ next_marker = None
+ is_truncated = document.find(namespace + 'IsTruncated')
+ if is_truncated is not None and is_truncated.text.lower() == 'true':
+ next_marker = document.find(namespace + 'NextMarker').text
+
+ # Get a list of all the revisions.
+ all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
+ namespace + 'Prefix')
+ # The <Prefix> nodes have content of the form of
+ # |_listing_platform_dir/revision/|. Strip off the platform dir and the
+ # trailing slash to just have a number.
+ revisions = []
+ for prefix in all_prefixes:
+ revnum = prefix.text[prefix_len:-1]
+ try:
+ revnum = int(revnum)
+ revisions.append(revnum)
+ except ValueError:
+ pass
+ return (revisions, next_marker)
+
+ # Fetch the first list of revisions.
+ (revisions, next_marker) = _FetchAndParse(self.GetListingURL())
+
+ # If the result list was truncated, refetch with the next marker. Do this
+ # until an entire directory listing is done.
+ while next_marker:
+ next_url = self.GetListingURL(next_marker)
+ (new_revisions, next_marker) = _FetchAndParse(next_url)
+ revisions.extend(new_revisions)
+
+ return revisions
+
+ def GetRevList(self):
+ """Gets the list of revision numbers between self.good_revision and
+ self.bad_revision."""
+ # Download the revlist and filter for just the range between good and bad.
+ minrev = self.good_revision
+ maxrev = self.bad_revision
+ revlist = map(int, self.ParseDirectoryIndex())
+ revlist = [x for x in revlist if x >= minrev and x <= maxrev]
+ revlist.sort()
+ return revlist
+
def UnzipFilenameToDir(filename, dir):
"""Unzip |filename| to directory |dir|."""
+ cwd = os.getcwd()
+ if not os.path.isabs(filename):
+ filename = os.path.join(cwd, filename)
zf = zipfile.ZipFile(filename)
# Make base.
- pushd = os.getcwd()
try:
if not os.path.isdir(dir):
os.mkdir(dir)
@@ -132,191 +193,201 @@ def UnzipFilenameToDir(filename, dir):
out.close()
# Set permissions. Permission info in external_attr is shifted 16 bits.
os.chmod(name, info.external_attr >> 16L)
- os.chdir(pushd)
+ os.chdir(cwd)
except Exception, e:
print >>sys.stderr, e
sys.exit(1)
-def ParseDirectoryIndex(context):
- """Parses the Google Storage directory listing into a list of revision
- numbers. The range starts with context.good_revision and goes until the latest
- revision."""
- def _FetchAndParse(url):
- """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
- next-marker is not None, then the listing is a partial listing and another
- fetch should be performed with next-marker being the marker= GET
- parameter."""
- handle = urllib.urlopen(url)
- document = ElementTree.parse(handle)
-
- # All nodes in the tree are namespaced. Get the root's tag name to extract
- # the namespace. Etree does namespaces as |{namespace}tag|.
- root_tag = document.getroot().tag
- end_ns_pos = root_tag.find('}')
- if end_ns_pos == -1:
- raise Exception("Could not locate end namespace for directory index")
- namespace = root_tag[:end_ns_pos + 1]
-
- # Find the prefix (_listing_platform_dir) and whether or not the list is
- # truncated.
- prefix_len = len(document.find(namespace + 'Prefix').text)
- next_marker = None
- is_truncated = document.find(namespace + 'IsTruncated')
- if is_truncated is not None and is_truncated.text.lower() == 'true':
- next_marker = document.find(namespace + 'NextMarker').text
-
- # Get a list of all the revisions.
- all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
- namespace + 'Prefix')
- # The <Prefix> nodes have content of the form of
- # |_listing_platform_dir/revision/|. Strip off the platform dir and the
- # trailing slash to just have a number.
- revisions = []
- for prefix in all_prefixes:
- revnum = prefix.text[prefix_len:-1]
- try:
- revnum = int(revnum)
- revisions.append(revnum)
- except ValueError:
- pass
- return (revisions, next_marker)
-
- # Fetch the first list of revisions.
- (revisions, next_marker) = _FetchAndParse(context.GetListingURL())
- # If the result list was truncated, refetch with the next marker. Do this
- # until an entire directory listing is done.
- while next_marker:
- (new_revisions, next_marker) = _FetchAndParse(
- context.GetListingURL(next_marker))
- revisions.extend(new_revisions)
-
- return revisions
-
-
-def ParseDirectoryIndexRecent(context):
- """Parses the recent builds directory listing into a list of revision
- numbers."""
- handle = urllib.urlopen(context.GetListingURLRecent())
- document = handle.read()
-
- # Looking for: <a href="92976/">92976/</a>
- return re.findall(r"<a href=\"(\d+)/\">\1/</a>", document)
-
-
-def FilterRevList(context, revlist):
- """Filter revlist to the revisions between |good_revision| and
- |bad_revision| of the |context|."""
- # Download the revlist and filter for just the range between good and bad.
- rev_range = range(context.good_revision, context.bad_revision)
- revlist = filter(lambda r: r in rev_range, revlist)
- revlist.sort()
- return revlist
-
-
-def TryRevision(context, rev, profile, args):
- """Downloads revision |rev|, unzips it, and opens it for the user to test.
- |profile| is the profile to use."""
- # Do this in a temp dir so we don't collide with user files.
- cwd = os.getcwd()
- tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
- os.chdir(tempdir)
+def FetchRevision(context, rev, filename, quit_event=None):
+ """Downloads and unzips revision |rev|.
+ @param context A PathContext instance.
+ @param rev The Chromium revision number/tag to download.
+ @param filename The destination for the downloaded file.
+ @param quit_event A threading.Event which will be set by the master thread to
+ indicate that the download should be aborted.
+ """
+ def ReportHook(blocknum, blocksize, totalsize):
+ if quit_event and quit_event.is_set():
+ raise RuntimeError("Aborting download of revision %d" % rev)
- # Download the file.
download_url = context.GetDownloadURL(rev)
- def _ReportHook(blocknum, blocksize, totalsize):
- size = blocknum * blocksize
- if totalsize == -1: # Total size not known.
- progress = "Received %d bytes" % size
- else:
- size = min(totalsize, size)
- progress = "Received %d of %d bytes, %.2f%%" % (
- size, totalsize, 100.0 * size / totalsize)
- # Send a \r to let all progress messages use just one line of output.
- sys.stdout.write("\r" + progress)
- sys.stdout.flush()
try:
- print 'Fetching ' + download_url
- urllib.urlretrieve(download_url, context.archive_name, _ReportHook)
- print
- # Throw an exception if the download was less than 1000 bytes.
- if os.path.getsize(context.archive_name) < 1000: raise Exception()
- except Exception, e:
- print('Could not retrieve the download. Sorry.')
- sys.exit(-1)
+ urllib.urlretrieve(download_url, filename, ReportHook)
+ except RuntimeError, e:
+ pass
+
- # Unzip the file.
- print 'Unzipping ...'
- UnzipFilenameToDir(context.archive_name, os.curdir)
+def RunRevision(context, revision, zipfile, profile, args):
+ """Given a zipped revision, unzip it and run the test."""
+ print "Trying revision %d..." % revision
+
+ # Create a temp directory and unzip the revision into it.
+ cwd = os.getcwd()
+ tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
+ UnzipFilenameToDir(zipfile, tempdir)
+ os.chdir(tempdir)
- # Tell the system to open the app.
- args = ['--user-data-dir=%s' % profile] + args
- flags = ' '.join(map(pipes.quote, args))
- cmd = '%s %s' % (context.GetLaunchPath(), flags)
- print 'Running %s' % cmd
- os.system(cmd)
+ # Run the build.
+ testargs = [context.GetLaunchPath(), '--user-data-dir=%s' % profile] + args
+ subproc = subprocess.Popen(testargs,
+ bufsize=-1,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ (stdout, stderr) = subproc.communicate()
os.chdir(cwd)
- print 'Cleaning temp dir ...'
try:
shutil.rmtree(tempdir, True)
except Exception, e:
pass
+ return (subproc.returncode, stdout, stderr)
-def AskIsGoodBuild(rev):
+def AskIsGoodBuild(rev, status, stdout, stderr):
"""Ask the user whether build |rev| is good or bad."""
# Loop until we get a response that we can parse.
while True:
- response = raw_input('\nBuild %d is [(g)ood/(b)ad]: ' % int(rev))
+ response = raw_input('\nRevision %d is [(g)ood/(b)ad/(q)uit]: ' % int(rev))
if response and response in ('g', 'b'):
return response == 'g'
+ if response and response == 'q':
+ raise SystemExit()
-
-def Bisect(revlist,
- context,
+def Bisect(platform,
+ good_rev=0,
+ bad_rev=0,
try_args=(),
- profile='profile',
+ profile=None,
predicate=AskIsGoodBuild):
- """Tries to find the exact commit where a regression was introduced by
- running a binary search on all archived builds in a given revision range.
-
- @param revlist A list of chromium revision numbers to check.
- @param context A PathContext object.
- @param try_args A tuple of arguments to pass to the predicate function.
- @param profile The user profile with which to run chromium.
+ """Given known good and known bad revisions, run a binary search on all
+ archived revisions to determine the last known good revision.
+
+ @param platform Which build to download/run ('mac', 'win', 'linux64', etc.).
+ @param good_rev Number/tag of the last known good revision.
+ @param bad_rev Number/tag of the first known bad revision.
+ @param try_args A tuple of arguments to pass to the test application.
+ @param profile The name of the user profile to run with.
@param predicate A predicate function which returns True iff the argument
chromium revision is good.
+
+ Threading is used to fetch Chromium revisions in the background, speeding up
+ the user's experience. For example, suppose the bounds of the search are
+ good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
+ whether revision 50 is good or bad, the next revision to check will be either
+ 25 or 75. So, while revision 50 is being checked, the script will download
+ revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
+ known:
+
+ - If rev 50 is good, the download of rev 25 is cancelled, and the next test
+ is run on rev 75.
+
+ - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
+ is run on rev 25.
"""
- good = 0
- bad = len(revlist) - 1
- last_known_good_rev = revlist[good]
- first_known_bad_rev = revlist[bad]
+ if not profile:
+ profile = 'profile'
- # Binary search time!
- while good < bad:
- candidates = revlist[good:bad]
- num_poss = len(candidates)
- if num_poss > 10:
- print('%d candidates. %d tries left.' %
- (num_poss, round(math.log(num_poss, 2))))
- else:
- print('Candidates: %s' % revlist[good:bad])
+ context = PathContext(platform, good_rev, bad_rev)
+ cwd = os.getcwd()
- # Cut the problem in half...
- test = int((bad - good) / 2) + good
- test_rev = revlist[test]
+ _GetDownloadPath = lambda rev: os.path.join(cwd,
+ '%d-%s' % (rev, context.archive_name))
- # Let the user give this rev a spin (in her own profile, if she wants).
- TryRevision(context, test_rev, profile, try_args)
- if predicate(test_rev):
- last_known_good_rev = test_rev
- good = test + 1
- else:
- bad = test
+ revlist = context.GetRevList()
+
+ # Get a list of revisions to bisect across.
+ if len(revlist) < 2: # Don't have enough builds to bisect.
+ msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist
+ raise RuntimeError(msg)
+
+ # Figure out our bookends and first pivot point; fetch the pivot revision.
+ good = 0
+ bad = len(revlist) - 1
+ pivot = bad / 2
+ rev = revlist[pivot]
+ zipfile = _GetDownloadPath(rev)
+ print "Downloading revision %d..." % rev
+ FetchRevision(context, rev, zipfile)
- return (last_known_good_rev, first_known_bad_rev)
+ # Binary search time!
+ while zipfile and bad - good > 1:
+ # Pre-fetch next two possible pivots
+ # - down_pivot is the next revision to check if the current revision turns
+ # out to be bad.
+ # - up_pivot is the next revision to check if the current revision turns
+ # out to be good.
+ down_pivot = int((pivot - good) / 2) + good
+ down_thread = None
+ if down_pivot != pivot and down_pivot != good:
+ down_rev = revlist[down_pivot]
+ down_zipfile = _GetDownloadPath(down_rev)
+ down_event = threading.Event()
+ fetchargs = (context, down_rev, down_zipfile, down_event)
+ down_thread = threading.Thread(target=FetchRevision,
+ name='down_fetch',
+ args=fetchargs)
+ down_thread.start()
+
+ up_pivot = int((bad - pivot) / 2) + pivot
+ up_thread = None
+ if up_pivot != pivot and up_pivot != bad:
+ up_rev = revlist[up_pivot]
+ up_zipfile = _GetDownloadPath(up_rev)
+ up_event = threading.Event()
+ fetchargs = (context, up_rev, up_zipfile, up_event)
+ up_thread = threading.Thread(target=FetchRevision,
+ name='up_fetch',
+ args=fetchargs)
+ up_thread.start()
+
+ # Run test on the pivot revision.
+ (status, stdout, stderr) = RunRevision(context,
+ rev,
+ zipfile,
+ profile,
+ try_args)
+ os.unlink(zipfile)
+ zipfile = None
+
+ # Call the predicate function to see if the current revision is good or bad.
+ # On that basis, kill one of the background downloads and complete the
+ # other, as described in the comments above.
+ try:
+ if predicate(rev, status, stdout, stderr):
+ good = pivot
+ if down_thread:
+ down_event.set() # Kill the download of older revision.
+ down_thread.join()
+ os.unlink(down_zipfile)
+ if up_thread:
+ print "Downloading revision %d..." % up_rev
+ up_thread.join() # Wait for newer revision to finish downloading.
+ pivot = up_pivot
+ zipfile = up_zipfile
+ else:
+ bad = pivot
+ if up_thread:
+ up_event.set() # Kill download of newer revision.
+ up_thread.join()
+ os.unlink(up_zipfile)
+ if down_thread:
+ print "Downloading revision %d..." % down_rev
+ down_thread.join() # Wait for older revision to finish downloading.
+ pivot = down_pivot
+ zipfile = down_zipfile
+ except SystemExit:
+ for f in [down_zipfile, up_zipfile]:
+ try:
+ os.unlink(f)
+ except OSError:
+ pass
+ sys.exit(0)
+
+ rev = revlist[pivot]
+
+ return (revlist[good], revlist[bad])
def main():
@@ -354,7 +425,7 @@ def main():
return 1
# Create the context. Initialize 0 for the revisions as they are set below.
- context = PathContext(opts.archive, 0, 0, use_recent=False)
+ context = PathContext(opts.archive, 0, 0)
# Pick a starting point, try to get HEAD for this.
if opts.bad:
@@ -384,45 +455,8 @@ def main():
except Exception, e:
pass
- # Set the input parameters now that they've been validated.
- context.good_revision = good_rev
- context.bad_revision = bad_rev
-
- # Get recent revision list and check whether it's sufficient.
- all_revs_recent = map(int, ParseDirectoryIndexRecent(context))
- all_revs_recent.sort()
- # Skipping 0 since it might be deleted off the server soon:
- all_revs_recent = all_revs_recent[1:]
- oldest_recent_rev = all_revs_recent[0]
- if good_rev >= oldest_recent_rev:
- # The range is within recent builds, so switch on use_recent.
- context.use_recent = True
- elif bad_rev >= oldest_recent_rev:
- # The range spans both old and recent builds.
- # If oldest_recent_rev is good, we bisect the recent builds.
- context.use_recent = True
- TryRevision(context, oldest_recent_rev, opts.profile, args)
- if AskIsGoodBuild(oldest_recent_rev):
- # context.use_recent is True
- context.good_revision = oldest_recent_rev
- else:
- context.use_recent = False
- context.bad_revision = oldest_recent_rev
-
- all_revs = []
- if context.use_recent:
- all_revs = all_revs_recent
- else:
- all_revs = map(int, ParseDirectoryIndex(context))
-
- # Filter list of revisions to bisect across.
- revlist = FilterRevList(context, all_revs)
- if len(revlist) < 2: # Don't have enough builds to bisect
- print 'We don\'t have enough builds to bisect. revlist: %s' % revlist
- sys.exit(1)
-
(last_known_good_rev, first_known_bad_rev) = Bisect(
- revlist, context, args, opts.profile)
+ opts.archive, good_rev, bad_rev, args, opts.profile)
# We're done. Let the user know the results in an official manner.
print('You are probably looking for build %d.' % first_known_bad_rev)