Use threads to download builds in the background.

Review URL: http://codereview.chromium.org/7493016 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@94810 0039d316-1c4b-4281-b951-d872f2087c98
author: szager@google.com <szager@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2011-07-30 01:05:52 +0000
committer: szager@google.com <szager@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2011-07-30 01:05:52 +0000
commit: afe3066f4df920c927640ab917027063e54bec05 (patch)
tree: cfc013dd80d74732d7184979ad8d68c071778733
parent: 5f4f2a73f0a14bc9438525d1d4d85e704ec6d590 (diff)
download: chromium_src-afe3066f4df920c927640ab917027063e54bec05.zip
chromium_src-afe3066f4df920c927640ab917027063e54bec05.tar.gz
chromium_src-afe3066f4df920c927640ab917027063e54bec05.tar.bz2
1 files changed, 240 insertions, 206 deletions
diff --git a/tools/bisect-builds.py b/tools/bisect-builds.py
index 68ceb3c..299aade 100644
--- a/tools/bisect-builds.py
+++ b/tools/bisect-builds.py
@@ -14,7 +14,6 @@ it will ask you whether it is good or bad before continuing the search.
 
 # The root URL for storage.
 BASE_URL = 'http://commondatastorage.googleapis.com/chromium-browser-snapshots'
-BASE_URL_RECENT = 'http://build.chromium.org/f/chromium/snapshots'
 
 # URL to the ViewVC commit page.
 BUILD_VIEWVC_URL = 'http://src.chromium.org/viewvc/chrome?view=rev&revision=%d'
@@ -31,8 +30,10 @@ import os
 import pipes
 import re
 import shutil
+import subprocess
 import sys
 import tempfile
+import threading
 import urllib
 from xml.etree import ElementTree
 import zipfile
@@ -40,13 +41,12 @@ import zipfile
 class PathContext(object):
   """A PathContext is used to carry the information used to construct URLs and
   paths when dealing with the storage server and archives."""
-  def __init__(self, platform, good_revision, bad_revision, use_recent):
+  def __init__(self, platform, good_revision, bad_revision):
     super(PathContext, self).__init__()
     # Store off the input parameters.
     self.platform = platform  # What's passed in to the '-a/--archive' option.
     self.good_revision = good_revision
     self.bad_revision = bad_revision
-    self.use_recent = use_recent
 
     # The name of the ZIP file in a revision directory on the server.
     self.archive_name = None
@@ -74,7 +74,7 @@ class PathContext(object):
       self._archive_extract_dir = 'chrome-win32'
       self._binary_name = 'chrome.exe'
     else:
-      raise Exception("Invalid platform")
+      raise Exception('Invalid platform: %s' % self.platform)
 
   def GetListingURL(self, marker=None):
     """Returns the URL for a directory listing, with an optional marker."""
@@ -84,19 +84,10 @@ class PathContext(object):
     return BASE_URL + '/?delimiter=/&prefix=' + self._listing_platform_dir + \
         marker_param
 
-  def GetListingURLRecent(self):
-    """Returns the URL for a directory listing of recent builds."""
-    return BASE_URL_RECENT + '/' + self._listing_platform_dir
-
   def GetDownloadURL(self, revision):
     """Gets the download URL for a build archive of a specific revision."""
-    if self.use_recent:
-      return "%s/%s%d/%s" % (
-          BASE_URL_RECENT, self._listing_platform_dir, revision,
-          self.archive_name)
-    else:
-      return "%s/%s%d/%s" % (
-          BASE_URL, self._listing_platform_dir, revision, self.archive_name)
+    return "%s/%s%d/%s" % (
+        BASE_URL, self._listing_platform_dir, revision, self.archive_name)
 
   def GetLastChangeURL(self):
     """Returns a URL to the LAST_CHANGE file."""
@@ -107,12 +98,82 @@ class PathContext(object):
     that is used to run the executable."""
     return os.path.join(self._archive_extract_dir, self._binary_name)
 
+  def ParseDirectoryIndex(self):
+    """Parses the Google Storage directory listing into a list of revision
+    numbers. The range starts with self.good_revision and goes until
+    self.bad_revision."""
+
+    def _FetchAndParse(url):
+      """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
+      next-marker is not None, then the listing is a partial listing and another
+      fetch should be performed with next-marker being the marker= GET
+      parameter."""
+      handle = urllib.urlopen(url)
+      document = ElementTree.parse(handle)
+
+      # All nodes in the tree are namespaced. Get the root's tag name to extract
+      # the namespace. Etree does namespaces as |{namespace}tag|.
+      root_tag = document.getroot().tag
+      end_ns_pos = root_tag.find('}')
+      if end_ns_pos == -1:
+        raise Exception("Could not locate end namespace for directory index")
+      namespace = root_tag[:end_ns_pos + 1]
+
+      # Find the prefix (_listing_platform_dir) and whether or not the list is
+      # truncated.
+      prefix_len = len(document.find(namespace + 'Prefix').text)
+      next_marker = None
+      is_truncated = document.find(namespace + 'IsTruncated')
+      if is_truncated is not None and is_truncated.text.lower() == 'true':
+        next_marker = document.find(namespace + 'NextMarker').text
+
+      # Get a list of all the revisions.
+      all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
+                                      namespace + 'Prefix')
+      # The <Prefix> nodes have content of the form of
+      # |_listing_platform_dir/revision/|. Strip off the platform dir and the
+      # trailing slash to just have a number.
+      revisions = []
+      for prefix in all_prefixes:
+        revnum = prefix.text[prefix_len:-1]
+        try:
+          revnum = int(revnum)
+          revisions.append(revnum)
+        except ValueError:
+          pass
+      return (revisions, next_marker)
+
+    # Fetch the first list of revisions.
+    (revisions, next_marker) = _FetchAndParse(self.GetListingURL())
+
+    # If the result list was truncated, refetch with the next marker. Do this
+    # until an entire directory listing is done.
+    while next_marker:
+      next_url = self.GetListingURL(next_marker)
+      (new_revisions, next_marker) = _FetchAndParse(next_url)
+      revisions.extend(new_revisions)
+
+    return revisions
+
+  def GetRevList(self):
+    """Gets the list of revision numbers between self.good_revision and
+    self.bad_revision."""
+    # Download the revlist and filter for just the range between good and bad.
+    minrev = self.good_revision
+    maxrev = self.bad_revision
+    revlist = map(int, self.ParseDirectoryIndex())
+    revlist = [x for x in revlist if x >= minrev and x <= maxrev]
+    revlist.sort()
+    return revlist
+
 
 def UnzipFilenameToDir(filename, dir):
   """Unzip |filename| to directory |dir|."""
+  cwd = os.getcwd()
+  if not os.path.isabs(filename):
+    filename = os.path.join(cwd, filename)
   zf = zipfile.ZipFile(filename)
   # Make base.
-  pushd = os.getcwd()
   try:
     if not os.path.isdir(dir):
       os.mkdir(dir)
@@ -132,191 +193,201 @@ def UnzipFilenameToDir(filename, dir):
         out.close()
       # Set permissions. Permission info in external_attr is shifted 16 bits.
       os.chmod(name, info.external_attr >> 16L)
-    os.chdir(pushd)
+    os.chdir(cwd)
   except Exception, e:
     print >>sys.stderr, e
     sys.exit(1)
 
 
-def ParseDirectoryIndex(context):
-  """Parses the Google Storage directory listing into a list of revision
-  numbers. The range starts with context.good_revision and goes until the latest
-  revision."""
-  def _FetchAndParse(url):
-    """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
-    next-marker is not None, then the listing is a partial listing and another
-    fetch should be performed with next-marker being the marker= GET
-    parameter."""
-    handle = urllib.urlopen(url)
-    document = ElementTree.parse(handle)
-
-    # All nodes in the tree are namespaced. Get the root's tag name to extract
-    # the namespace. Etree does namespaces as |{namespace}tag|.
-    root_tag = document.getroot().tag
-    end_ns_pos = root_tag.find('}')
-    if end_ns_pos == -1:
-      raise Exception("Could not locate end namespace for directory index")
-    namespace = root_tag[:end_ns_pos + 1]
-
-    # Find the prefix (_listing_platform_dir) and whether or not the list is
-    # truncated.
-    prefix_len = len(document.find(namespace + 'Prefix').text)
-    next_marker = None
-    is_truncated = document.find(namespace + 'IsTruncated')
-    if is_truncated is not None and is_truncated.text.lower() == 'true':
-      next_marker = document.find(namespace + 'NextMarker').text
-
-    # Get a list of all the revisions.
-    all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
-                                    namespace + 'Prefix')
-    # The <Prefix> nodes have content of the form of
-    # |_listing_platform_dir/revision/|. Strip off the platform dir and the
-    # trailing slash to just have a number.
-    revisions = []
-    for prefix in all_prefixes:
-      revnum = prefix.text[prefix_len:-1]
-      try:
-        revnum = int(revnum)
-        revisions.append(revnum)
-      except ValueError:
-        pass
-    return (revisions, next_marker)
-
-  # Fetch the first list of revisions.
-  (revisions, next_marker) = _FetchAndParse(context.GetListingURL())
-  # If the result list was truncated, refetch with the next marker. Do this
-  # until an entire directory listing is done.
-  while next_marker:
-    (new_revisions, next_marker) = _FetchAndParse(
-        context.GetListingURL(next_marker))
-    revisions.extend(new_revisions)
-
-  return revisions
-
-
-def ParseDirectoryIndexRecent(context):
-  """Parses the recent builds directory listing into a list of revision
-  numbers."""
-  handle = urllib.urlopen(context.GetListingURLRecent())
-  document = handle.read()
-
-  # Looking for: <a href="92976/">92976/</a>
-  return re.findall(r"<a href=\"(\d+)/\">\1/</a>", document)
-
-
-def FilterRevList(context, revlist):
-  """Filter revlist to the revisions between |good_revision| and
-  |bad_revision| of the |context|."""
-  # Download the revlist and filter for just the range between good and bad.
-  rev_range = range(context.good_revision, context.bad_revision)
-  revlist = filter(lambda r: r in rev_range, revlist)
-  revlist.sort()
-  return revlist
-
-
-def TryRevision(context, rev, profile, args):
-  """Downloads revision |rev|, unzips it, and opens it for the user to test.
-  |profile| is the profile to use."""
-  # Do this in a temp dir so we don't collide with user files.
-  cwd = os.getcwd()
-  tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
-  os.chdir(tempdir)
+def FetchRevision(context, rev, filename, quit_event=None):
+  """Downloads and unzips revision |rev|.
+  @param context A PathContext instance.
+  @param rev The Chromium revision number/tag to download.
+  @param filename The destination for the downloaded file.
+  @param quit_event A threading.Event which will be set by the master thread to
+                    indicate that the download should be aborted.
+  """
+  def ReportHook(blocknum, blocksize, totalsize):
+    if quit_event and quit_event.is_set():
+     raise RuntimeError("Aborting download of revision %d" % rev)
 
-  # Download the file.
   download_url = context.GetDownloadURL(rev)
-  def _ReportHook(blocknum, blocksize, totalsize):
-    size = blocknum * blocksize
-    if totalsize == -1:  # Total size not known.
-      progress = "Received %d bytes" % size
-    else:
-      size = min(totalsize, size)
-      progress = "Received %d of %d bytes, %.2f%%" % (
-          size, totalsize, 100.0 * size / totalsize)
-    # Send a \r to let all progress messages use just one line of output.
-    sys.stdout.write("\r" + progress)
-    sys.stdout.flush()
   try:
-    print 'Fetching ' + download_url
-    urllib.urlretrieve(download_url, context.archive_name, _ReportHook)
-    print
-    # Throw an exception if the download was less than 1000 bytes.
-    if os.path.getsize(context.archive_name) < 1000: raise Exception()
-  except Exception, e:
-    print('Could not retrieve the download. Sorry.')
-    sys.exit(-1)
+    urllib.urlretrieve(download_url, filename, ReportHook)
+  except RuntimeError, e:
+    pass
+
 
-  # Unzip the file.
-  print 'Unzipping ...'
-  UnzipFilenameToDir(context.archive_name, os.curdir)
+def RunRevision(context, revision, zipfile, profile, args):
+  """Given a zipped revision, unzip it and run the test."""
+  print "Trying revision %d..." % revision
+
+  # Create a temp directory and unzip the revision into it.
+  cwd = os.getcwd()
+  tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
+  UnzipFilenameToDir(zipfile, tempdir)
+  os.chdir(tempdir)
 
-  # Tell the system to open the app.
-  args = ['--user-data-dir=%s' % profile] + args
-  flags = ' '.join(map(pipes.quote, args))
-  cmd = '%s %s' % (context.GetLaunchPath(), flags)
-  print 'Running %s' % cmd
-  os.system(cmd)
+  # Run the build.
+  testargs = [context.GetLaunchPath(), '--user-data-dir=%s' % profile] + args
+  subproc = subprocess.Popen(testargs,
+                             bufsize=-1,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+  (stdout, stderr) = subproc.communicate()
 
   os.chdir(cwd)
-  print 'Cleaning temp dir ...'
   try:
     shutil.rmtree(tempdir, True)
   except Exception, e:
     pass
 
+  return (subproc.returncode, stdout, stderr)
 
-def AskIsGoodBuild(rev):
+def AskIsGoodBuild(rev, status, stdout, stderr):
   """Ask the user whether build |rev| is good or bad."""
   # Loop until we get a response that we can parse.
   while True:
-    response = raw_input('\nBuild %d is [(g)ood/(b)ad]: ' % int(rev))
+    response = raw_input('\nRevision %d is [(g)ood/(b)ad/(q)uit]: ' % int(rev))
     if response and response in ('g', 'b'):
       return response == 'g'
+    if response and response == 'q':
+      raise SystemExit()
 
-
-def Bisect(revlist,
-           context,
+def Bisect(platform,
+           good_rev=0,
+           bad_rev=0,
            try_args=(),
-           profile='profile',
+           profile=None,
            predicate=AskIsGoodBuild):
-  """Tries to find the exact commit where a regression was introduced by
-  running a binary search on all archived builds in a given revision range.
-
-  @param revlist A list of chromium revision numbers to check.
-  @param context A PathContext object.
-  @param try_args A tuple of arguments to pass to the predicate function.
-  @param profile The user profile with which to run chromium.
+  """Given known good and known bad revisions, run a binary search on all
+  archived revisions to determine the last known good revision.
+
+  @param platform Which build to download/run ('mac', 'win', 'linux64', etc.).
+  @param good_rev Number/tag of the last known good revision.
+  @param bad_rev Number/tag of the first known bad revision.
+  @param try_args A tuple of arguments to pass to the test application.
+  @param profile The name of the user profile to run with.
   @param predicate A predicate function which returns True iff the argument
                    chromium revision is good.
+
+  Threading is used to fetch Chromium revisions in the background, speeding up
+  the user's experience. For example, suppose the bounds of the search are
+  good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
+  whether revision 50 is good or bad, the next revision to check will be either
+  25 or 75. So, while revision 50 is being checked, the script will download
+  revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
+  known:
+
+    - If rev 50 is good, the download of rev 25 is cancelled, and the next test
+      is run on rev 75.
+
+    - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
+      is run on rev 25.
   """
 
-  good = 0
-  bad = len(revlist) - 1
-  last_known_good_rev = revlist[good]
-  first_known_bad_rev = revlist[bad]
+  if not profile:
+    profile = 'profile'
 
-  # Binary search time!
-  while good < bad:
-    candidates = revlist[good:bad]
-    num_poss = len(candidates)
-    if num_poss > 10:
-      print('%d candidates. %d tries left.' %
-          (num_poss, round(math.log(num_poss, 2))))
-    else:
-      print('Candidates: %s' % revlist[good:bad])
+  context = PathContext(platform, good_rev, bad_rev)
+  cwd = os.getcwd()
 
-    # Cut the problem in half...
-    test = int((bad - good) / 2) + good
-    test_rev = revlist[test]
+  _GetDownloadPath = lambda rev: os.path.join(cwd,
+      '%d-%s' % (rev, context.archive_name))
 
-    # Let the user give this rev a spin (in her own profile, if she wants).
-    TryRevision(context, test_rev, profile, try_args)
-    if predicate(test_rev):
-      last_known_good_rev = test_rev
-      good = test + 1
-    else:
-      bad = test
+  revlist = context.GetRevList()
+
+  # Get a list of revisions to bisect across.
+  if len(revlist) < 2:  # Don't have enough builds to bisect.
+    msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist
+    raise RuntimeError(msg)
+
+  # Figure out our bookends and first pivot point; fetch the pivot revision.
+  good = 0
+  bad = len(revlist) - 1
+  pivot = bad / 2
+  rev = revlist[pivot]
+  zipfile = _GetDownloadPath(rev)
+  print "Downloading revision %d..." % rev
+  FetchRevision(context, rev, zipfile)
 
-  return (last_known_good_rev, first_known_bad_rev)
+  # Binary search time!
+  while zipfile and bad - good > 1:
+    # Pre-fetch next two possible pivots
+    #   - down_pivot is the next revision to check if the current revision turns
+    #     out to be bad.
+    #   - up_pivot is the next revision to check if the current revision turns
+    #     out to be good.
+    down_pivot = int((pivot - good) / 2) + good
+    down_thread = None
+    if down_pivot != pivot and down_pivot != good:
+      down_rev = revlist[down_pivot]
+      down_zipfile = _GetDownloadPath(down_rev)
+      down_event = threading.Event()
+      fetchargs = (context, down_rev, down_zipfile, down_event)
+      down_thread = threading.Thread(target=FetchRevision,
+                                     name='down_fetch',
+                                     args=fetchargs)
+      down_thread.start()
+
+    up_pivot = int((bad - pivot) / 2) + pivot
+    up_thread = None
+    if up_pivot != pivot and up_pivot != bad:
+      up_rev = revlist[up_pivot]
+      up_zipfile = _GetDownloadPath(up_rev)
+      up_event = threading.Event()
+      fetchargs = (context, up_rev, up_zipfile, up_event)
+      up_thread = threading.Thread(target=FetchRevision,
+                                   name='up_fetch',
+                                   args=fetchargs)
+      up_thread.start()
+
+    # Run test on the pivot revision.
+    (status, stdout, stderr) = RunRevision(context,
+                                           rev,
+                                           zipfile,
+                                           profile,
+                                           try_args)
+    os.unlink(zipfile)
+    zipfile = None
+
+    # Call the predicate function to see if the current revision is good or bad.
+    # On that basis, kill one of the background downloads and complete the
+    # other, as described in the comments above.
+    try:
+      if predicate(rev, status, stdout, stderr):
+        good = pivot
+        if down_thread:
+          down_event.set()  # Kill the download of older revision.
+          down_thread.join()
+          os.unlink(down_zipfile)
+        if up_thread:
+          print "Downloading revision %d..." % up_rev
+          up_thread.join()  # Wait for newer revision to finish downloading.
+          pivot = up_pivot
+          zipfile = up_zipfile
+      else:
+        bad = pivot
+        if up_thread:
+          up_event.set()  # Kill download of newer revision.
+          up_thread.join()
+          os.unlink(up_zipfile)
+        if down_thread:
+          print "Downloading revision %d..." % down_rev
+          down_thread.join()  # Wait for older revision to finish downloading.
+          pivot = down_pivot
+          zipfile = down_zipfile
+    except SystemExit:
+      for f in [down_zipfile, up_zipfile]:
+        try:
+          os.unlink(f)
+        except OSError:
+          pass
+      sys.exit(0)
+
+    rev = revlist[pivot]
+
+  return (revlist[good], revlist[bad])
 
 
 def main():
@@ -354,7 +425,7 @@ def main():
     return 1
 
   # Create the context. Initialize 0 for the revisions as they are set below.
-  context = PathContext(opts.archive, 0, 0, use_recent=False)
+  context = PathContext(opts.archive, 0, 0)
 
   # Pick a starting point, try to get HEAD for this.
   if opts.bad:
@@ -384,45 +455,8 @@ def main():
     except Exception, e:
       pass
 
-  # Set the input parameters now that they've been validated.
-  context.good_revision = good_rev
-  context.bad_revision = bad_rev
-
-  # Get recent revision list and check whether it's sufficient.
-  all_revs_recent = map(int, ParseDirectoryIndexRecent(context))
-  all_revs_recent.sort()
-  # Skipping 0 since it might be deleted off the server soon:
-  all_revs_recent = all_revs_recent[1:]
-  oldest_recent_rev = all_revs_recent[0]
-  if good_rev >= oldest_recent_rev:
-    # The range is within recent builds, so switch on use_recent.
-    context.use_recent = True
-  elif bad_rev >= oldest_recent_rev:
-    # The range spans both old and recent builds.
-    # If oldest_recent_rev is good, we bisect the recent builds.
-    context.use_recent = True
-    TryRevision(context, oldest_recent_rev, opts.profile, args)
-    if AskIsGoodBuild(oldest_recent_rev):
-      # context.use_recent is True
-      context.good_revision = oldest_recent_rev
-    else:
-      context.use_recent = False
-      context.bad_revision = oldest_recent_rev
-
-  all_revs = []
-  if context.use_recent:
-    all_revs = all_revs_recent
-  else:
-    all_revs = map(int, ParseDirectoryIndex(context))
-
-  # Filter list of revisions to bisect across.
-  revlist = FilterRevList(context, all_revs)
-  if len(revlist) < 2:  # Don't have enough builds to bisect
-    print 'We don\'t have enough builds to bisect. revlist: %s' % revlist
-    sys.exit(1)
-
   (last_known_good_rev, first_known_bad_rev) = Bisect(
-      revlist, context, args, opts.profile)
+      opts.archive, good_rev, bad_rev, args, opts.profile)
 
   # We're done. Let the user know the results in an official manner.
   print('You are probably looking for build %d.' % first_known_bad_rev)
author	szager@google.com <szager@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2011-07-30 01:05:52 +0000
committer	szager@google.com <szager@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2011-07-30 01:05:52 +0000
commit	afe3066f4df920c927640ab917027063e54bec05 (patch)
tree	cfc013dd80d74732d7184979ad8d68c071778733
parent	5f4f2a73f0a14bc9438525d1d4d85e704ec6d590 (diff)
download	chromium_src-afe3066f4df920c927640ab917027063e54bec05.zip chromium_src-afe3066f4df920c927640ab917027063e54bec05.tar.gz chromium_src-afe3066f4df920c927640ab917027063e54bec05.tar.bz2