path: root/tools/isolate/isolate.py
Diffstat (limited to 'tools/isolate/isolate.py')
-rwxr-xr-x  tools/isolate/isolate.py  166
1 file changed, 124 insertions(+), 42 deletions(-)
diff --git a/tools/isolate/isolate.py b/tools/isolate/isolate.py
index 8a02b76..8e28d11 100755
--- a/tools/isolate/isolate.py
+++ b/tools/isolate/isolate.py
@@ -21,6 +21,8 @@ import re
import stat
import subprocess
import sys
+import urllib
+import urllib2
import trace_inputs
import run_test_from_archive
@@ -43,6 +45,9 @@ KEY_UNTRACKED = 'isolate_dependency_untracked'
_GIT_PATH = os.path.sep + '.git'
_SVN_PATH = os.path.sep + '.svn'
+# The maximum number of upload attempts to try when uploading a single file.
+MAX_UPLOAD_ATTEMPTS = 5
+
class ExecutionError(Exception):
"""A generic error occurred."""
@@ -175,7 +180,7 @@ def expand_directories_and_symlinks(indir, infiles, blacklist):
"""Expands the directories and the symlinks, applies the blacklist and
verifies files exist.
- Files are specified in os native path separatro.
+ Files are specified in os native path separator.
"""
outfiles = []
for relfile in infiles:
@@ -240,6 +245,84 @@ def recreate_tree(outdir, indir, infiles, action, as_sha1):
run_test_from_archive.link_file(outfile, infile, action)
+def upload_hash_content(url, params, hash_content):
+ """Uploads the given hash contents.
+
+ Arguments:
+ url: The url to upload the hash contents to.
+ params: The params to include with the upload.
+ hash_content: The contents to upload.
+ """
+ url = url + '?' + urllib.urlencode(params)
+ request = urllib2.Request(url, data=hash_content)
+ request.add_header('Content-Type', 'application/octet-stream')
+ request.add_header('Content-Length', len(hash_content or ''))
+
+ return urllib2.urlopen(request)
+
+
+def upload_sha1_tree(base_url, indir, infiles):
+ """Upload the given tree to the given url.
+
+ Arguments:
+ base_url: The base url; it is assumed that |base_url|/content/contains can
+ be used to query whether an element was already uploaded, and that
+ |base_url|/content/store can be used to upload a new element.
+ indir: Root directory the infiles are based in.
+ infiles: dict of files to upload, relative to |indir|.
+ """
+ logging.info('upload tree(base_url=%s, indir=%s, files=%d)' %
+ (base_url, indir, len(infiles)))
+
+ contains_hash_url = base_url.rstrip('/') + '/content/contains'
+ upload_hash_url = base_url.rstrip('/') + '/content/store'
+
+ # TODO(csharp): Get this code to upload in parallel, similar to how
+ # run_test_from_archive.py downloads in parallel (should probably reuse
+ # that code here, reworking if necessary).
+ for relfile, metadata in infiles.iteritems():
+ if 'link' in metadata:
+ # Skip links when uploading.
+ continue
+
+ try:
+ response = urllib2.urlopen(contains_hash_url + '?' + urllib.urlencode(
+ {'hash_key': metadata['sha-1']}))
+ if response.read() == 'True':
+ logging.debug('Hash key, %s, already exists on the server, no need to '
+ 'upload again', metadata['sha-1'])
+ continue
+ except urllib2.URLError:
+ # If we encounter any error checking if the file is already on the server,
+ # assume it isn't present.
+ pass
+
+ if metadata.get('touched_only') == True:
+ hash_data = ''
+ else:
+ infile = os.path.join(indir, relfile)
+ with open(infile, 'rb') as f:
+ hash_data = f.read()
+
+ response = None
+ for attempt in range(MAX_UPLOAD_ATTEMPTS):
+ try:
+ logging.debug('Attempting to upload hash key, %s, to %s',
+ metadata['sha-1'], upload_hash_url)
+
+ response = upload_hash_content(upload_hash_url,
+ {'hash_key': metadata['sha-1']},
+ hash_data)
+ break
+ except urllib2.URLError:
+ logging.error('Failed to upload hash key, %s, on attempt %d.',
+ metadata['sha-1'], attempt + 1)
+
+ if not response:
+ raise run_test_from_archive.MappingError(
+ 'Unable to upload hash key, %s.' % metadata['sha-1'])
+
+
def process_input(filepath, prevdict, level, read_only):
"""Processes an input file, a dependency, and return meta data about it.
@@ -1394,46 +1477,45 @@ def CMDhashtable(args):
parser = OptionParserIsolate(command='hashtable')
options, _ = parser.parse_args(args)
- success = False
- try:
- complete_state = load_complete_state(options, WITH_HASH)
- options.outdir = (
- options.outdir or os.path.join(complete_state.resultdir, 'hashtable'))
- recreate_tree(
- outdir=options.outdir,
- indir=complete_state.root_dir,
- infiles=complete_state.result.files,
- action=run_test_from_archive.HARDLINK,
- as_sha1=True)
-
- complete_state.save_files()
-
- # Also archive the .result file.
- with open(complete_state.result_file, 'rb') as f:
- result_hash = hashlib.sha1(f.read()).hexdigest()
- logging.info(
- '%s -> %s' %
- (os.path.basename(complete_state.result_file), result_hash))
- outfile = os.path.join(options.outdir, result_hash)
- if os.path.isfile(outfile):
- # Just do a quick check that the file size matches. If they do, skip the
- # archive. This mean the build result didn't change at all.
- out_size = os.stat(outfile).st_size
- in_size = os.stat(complete_state.result_file).st_size
- if in_size == out_size:
- success = True
- return 0
-
- run_test_from_archive.link_file(
- outfile, complete_state.result_file,
- run_test_from_archive.HARDLINK)
- success = True
- return 0
- finally:
- # If the command failed, delete the .results file if it exists. This is
- # important so no stale swarm job is executed.
- if not success and os.path.isfile(options.result):
- os.remove(options.result)
+ with run_test_from_archive.Profiler('GenerateHashtable'):
+ success = False
+ try:
+ complete_state = load_complete_state(options, WITH_HASH)
+ options.outdir = (
+ options.outdir or os.path.join(complete_state.resultdir, 'hashtable'))
+ # Make sure that complete_state isn't modified until save_files() is
+ # called, because any changes made to it here will propagate to the files
+ # created (which is probably not intended).
+ complete_state.save_files()
+
+ logging.info('Creating content addressed object store with %d items',
+ len(complete_state.result.files))
+
+ with open(complete_state.result_file, 'rb') as f:
+ manifest_hash = hashlib.sha1(f.read()).hexdigest()
+ manifest_metadata = {'sha-1': manifest_hash}
+
+ infiles = complete_state.result.files
+ infiles[complete_state.result_file] = manifest_metadata
+
+ if re.match(r'^https?://.+$', options.outdir):
+ upload_sha1_tree(
+ base_url=options.outdir,
+ indir=complete_state.root_dir,
+ infiles=infiles)
+ else:
+ recreate_tree(
+ outdir=options.outdir,
+ indir=complete_state.root_dir,
+ infiles=infiles,
+ action=run_test_from_archive.HARDLINK,
+ as_sha1=True)
+ success = True
+ finally:
+ # If the command failed, delete the .results file if it exists. This is
+ # important so no stale swarm job is executed.
+ if not success and os.path.isfile(options.result):
+ os.remove(options.result)
def CMDnoop(args):
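
For context, a condensed sketch of the dispatch the reworked CMDhashtable now performs, where outdir, root_dir and infiles are hypothetical stand-ins for options.outdir, complete_state.root_dir and the manifest-augmented complete_state.result.files:

if re.match(r'^https?://.+$', outdir):
  # Remote outdir: upload the dependencies and the .results manifest to the server.
  upload_sha1_tree(base_url=outdir, indir=root_dir, infiles=infiles)
else:
  # Local outdir: hardlink each entry into a content-addressed directory keyed by sha-1.
  recreate_tree(outdir=outdir, indir=root_dir, infiles=infiles,
                action=run_test_from_archive.HARDLINK, as_sha1=True)
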
@@ -1668,7 +1750,7 @@ class OptionParserIsolate(trace_inputs.OptionParserWithNiceDescription):
os.path.abspath(
options.isolate.replace('/', os.path.sep)))
- if options.outdir:
+ if options.outdir and not re.match(r'^https?://.+$', options.outdir):
options.outdir = os.path.abspath(
options.outdir.replace('/', os.path.sep))
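
With the parser change above, an http(s) --outdir is passed through verbatim as the upload base url, while anything else is still normalized to an absolute native path. A small illustration of the two cases; normalize_outdir is a hypothetical helper mirroring the parser logic, and the values are examples only:

import os
import re

def normalize_outdir(outdir):
  # URLs are used as-is as the upload base url; everything else is treated as
  # a local directory and converted to an absolute native path.
  if outdir and not re.match(r'^https?://.+$', outdir):
    outdir = os.path.abspath(outdir.replace('/', os.path.sep))
  return outdir

print normalize_outdir('out/hashtable')                     # becomes an absolute local path
print normalize_outdir('http://isolateserver.example.com')  # left unchanged
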