author     dtu@chromium.org <dtu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-03-20 00:02:13 +0000
committer  dtu@chromium.org <dtu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-03-20 00:02:13 +0000
commit     a035cf61635545d0343b2d441aa8da035625da8b (patch)
tree       ac8a99a181d2f224d48c530c85d515cef32b2de7
parent     863563c4dd481eeacdfc39af64d77567ec938d0d (diff)
[telemetry] Avoid deleting hash files with cloud_storage script.

Also s/GetHash/CalculateHash, and add ReadHash function.

BUG=None
TEST=None

Review URL: https://codereview.chromium.org/196423012

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@258165 0039d316-1c4b-4281-b951-d872f2087c98
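For reference, the renamed and added helpers can be exercised like this; a minimal sketch, not part of the patch, with hypothetical file names (the same comparison appears in GetIfChanged below):

    from telemetry.page import cloud_storage

    # Hypothetical paths, for illustration only.
    file_path = 'archive.wpr'
    hash_path = file_path + '.sha1'

    # CalculateHash (formerly GetHash) hashes the file's contents;
    # ReadHash returns the digest recorded in the .sha1 sidecar file.
    if cloud_storage.CalculateHash(file_path) == cloud_storage.ReadHash(hash_path):
      print 'Local file matches the recorded hash; no fetch needed.'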
-rw-r--r--  tools/perf/page_sets/PRESUBMIT.py                                   5
-rw-r--r--  tools/perf/page_sets/presubmit_unittest.py                          4
-rwxr-xr-x  tools/telemetry/cloud_storage                                      62
-rw-r--r--  tools/telemetry/telemetry/page/cloud_storage.py                    12
-rw-r--r--  tools/telemetry/telemetry/page/page_set_archive_info.py            2
-rw-r--r--  tools/telemetry/telemetry/page/page_set_archive_info_unittest.py   2
-rw-r--r--  tools/telemetry/telemetry/unittest/system_stub.py                   6
7 files changed, 49 insertions(+), 44 deletions(-)
diff --git a/tools/perf/page_sets/PRESUBMIT.py b/tools/perf/page_sets/PRESUBMIT.py
index c5ca146..c352e79 100644
--- a/tools/perf/page_sets/PRESUBMIT.py
+++ b/tools/perf/page_sets/PRESUBMIT.py
@@ -49,8 +49,7 @@ def _GetFilesNotInCloud(input_api):
   files = []
   for hash_path in hash_paths:
-    with open(hash_path, 'rb') as f:
-      file_hash = f.read(1024).rstrip()
+    file_hash = cloud_storage.ReadHash(hash_path)
     if file_hash not in hashes_in_cloud_storage:
       files.append((hash_path, file_hash))
@@ -77,7 +76,7 @@ def _SyncFilesToCloud(input_api, output_api):
       results.append(output_api.PresubmitError(
           'Hash file exists, but file not found: %s' % hash_path))
       continue
-    if cloud_storage.GetHash(file_path) != file_hash:
+    if cloud_storage.CalculateHash(file_path) != file_hash:
       results.append(output_api.PresubmitError(
           'Hash file does not match file\'s actual hash: %s' % hash_path))
       continue
diff --git a/tools/perf/page_sets/presubmit_unittest.py b/tools/perf/page_sets/presubmit_unittest.py
index b3d0569..9e3a0a9 100644
--- a/tools/perf/page_sets/presubmit_unittest.py
+++ b/tools/perf/page_sets/presubmit_unittest.py
@@ -59,7 +59,7 @@ class PresubmitTest(unittest.TestCase):
     success_file_hash = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

     self._stubs = system_stub.Override(
-        PRESUBMIT, ['cloud_storage', 'open', 'os', 'raw_input'])
+        PRESUBMIT, ['cloud_storage', 'os', 'raw_input'])
     self._stubs.raw_input.input = 'public'
     # Files in Cloud Storage.
     self._stubs.cloud_storage.remote_paths = [
@@ -75,7 +75,7 @@ class PresubmitTest(unittest.TestCase):
     self._stubs.os.path.files = (
         self._stubs.cloud_storage.local_file_hashes.keys())
     # Local hash files and their contents.
-    self._stubs.open.files = {
+    self._stubs.cloud_storage.local_hash_files = {
         '/path/to/invalid_hash.wpr.sha1': 'invalid_hash',
         '/path/to/missing.wpr.sha1': 'missing'.zfill(40),
         '/path/to/success.wpr.sha1': success_file_hash,
diff --git a/tools/telemetry/cloud_storage b/tools/telemetry/cloud_storage
index 947586a..242b461 100755
--- a/tools/telemetry/cloud_storage
+++ b/tools/telemetry/cloud_storage
@@ -12,11 +12,13 @@ from telemetry.core import command_line
 from telemetry.page import cloud_storage


-BUCKET_CHOICES = {
+BUCKET_ALIASES = {
     'public': cloud_storage.PUBLIC_BUCKET,
     'partner': cloud_storage.PARTNER_BUCKET,
     'google-only': cloud_storage.INTERNAL_BUCKET,
 }
+BUCKETS = {bucket: easy_bucket_name for easy_bucket_name, bucket
+           in BUCKET_ALIASES.iteritems()}


 def _GetPaths(path):
@@ -31,33 +33,32 @@ def _GetPaths(path):
 def _FindFilesInCloudStorage(files):
+  """Returns a dict of all files and which buckets they're in."""
+  # Preprocessing: get the contents of all buckets.
   bucket_contents = {}
-  for easy_bucket_name, bucket in BUCKET_CHOICES.iteritems():
+  for bucket in BUCKETS:
     try:
-      bucket_contents[easy_bucket_name] = cloud_storage.List(bucket)
+      bucket_contents[bucket] = cloud_storage.List(bucket)
     except (cloud_storage.PermissionError, cloud_storage.CredentialsError):
       pass

+  # Check if each file is in the bucket contents.
   file_buckets = {}
   for path in files:
     file_path, hash_path = _GetPaths(path)

     if file_path in file_buckets:
+      # Ignore duplicates, if both data and sha1 file were in the file list.
       continue
     if not os.path.exists(hash_path):
+      # Probably got some non-Cloud Storage files in the file list. Ignore.
       continue

-    with open(hash_path, 'rb') as f:
-      file_hash = f.read(1024).rstrip()
-
-    buckets = []
-    for bucket in BUCKET_CHOICES:
-      if bucket not in bucket_contents:
-        continue
+    file_hash = cloud_storage.ReadHash(hash_path)
+    file_buckets[file_path] = []
+    for bucket in BUCKETS:
       if file_hash in bucket_contents[bucket]:
-        buckets.append(bucket)
-
-    file_buckets[file_path] = buckets
+        file_buckets[file_path].append(bucket)

   return file_buckets
@@ -74,10 +75,10 @@ class Ls(command_line.Command):
   def ProcessCommandLineArgs(cls, parser, args):
     for path in args.paths:
       if not os.path.exists(path):
-        parser.error('File not found: %s' % path)
+        parser.error('Path not found: %s' % path)

   def Run(self, args):
-    def GetFilesInPath(paths, recursive):
+    def GetFilesInPaths(paths, recursive):
       """If path is a dir, yields all files in path, otherwise just yields path.

       If recursive is true, walks subdirectories recursively."""
@@ -94,7 +95,7 @@ class Ls(command_line.Command):
         for filename in os.listdir(path):
           yield os.path.join(path, filename)

-    files = _FindFilesInCloudStorage(GetFilesInPath(args.paths, args.recursive))
+    files = _FindFilesInCloudStorage(GetFilesInPaths(args.paths, args.recursive))

     if not files:
       print 'No files in Cloud Storage.'
@@ -102,6 +103,7 @@ class Ls(command_line.Command):

     for file_path, buckets in sorted(files.iteritems()):
       if buckets:
+        buckets = [BUCKETS[bucket] for bucket in buckets]
         print '%-11s %s' % (','.join(buckets), file_path)
       else:
         print '%-11s %s' % ('not found', file_path)
@@ -113,13 +115,13 @@ class Mv(command_line.Command):
   @classmethod
   def AddCommandLineArgs(cls, parser):
     parser.add_argument('files', nargs='+')
-    parser.add_argument('bucket', choices=BUCKET_CHOICES)
+    parser.add_argument('bucket', choices=BUCKET_ALIASES)

   @classmethod
   def ProcessCommandLineArgs(cls, parser, args):
-    args.bucket = BUCKET_CHOICES[args.bucket]
+    args.bucket = BUCKET_ALIASES[args.bucket]

-    for path in args.paths:
+    for path in args.files:
       _, hash_path = _GetPaths(path)
       if not os.path.exists(hash_path):
         parser.error('File not found: %s' % hash_path)
@@ -132,12 +134,11 @@ class Mv(command_line.Command):
       raise IOError('%s not found in Cloud Storage.' % file_path)

     for file_path, buckets in sorted(files.iteritems()):
-      hash_path = file_path + '.sha1'
-      with open(hash_path, 'rb') as f:
-        file_hash = f.read(1024).rstrip()
-
+      # Move to the target bucket.
+      file_hash = cloud_storage.ReadHash(file_path + '.sha1')
       cloud_storage.Move(buckets.pop(), args.bucket, file_hash)

+      # Delete all additional copies.
       for bucket in buckets:
         if bucket == args.bucket:
           continue
@@ -153,7 +154,7 @@ class Rm(command_line.Command):

   @classmethod
   def ProcessCommandLineArgs(cls, parser, args):
-    for path in args.paths:
+    for path in args.files:
       _, hash_path = _GetPaths(path)
       if not os.path.exists(hash_path):
         parser.error('File not found: %s' % hash_path)
@@ -161,10 +162,7 @@ class Rm(command_line.Command):

   def Run(self, args):
     files = _FindFilesInCloudStorage(args.files)
     for file_path, buckets in sorted(files.iteritems()):
-      hash_path = file_path + '.sha1'
-      with open(hash_path, 'rb') as f:
-        file_hash = f.read(1024).rstrip()
-
+      file_hash = cloud_storage.ReadHash(file_path + '.sha1')
       for bucket in buckets:
         cloud_storage.Delete(bucket, file_hash)
@@ -175,19 +173,19 @@ class Upload(command_line.Command):
   @classmethod
   def AddCommandLineArgs(cls, parser):
     parser.add_argument('files', nargs='+')
-    parser.add_argument('bucket', choices=BUCKET_CHOICES)
+    parser.add_argument('bucket', choices=BUCKET_ALIASES)

   @classmethod
   def ProcessCommandLineArgs(cls, parser, args):
-    args.bucket = BUCKET_CHOICES[args.bucket]
+    args.bucket = BUCKET_ALIASES[args.bucket]

-    for path in args.paths:
+    for path in args.files:
       if not os.path.exists(path):
         parser.error('File not found: %s' % path)

   def Run(self, args):
     for file_path in args.files:
-      file_hash = cloud_storage.GetHash(file_path)
+      file_hash = cloud_storage.CalculateHash(file_path)

       # Create or update the hash file.
       hash_path = file_path + '.sha1'
diff --git a/tools/telemetry/telemetry/page/cloud_storage.py b/tools/telemetry/telemetry/page/cloud_storage.py
index 3250e31..4eb0745 100644
--- a/tools/telemetry/telemetry/page/cloud_storage.py
+++ b/tools/telemetry/telemetry/page/cloud_storage.py
@@ -178,9 +178,8 @@ def GetIfChanged(file_path, bucket=None):
   if not os.path.exists(hash_path):
     return False

-  with open(hash_path, 'rb') as f:
-    expected_hash = f.read(1024).rstrip()
-  if os.path.exists(file_path) and GetHash(file_path) == expected_hash:
+  expected_hash = ReadHash(hash_path)
+  if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
     return False

   if bucket:
@@ -203,7 +202,7 @@ def GetIfChanged(file_path, bucket=None):
   return found


-def GetHash(file_path):
+def CalculateHash(file_path):
   """Calculates and returns the hash of the file at file_path."""
   sha1 = hashlib.sha1()
   with open(file_path, 'rb') as f:
@@ -214,3 +213,8 @@ def GetHash(file_path):
         break
       sha1.update(chunk)
   return sha1.hexdigest()
+
+
+def ReadHash(hash_path):
+  with open(hash_path, 'rb') as f:
+    return f.read(1024).rstrip()
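The .sha1 sidecar that ReadHash consumes is just the 40-character hex digest, so the read is capped at 1024 bytes and trailing whitespace is stripped. A round-trip sketch, with hypothetical file names (the write side mirrors what page_set_archive_info.py does below):

    from telemetry.page import cloud_storage

    # Write the sidecar from the real file, then read it back.
    with open('archive.wpr.sha1', 'wb') as f:
      f.write(cloud_storage.CalculateHash('archive.wpr'))
    assert len(cloud_storage.ReadHash('archive.wpr.sha1')) == 40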
diff --git a/tools/telemetry/telemetry/page/page_set_archive_info.py b/tools/telemetry/telemetry/page/page_set_archive_info.py
index 0a03efe..d69314d 100644
--- a/tools/telemetry/telemetry/page/page_set_archive_info.py
+++ b/tools/telemetry/telemetry/page/page_set_archive_info.py
@@ -78,7 +78,7 @@ class PageSetArchiveInfo(object):

     # Update the hash file.
     with open(target_wpr_file_path + '.sha1', 'wb') as f:
-      f.write(cloud_storage.GetHash(target_wpr_file_path))
+      f.write(cloud_storage.CalculateHash(target_wpr_file_path))
       f.flush()

     self._WriteToFile()
diff --git a/tools/telemetry/telemetry/page/page_set_archive_info_unittest.py b/tools/telemetry/telemetry/page/page_set_archive_info_unittest.py
index 2fcd421..740bfbc 100644
--- a/tools/telemetry/telemetry/page/page_set_archive_info_unittest.py
+++ b/tools/telemetry/telemetry/page/page_set_archive_info_unittest.py
@@ -56,7 +56,7 @@ class TestPageSetArchiveInfo(unittest.TestCase):
   def assertCorrectHashFile(self, file_path):
     self.assertTrue(os.path.exists(file_path + '.sha1'))
     with open(file_path + '.sha1', 'rb') as f:
-      self.assertEquals(cloud_storage.GetHash(file_path), f.read())
+      self.assertEquals(cloud_storage.CalculateHash(file_path), f.read())

   def testReadingArchiveInfo(self):
     self.assertIsNotNone(self.archive_info.WprFilePathForPage(page1))
diff --git a/tools/telemetry/telemetry/unittest/system_stub.py b/tools/telemetry/telemetry/unittest/system_stub.py
index 1e9ddb5..36798f9 100644
--- a/tools/telemetry/telemetry/unittest/system_stub.py
+++ b/tools/telemetry/telemetry/unittest/system_stub.py
@@ -106,6 +106,7 @@ class CloudStorageModuleStub(object):
   def __init__(self):
     self.remote_paths = []
     self.local_file_hashes = {}
+    self.local_hash_files = {}

   def List(self, _):
     return self.remote_paths
@@ -113,9 +114,12 @@ class CloudStorageModuleStub(object):
   def Insert(self, bucket, remote_path, local_path):
     pass

-  def GetHash(self, file_path):
+  def CalculateHash(self, file_path):
     return self.local_file_hashes[file_path]

+  def ReadHash(self, hash_path):
+    return self.local_hash_files[hash_path]
+

 class OpenFunctionStub(object):
   class FileStub(object):
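For context, tests consume this stub through system_stub.Override, as in the presubmit_unittest.py change above; a minimal sketch with an illustrative module and path (PRESUBMIT stands in for whatever module is under test):

    from telemetry.unittest import system_stub

    # Replace the cloud_storage module referenced by PRESUBMIT with the stub.
    stubs = system_stub.Override(PRESUBMIT, ['cloud_storage'])
    stubs.cloud_storage.local_hash_files = {
        '/path/to/file.wpr.sha1': 'da39a3ee5e6b4b0d3255bfef95601890afd80709',
    }
    # Code in PRESUBMIT that calls cloud_storage.ReadHash now reads from
    # this dict instead of the filesystem.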