# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import posixpath
import traceback
import urllib

from docs_server_utils import StringIdentity
from environment_wrappers import CreateUrlFetcher
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (
    AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different for the docserver;
# GCS requires that paths start with a /, we require that they don't.

# Name of the file containing the Git hash of the latest commit sync'ed
# to Cloud Storage. This file is generated by the Github->GCS sync script.
_LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'

# Base URL for GCS requests.
_STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1'
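
# Objects are fetched from URLs of the form
# <_STORAGE_API_BASE>/b/<bucket>/o/<url-escaped object name>; the 'alt=media'
# query parameter selects the object's contents rather than its JSON metadata
# (see _FetchObject and _FetchObjectData below).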


class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_bucket_prefix=None):
    self._bucket = bucket
    self._access_token = None
    self._last_commit_hash = None
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    def resolve():
      result = {}
      for path in paths:
        if IsDirectory(path):
          result[path] = self._ListDir(path)
        else:
          result[path] = self._ReadFile(path)
      return result
    return Future(callback=resolve)

  def Refresh(self):
    return Future(value=())

  def Stat(self, path):
    AssertIsValid(path)
    return self._CreateStatInfo(path)

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))
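
  # The version reported for every path is the hash of the last commit
  # sync'ed to the bucket, read from _LAST_COMMIT_HASH_FILENAME (see the
  # comment at the top of this file).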
  def _CreateStatInfo(self, path):
    if not self._last_commit_hash:
      self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME)
    if IsDirectory(path):
      child_versions = dict((filename, self._last_commit_hash)
                            for filename in self._ListDir(path))
    else:
      child_versions = None
    return StatInfo(self._last_commit_hash, child_versions)

  def _ReadFile(self, path):
    AssertIsFile(path)
    return self._FetchObjectData(path)

  def _ListDir(self, path, recursive=False):
    AssertIsDirectory(path)
    # The listbucket method uses a prefix approach to simulate hierarchy.
    # Calling it with the "delimiter" argument set to '/' gets only files
    # directly inside the directory, not all recursive content.
    # Subdirectories are returned in the 'prefixes' property, but they are
    # full paths from the root. This plucks off the name of the leaf with a
    # trailing slash.
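    # For example, a returned prefix of 'extensions/examples/' for the path
    # 'extensions/' becomes the relative entry 'examples/' (the names here
    # are purely illustrative).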
    def path_from_prefix(prefix):
      return posixpath.split(posixpath.split(prefix)[0])[1] + '/'

    query = { 'prefix': path }
    if not recursive:
      query['delimiter'] = '/'
    root_object = json.loads(self._FetchObject('', query=query))
    files = [posixpath.basename(o['name'])
             for o in root_object.get('items', [])]
    dirs = [path_from_prefix(prefix)
            for prefix in root_object.get('prefixes', [])]
    return files + dirs

  def _FetchObject(self, path, query={}):
    # Escape the path, including slashes.
    url_path = urllib.quote(path.lstrip('/'), safe='')
    fetcher = CreateUrlFetcher()
    object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path)
    response = fetcher.Fetch(object_url, query=query)
    if response.status_code != 200:
      raise FileNotFoundError(
          'Path %s not found in GCS bucket %s' % (path, self._bucket))
    return response.content
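
  # Passing 'alt=media' asks the GCS JSON API for the raw object contents
  # rather than the object's JSON metadata.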
  def _FetchObjectData(self, path, query={}):
    q = query.copy()
    q.update({ 'alt': 'media' })
    return self._FetchObject(path, query=q)

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket
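

# Example usage (illustrative sketch; the bucket name and paths below are
# hypothetical):
#
#   fs = CloudStorageFileSystem('docs-data-bucket')
#   contents = fs.Read(['extensions/manifest.json', 'extensions/']).Get()
#   # contents['extensions/manifest.json'] holds the file's contents;
#   # contents['extensions/'] lists the files and subdirectories directly
#   # inside 'extensions/'.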