# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import posixpath
import traceback
import urllib

from docs_server_utils import StringIdentity
from environment_wrappers import CreateUrlFetcher
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (
    AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different from the docserver's;
# GCS requires that paths start with a /, we require that they don't.


# Name of the file containing the Git hash of the latest commit synced to
# Cloud Storage. This file is generated by the GitHub->GCS sync script.
_LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'

# Base URL for GCS JSON API requests.
_STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1'


class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_bucket_prefix=None):
    self._bucket = bucket
    self._access_token = None
    self._last_commit_hash = None
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    def resolve():
      result = {}
      for path in paths:
        if IsDirectory(path):
          result[path] = self._ListDir(path)
        else:
          result[path] = self._ReadFile(path)
      return result

    return Future(callback=resolve)

  def Refresh(self):
    return Future(value=())

  def Stat(self, path):
    AssertIsValid(path)
    return self._CreateStatInfo(path)

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def _CreateStatInfo(self, path):
    # Every synced file shares a single version: the hash of the last commit
    # pushed to the bucket, recorded in _LAST_COMMIT_HASH_FILENAME.
    if not self._last_commit_hash:
      self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME)
    if IsDirectory(path):
      child_versions = dict((filename, self._last_commit_hash)
                            for filename in self._ListDir(path))
    else:
      child_versions = None
    return StatInfo(self._last_commit_hash, child_versions)

  def _ReadFile(self, path):
    AssertIsFile(path)
    return self._FetchObjectData(path)

  def _ListDir(self, path, recursive=False):
    AssertIsDirectory(path)
    # Listing objects in a bucket uses a prefix approach to simulate a
    # directory hierarchy. Setting the "delimiter" argument to '/' returns
    # only files directly inside the directory, not all recursive content.

    # Subdirectories are returned in the 'prefixes' property, but they are
    # full paths from the root. This plucks off the name of the leaf with a
    # trailing slash.
    def path_from_prefix(prefix):
      return posixpath.split(posixpath.split(prefix)[0])[1] + '/'

    query = { 'prefix': path }
    if not recursive:
      query['delimiter'] = '/'
    root_object = json.loads(self._FetchObject('', query=query))
    files = [posixpath.basename(o['name'])
             for o in root_object.get('items', [])]
    dirs = [path_from_prefix(prefix)
            for prefix in root_object.get('prefixes', [])]
    return files + dirs

  def _FetchObject(self, path, query={}):
    # Escape the path, including slashes.
    url_path = urllib.quote(path.lstrip('/'), safe='')
    fetcher = CreateUrlFetcher()
    object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path)
    response = fetcher.Fetch(object_url, query=query)
    if response.status_code != 200:
      raise FileNotFoundError(
          'Path %s not found in GCS bucket %s' % (path, self._bucket))
    return response.content

  def _FetchObjectData(self, path, query={}):
    # 'alt=media' asks the storage API for the object's contents rather than
    # its metadata.
    q = query.copy()
    q.update({ 'alt': 'media' })
    return self._FetchObject(path, query=q)

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket
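

# A minimal usage sketch, kept as a comment so it never runs in production.
# It assumes the docserver environment ('future', 'environment_wrappers',
# 'path_util', etc.) is importable, that StatInfo exposes the version it was
# constructed with, and that the bucket name and paths below are hypothetical
# examples rather than real resources:
#
#   fs = CloudStorageFileSystem('example-docs-bucket')
#
#   # Read() returns a Future; Get() resolves it to a dict keyed by path.
#   contents = fs.Read(['docs/', 'docs/README.md']).Get()
#   # contents['docs/'] is a list of file names plus 'subdir/' entries;
#   # contents['docs/README.md'] is the raw object data.
#
#   # Stat() versions everything with the last synced commit hash.
#   print fs.Stat('docs/').version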