path: root/chrome/common/extensions/docs/server2/gcs_file_system.py
blob: 62c21be73faed035957416a6a90c843c7d79e39c
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import posixpath
import traceback
import urllib

from docs_server_utils import StringIdentity
from environment_wrappers import CreateUrlFetcher
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (
    AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that GCS and the docserver have different path requirements: GCS
# requires that paths start with a '/', while the docserver requires that
# they don't.
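#
# For example (illustrative only): the docserver path
# 'extensions/examples/manifest.json' names the GCS object of the same name
# with no leading '/'; _FetchObject below strips any leading '/' and
# percent-encodes the rest of the path, slashes included, when building the
# request URL.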


# Name of the file containing the Git hash of the latest commit synced to
# Cloud Storage. This file is generated by the GitHub->GCS sync script.
_LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'


# Base URL for GCS requests.
_STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1'
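
# Requests are made against the GCS JSON API; <base> below is
# _STORAGE_API_BASE. Illustrative request shapes (see _FetchObject,
# _FetchObjectData and _ListDir):
#   file contents:      GET <base>/b/<bucket>/o/<escaped path>?alt=media
#   directory listing:  GET <base>/b/<bucket>/o/?prefix=<dir>&delimiter=/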


class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_bucket_prefix=None):
    self._bucket = bucket
    self._access_token = None
    self._last_commit_hash = None
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    def resolve():
      result = {}
      for path in paths:
        if IsDirectory(path):
          result[path] = self._ListDir(path)
        else:
          result[path] = self._ReadFile(path)
      return result

    return Future(callback=resolve)

  def Refresh(self):
    return Future(value=())

  def Stat(self, path):
    AssertIsValid(path)
    return self._CreateStatInfo(path)

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def _CreateStatInfo(self, path):
    if not self._last_commit_hash:
      self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME)
    if IsDirectory(path):
      child_versions = dict((filename, self._last_commit_hash)
                            for filename in self._ListDir(path))
    else:
      child_versions = None
    return StatInfo(self._last_commit_hash, child_versions)

  def _ReadFile(self, path):
    AssertIsFile(path)
    return self._FetchObjectData(path)

  def _ListDir(self, path, recursive=False):
    AssertIsDirectory(path)
    # The objects.list request uses a prefix to simulate directory hierarchy.
    # Setting the 'delimiter' argument to '/' returns only the files directly
    # inside |path|, rather than all of its recursive contents.

    # Subdirectories are returned in the 'prefixes' property as full paths
    # from the root of the bucket. This plucks off the name of the leaf
    # directory and appends a trailing slash.
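    # For example (illustrative only): a prefix of 'docs/templates/public/'
    # maps to 'public/'.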
    def path_from_prefix(prefix):
      return posixpath.split(posixpath.split(prefix)[0])[1] + '/'

    query = { 'prefix': path }
    if not recursive:
      query['delimiter'] = '/'
    root_object = json.loads(self._FetchObject('', query=query))
    files = [posixpath.basename(o['name'])
             for o in root_object.get('items', [])]
    dirs = [path_from_prefix(prefix)
            for prefix in root_object.get('prefixes', [])]
    return files + dirs

  def _FetchObject(self, path, query={}):
    # Escape the path, including slashes.
    url_path = urllib.quote(path.lstrip('/'), safe='')
    fetcher = CreateUrlFetcher()
    object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path)
    response = fetcher.Fetch(object_url, query=query)
    if response.status_code != 200:
      raise FileNotFoundError(
          'Path %s not found in GCS bucket %s' % (path, self._bucket))
    return response.content

  def _FetchObjectData(self, path, query={}):
    q = query.copy()
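    # The 'alt=media' query parameter asks the GCS JSON API for the raw object
    # contents rather than its JSON metadata.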
    q.update({ 'alt': 'media' })
    return self._FetchObject(path, query=q)

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket
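

# Example usage (a minimal sketch; the bucket name and paths below are
# hypothetical):
#
#   fs = CloudStorageFileSystem('docs-example-bucket')
#   # Read one file and list one directory in a single batch.
#   contents = fs.Read(['docs/templates/public/index.html',
#                       'docs/templates/public/']).Get()
#   html = contents['docs/templates/public/index.html']
#   listing = contents['docs/templates/public/']  # files + subdirectories
#   # Stat reports the last commit hash synced to the bucket as the version.
#   version = fs.Stat('docs/templates/public/index.html').version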