summaryrefslogtreecommitdiffstats
path: root/chrome/common/extensions/docs/server2/gitiles_file_system.py
blob: ecfd78c9b59c381c0a5447ecdcd897b41af91413 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


from base64 import b64decode
from itertools import izip
import json
import logging
import posixpath
import time
import traceback

from appengine_url_fetcher import AppEngineUrlFetcher
from appengine_wrappers import IsDownloadError, app_identity
from docs_server_utils import StringIdentity
from environment import IsDevServer
from file_system import (FileNotFoundError,
                         FileSystem,
                         FileSystemError,
                         FileSystemThrottledError,
                         StatInfo)
from future import All, Future
from path_util import AssertIsValid, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize
from url_constants import (GITILES_BASE,
                           GITILES_SRC_ROOT,
                           GITILES_BRANCHES_PATH,
                           GITILES_OAUTH2_SCOPE)


# Query strings appended to Gitiles URLs to select the response format:
# JSON is requested for directory listings and commit info, TEXT for file
# content.
_JSON_FORMAT = '?format=JSON'
_TEXT_FORMAT = '?format=TEXT'
# Path component inserted after GITILES_BASE when requests should go through
# the forced-authentication (/a/) endpoint.
_AUTH_PATH_PREFIX = '/a'


def _ParseGitilesJson(json_data):
  '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON
  data coming from Gitiles views.
  '''
  return json.loads(json_data[json_data.find('{'):])


def _CreateStatInfo(json_data):
  '''Builds a StatInfo from the Gitiles directory JSON in |json_data|.

  The StatInfo's version is the tree's own ID, and its child versions map
  each entry's name to that entry's ID.
  '''
  parsed_tree = _ParseGitilesJson(json_data)
  tree_id = parsed_tree['id']
  child_ids = {}
  for entry in parsed_tree['entries']:
    child_ids[entry['name']] = entry['id']
  return StatInfo(tree_id, child_ids)


class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.

  Instances are normally built through Create(), which derives the Gitiles
  base URL from a branch name or a commit ID. All network access goes through
  the injected fetcher's FetchAsync, so the read-style methods return Futures.
  '''
  # Access tokens that have already been written to the log. This is a class
  # attribute, so it is shared by every instance.
  _logged_tokens = set()

  @classmethod
  def Create(cls, branch='master', commit=None):
    '''Returns a GitilesFileSystem for |commit| if given, otherwise for
    |branch|.

    |commit| takes precedence over |branch| when choosing the base URL. The
    'master' branch is addressed directly under GITILES_SRC_ROOT, while any
    other branch is addressed under GITILES_BRANCHES_PATH.
    '''
    token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)

    # Log the access token (once per token) so that it can be sneakily re-used
    # in development.
    # NOTE(review): this writes a live OAuth2 access token to the logs; confirm
    # that this is acceptable for wherever these logs end up.
    if token not in cls._logged_tokens:
      logging.info('Got token %s for scope %s' % (token, GITILES_OAUTH2_SCOPE))
      cls._logged_tokens.add(token)

    # Only include forced-auth (/a/) in the Gitiles URL if we have a token and
    # this is not the development server.
    path_prefix = ('' if token is None or IsDevServer()
                      else _AUTH_PATH_PREFIX)
    if commit:
      base_url = '%s%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit)
    elif branch == 'master':
      # Compare by value, not identity: an equal string that is a different
      # object (e.g. unicode or computed at runtime) must still select the
      # master URL. This also matches the comparison used in GetIdentity.
      base_url = '%s%s/%s/master' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT)
    else:
      base_url = '%s%s/%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT,
          GITILES_BRANCHES_PATH, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the collaborators used by every request.

    |fetcher| must provide FetchAsync(url, access_token=...). |base_url| is
    the Gitiles URL that all paths are resolved against. |branch| and |commit|
    are kept for GetIdentity and GetVersion.
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync, so callers don't
    need to use posixpath.join.

    |url| is validated with AssertIsValid, appended to the base URL, and
    fetched with a freshly requested access token.
    '''
    AssertIsValid(url)
    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url),
                                    access_token=access_token)

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    On success the future yields the response content. A 404 yields None when
    |skip_not_found| is set and raises FileNotFoundError otherwise; a 429
    raises FileSystemThrottledError; any other non-200 status raises
    FileSystemError. Errors raised by the fetch itself are mapped to
    FileNotFoundError for download errors (or None with |skip_not_found|) and
    to FileSystemError for everything else. |path| is only used in messages.
    '''
    def handle(e):
      # Error callback: translate fetch exceptions into FileSystem errors.
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    def get_content(result):
      # Success callback: translate HTTP status codes into content or errors.
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code == 429:
        logging.warning('Access throttled when fetching %s for Get from %s' %
            (path, self._base_url))
        raise FileSystemThrottledError(
            'Access throttled when fetching %s for Get from %s' %
            (path, self._base_url))
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content

    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Returns a future resolving to a dict mapping each path in |paths| to
    its content.

    Directory paths map to a list of entry names (sub-directories carry a
    trailing '/'); file paths map to the base64-decoded file content. Paths
    whose content resolved to None (only possible with |skip_not_found|) are
    omitted from the result.
    '''
    # Directory content is formatted in JSON in Gitiles as follows:
    #
    #   {
    #     "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #     "entries": [
    #       {
    #         "mode": 33188,
    #         "type": "blob",
    #           "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #         "name": ".gitignore"
    #       },
    #       ...
    #     ]
    #   }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # |paths| is walked twice: once here to issue the fetches and once later,
    # lazily, in parse_contents. Snapshot it so that one-shot iterables and
    # caller-side mutation cannot desynchronise paths from results.
    paths = list(paths)

    # A list of tuples of the form (path, Future).
    fetches = [(path, self._FetchAsync(fixup_url_format(path)))
               for path in paths]

    def parse_contents(results):
      value = {}
      for path, content in izip(paths, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value

    return All(self._ResolveFetchContent(path, future, skip_not_found)
               for path, future in fetches).Then(parse_contents)

  def Refresh(self):
    '''Returns a Future holding an empty tuple; this class keeps no local
    state to refresh.
    '''
    return Future(value=())

  # NOTE(review): memoize presumably caches the returned future per
  # (instance, key); confirm against third_party memoize.
  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    Returns a future resolving to the value stored under |key| in the commit
    info JSON served for the base URL.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53",   # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:18:28 2014"
        },
        "message": "...",
        "tree_diff": [...]
      }
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.

    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT,
                                            access_token=access_token)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    return content_future.Then(
        lambda json_data: _ParseGitilesJson(json_data)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def GetPreviousCommitID(self):
    '''Returns a future that resolves to the previous commit ID for this branch.

    This is the first entry of the commit's "parents" list.
    '''
    return self._GetCommitInfo('parents').Then(lambda parents: parents[0])

  def StatAsync(self, path):
    '''Returns a future resolving to the StatInfo for |path|.

    The listing of the directory containing |path| is fetched. For a directory
    path the resulting StatInfo (tree ID plus child IDs) is returned as is; for
    a file path a StatInfo holding only that file's ID is returned.

    Raises FileSystemError if the listing has no tree ID, and
    FileNotFoundError if a file is not present in its parent's listing.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])

    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns a string identifying this file system, built from the class
    name and a StringIdentity of the Gitiles location it reads from.
    '''
    if self._branch == 'master':
      # A master FS always carries the same identity even if pinned to a commit.
      str_id = 'master'
    elif self._commit is not None:
      str_id = self._commit
    else:
      str_id = '%s/%s' % (GITILES_BRANCHES_PATH, self._branch)
    return '@'.join((self.__class__.__name__, StringIdentity(
        '%s/%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT, str_id))))

  def GetVersion(self):
    '''Returns the commit this file system was constructed with (may be None).
    '''
    return self._commit