1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from collections import defaultdict
import posixpath
from future import Future
from path_util import SplitParent
from special_paths import SITE_VERIFICATION_FILE
def _Normalize(file_name, splittext=False):
normalized = file_name
if splittext:
normalized = posixpath.splitext(file_name)[0]
normalized = normalized.replace('.', '').replace('-', '').replace('_', '')
return normalized.lower()
def _CommonNormalizedPrefix(first_file, second_file):
    '''Returns the longest leading run of characters shared by the normalized
    (see _Normalize) forms of |first_file| and |second_file|.
    '''
    normalized_names = [_Normalize(name) for name in (first_file, second_file)]
    return posixpath.commonprefix(normalized_names)
class PathCanonicalizer(object):
    '''Transforms paths into their canonical forms. Since the docserver has had
    many incarnations - e.g. there didn't use to be apps/ - there may be old
    paths lying around the webs. We try to redirect those to where they are
    now.
    '''

    def __init__(self,
                 file_system,
                 object_store_creator,
                 strip_extensions):
        '''|file_system| is walked from its root to discover the known paths.
        |object_store_creator| creates the object store used as a cache; the
        store's category is the file system's identity.
        |strip_extensions| is a list of file extensions (e.g. .html) that
        should be stripped for a path's canonical form.
        '''
        self._cache = object_store_creator.Create(
            PathCanonicalizer, category=file_system.GetIdentity())
        self._file_system = file_system
        self._strip_extensions = strip_extensions

    def _LoadCache(self):
        '''Returns a Future of (canonical_paths, simplified_paths_map).

        Both values are read from the cache when present. Otherwise they are
        computed by walking the file system and written back to the cache
        before being returned.
        '''
        cached_future = self._cache.GetMulti(('canonical_paths',
                                              'simplified_paths_map'))

        def resolve():
            # |canonical_paths| is the pre-calculated set of canonical paths.
            # |simplified_paths_map| is a mapping of simplified file names to
            # a list of full paths that contain them. For example,
            #  - browseraction: [extensions/browserAction.html]
            #  - storage: [apps/storage.html, extensions/storage.html]
            cached = cached_future.Get()
            canonical_paths = cached.get('canonical_paths')
            simplified_paths_map = cached.get('simplified_paths_map')

            if canonical_paths is not None:
                # The two entries are always written together, so one being
                # present implies the other is too.
                assert simplified_paths_map is not None
                return canonical_paths, simplified_paths_map

            assert simplified_paths_map is None
            canonical_paths = set()
            simplified_paths_map = defaultdict(list)
            for base, dirs, files in self._file_system.Walk(''):
                for path in dirs + files:
                    path_without_ext, ext = posixpath.splitext(path)
                    canonical_path = posixpath.join(base, path_without_ext)
                    # Keep the extension unless it is one we strip; the site
                    # verification file always keeps its extension.
                    if (ext not in self._strip_extensions or
                            path == SITE_VERIFICATION_FILE):
                        canonical_path += ext
                    canonical_paths.add(canonical_path)
                    simplified_name = _Normalize(path, splittext=True)
                    simplified_paths_map[simplified_name].append(
                        canonical_path)

            # Store |simplified_paths_map| sorted: shortest path first, with
            # ties in length broken lexicographically. Canonicalize() prefers
            # earlier entries when candidates match equally well.
            # .values() (rather than .itervalues()) works on Python 2 and 3.
            for path_list in simplified_paths_map.values():
                path_list.sort(key=lambda p: (len(p), p))

            self._cache.SetMulti({
                'canonical_paths': canonical_paths,
                'simplified_paths_map': simplified_paths_map,
            })
            return canonical_paths, simplified_paths_map

        return Future(callback=resolve)

    def Canonicalize(self, path):
        '''Returns the canonical path for |path|.

        |path| is returned unchanged when it is already canonical, or when no
        known path has a similar enough file name. Otherwise the known path
        whose normalized form shares the longest prefix with |path| is
        returned.
        '''
        canonical_paths, simplified_paths_map = self._LoadCache().Get()

        # Path may already be the canonical path.
        if path in canonical_paths:
            return path

        # Path not found. Our single heuristic: find |base| in the directory
        # structure with the longest common prefix of |path|.
        # NOTE(review): SplitParent presumably returns (parent, base name);
        # confirm against path_util.
        _, base = SplitParent(path)

        # Paths with a non-extension dot separator lose information when
        # _Normalize splits off the "extension", so we try the name both with
        # and without that split to maximize the possibility of finding the
        # right path.
        full_key = _Normalize(base)
        stem_key = _Normalize(base, splittext=True)
        # Copy so the cached list is never mutated.
        potential_paths = list(simplified_paths_map.get(full_key, []))
        if stem_key != full_key:
            # Only consult the second key when it differs; otherwise every
            # candidate would just be listed (and scored) twice.
            potential_paths.extend(simplified_paths_map.get(stem_key, []))

        if not potential_paths:
            # There is no file with anything close to that name.
            return path

        # The most likely canonical file is the one with the longest common
        # prefix with |path|. This is slightly weaker than it could be; |path|
        # is compared without symbols, not the simplified form of |path|,
        # which may matter. max() returns the first maximal candidate, so ties
        # go to the earliest (i.e. shortest) entry.
        return max(
            potential_paths,
            key=lambda candidate: len(
                _CommonNormalizedPrefix(candidate, path)))

    def Cron(self):
        '''Returns the Future from _LoadCache(); resolving it computes and
        caches the path data if it is not cached already.
        '''
        return self._LoadCache()
|