1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/string_piece.h"
#include "base/string_util.h"
#include "chrome/common/extensions/url_pattern.h"
// Schemes a URL pattern is allowed to name; IsValidScheme() compares its
// argument against each of these strings.
// TODO(aa): Consider adding chrome-extension? What about more obscure ones
// like data: and javascript: ?
static const char* kValidSchemes[] = {
  "http",
  "https",
  "file",
  "ftp",
  "chrome-ui"
};

// Text that separates the scheme from the remainder of a pattern.
static const char kSchemeSeparator[] = "://";

// Text that marks where the host ends and the path begins.
static const char kPathSeparator[] = "/";
// Returns true when |scheme| is exactly equal to one of the entries of
// kValidSchemes, and false otherwise.
static bool IsValidScheme(const std::string& scheme) {
  const char* const* const schemes_end =
      kValidSchemes + arraysize(kValidSchemes);
  for (const char* const* candidate = kValidSchemes; candidate != schemes_end;
       ++candidate) {
    if (scheme.compare(*candidate) == 0)
      return true;
  }
  return false;
}
bool URLPattern::Parse(const std::string& pattern) {
size_t scheme_end_pos = pattern.find(kSchemeSeparator);
if (scheme_end_pos == std::string::npos)
return false;
scheme_ = pattern.substr(0, scheme_end_pos);
if (!IsValidScheme(scheme_))
return false;
size_t host_start_pos = scheme_end_pos + strlen(kSchemeSeparator);
if (host_start_pos >= pattern.length())
return false;
// Parse out the host and path.
size_t path_start_pos = 0;
// File URLs are special because they have no host. There are other schemes
// with the same structure, but we don't support them (yet).
if (scheme_ == "file") {
path_start_pos = host_start_pos;
} else {
size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
if (host_end_pos == std::string::npos)
return false;
host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
// The first component can optionally be '*' to match all subdomains.
std::vector<std::string> host_components;
SplitString(host_, '.', &host_components);
if (host_components[0] == "*") {
match_subdomains_ = true;
host_components.erase(host_components.begin(),
host_components.begin() + 1);
}
host_ = JoinString(host_components, '.');
// No other '*' can occur in the host, though. This isn't necessary, but is
// done as a convenience to developers who might otherwise be confused and
// think '*' works as a glob in the host.
if (host_.find('*') != std::string::npos)
return false;
path_start_pos = host_end_pos;
}
path_ = pattern.substr(path_start_pos);
return true;
}
// Returns true when |test| has this pattern's scheme and also satisfies
// MatchesHost() and MatchesPath(). The checks run in that order and stop at
// the first one that fails.
bool URLPattern::MatchesUrl(const GURL &test) {
  return test.scheme() == scheme_ &&
         MatchesHost(test) &&
         MatchesPath(test);
}
// Returns true when the host of |test| is matched by this pattern: either it
// equals host_ exactly, or subdomain matching is enabled and the host is
// covered by the wildcard.
bool URLPattern::MatchesHost(const GURL& test) {
  const std::string test_host = test.host();

  // An exact match always succeeds.
  if (test_host == host_)
    return true;

  // Anything else can only match through the leading '*' wildcard.
  if (!match_subdomains_)
    return false;

  // If we're matching subdomains, and we have no host, that means the pattern
  // was <scheme>://*/<whatever>, so we match anything. This has to be decided
  // before the IP address check below, otherwise a bare '*' host would fail
  // to match URLs whose host is an IP literal.
  if (host_.empty())
    return true;

  // Subdomain (suffix) matching is not applied to IP addresses.
  if (test.HostIsIPAddress())
    return false;

  // Check if the test host is a subdomain of our host: it must be long enough
  // to hold at least one character, a '.', and then host_ itself.
  if (test_host.length() <= host_.length() + 1)
    return false;

  const size_t suffix_start = test_host.length() - host_.length();
  if (test_host.compare(suffix_start, host_.length(), host_) != 0)
    return false;

  // The suffix must begin on a component boundary.
  return test_host[suffix_start - 1] == '.';
}
// Returns the result of matching PathForRequest() of |test| against this
// pattern's path via MatchPattern().
//
// The pattern handed to MatchPattern() is a copy of path_ in which every
// backslash and every '?' is prefixed with a backslash (presumably so that
// MatchPattern() treats them as literal characters -- confirm against its
// documentation). That copy is built the first time it is needed and kept in
// path_escaped_ for later calls.
bool URLPattern::MatchesPath(const GURL& test) {
  if (path_escaped_.empty()) {
    std::string escaped(path_);
    // Backslashes are doubled first so that the ones inserted in front of
    // '?' on the next line are not themselves doubled.
    ReplaceSubstringsAfterOffset(&escaped, 0, "\\", "\\\\");
    ReplaceSubstringsAfterOffset(&escaped, 0, "?", "\\?");
    path_escaped_ = escaped;
  }
  return MatchPattern(test.PathForRequest(), path_escaped_);
}
|