chrome/common/extensions/url_pattern.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/string_piece.h"
#include "base/string_util.h"
#include "chrome/common/extensions/url_pattern.h"

// The URL schemes a pattern is allowed to use. IsValidScheme() compares a
// parsed scheme against each entry with an exact string comparison.
//
// Both the characters and the pointers themselves are const so that the table
// cannot be modified at runtime.
//
// TODO(aa): Consider adding chrome-extension? What about more obscure ones
// like data: and javascript: ?
static const char* const kValidSchemes[] = {
  "http",
  "https",
  "file",
  "ftp",
  "chrome-ui"
};

// Text that follows the scheme in a pattern. URLPattern::Parse() searches for
// it to find where the scheme ends and skips over it to reach the host.
static const char kSchemeSeparator[] = "://";
// URLPattern::Parse() searches for this, starting at the host, to find where
// the host ends and the path begins. The separator itself is kept as the
// first character of the path.
static const char kPathSeparator[] = "/";

// Returns true when |scheme| is exactly equal to one of the entries in
// kValidSchemes, and false otherwise. No case folding or trimming is done
// here; the comparison is byte-for-byte.
static bool IsValidScheme(const std::string& scheme) {
  const size_t scheme_count = arraysize(kValidSchemes);
  size_t index = 0;
  while (index < scheme_count) {
    if (scheme.compare(kValidSchemes[index]) == 0)
      return true;
    ++index;
  }
  return false;
}

bool URLPattern::Parse(const std::string& pattern) {
  size_t scheme_end_pos = pattern.find(kSchemeSeparator);
  if (scheme_end_pos == std::string::npos)
    return false;

  scheme_ = pattern.substr(0, scheme_end_pos);
  if (!IsValidScheme(scheme_))
    return false;

  size_t host_start_pos = scheme_end_pos + strlen(kSchemeSeparator);
  if (host_start_pos >= pattern.length())
    return false;

  // Parse out the host and path.
  size_t path_start_pos = 0;

  // File URLs are special because they have no host. There are other schemes
  // with the same structure, but we don't support them (yet).
  if (scheme_ == "file") {
    path_start_pos = host_start_pos;
  } else {
    size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
    if (host_end_pos == std::string::npos)
      return false;

    host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);

    // The first component can optionally be '*' to match all subdomains.
    std::vector<std::string> host_components;
    SplitString(host_, '.', &host_components);
    if (host_components[0] == "*") {
      match_subdomains_ = true;
      host_components.erase(host_components.begin(),
                            host_components.begin() + 1);
    }
    host_ = JoinString(host_components, '.');

    // No other '*' can occur in the host, though. This isn't necessary, but is
    // done as a convenience to developers who might otherwise be confused and
    // think '*' works as a glob in the host.
    if (host_.find('*') != std::string::npos)
      return false;

    path_start_pos = host_end_pos;
  }
  
  path_ = pattern.substr(path_start_pos);
  return true;
}

// Returns true when |test| satisfies every part of this pattern: its scheme
// equals scheme_, and both MatchesHost() and MatchesPath() accept it. The
// checks run in that order and stop at the first one that fails.
bool URLPattern::MatchesUrl(const GURL &test) {
  return test.scheme() == scheme_ &&
         MatchesHost(test) &&
         MatchesPath(test);
}

// Returns true when the host of |test| is accepted by this pattern:
//   - it equals host_ exactly, or
//   - match_subdomains_ is set, the host is not an IP address, and either
//     host_ is empty (match anything) or the host ends with "." + host_.
bool URLPattern::MatchesHost(const GURL& test) {
  const std::string candidate = test.host();

  // An exact match always wins, whether or not subdomains are allowed.
  if (candidate == host_)
    return true;

  // Everything below is subdomain matching, which never applies to IP
  // address hosts.
  if (!match_subdomains_)
    return false;
  if (test.HostIsIPAddress())
    return false;

  // Subdomain matching with no host means the pattern was
  // <scheme>://*/<whatever>, which accepts every host.
  if (host_.empty())
    return true;

  // A real subdomain needs at least one character plus a '.' in front of
  // host_, so anything this short cannot qualify.
  const size_t suffix_len = host_.length();
  if (candidate.length() <= suffix_len + 1)
    return false;

  // The candidate must end with host_, and the character immediately before
  // that suffix must be the '.' separating the subdomain label.
  const size_t suffix_start = candidate.length() - suffix_len;
  return candidate.compare(suffix_start, suffix_len, host_) == 0 &&
         candidate[suffix_start - 1] == '.';
}

// Returns the result of MatchPattern() applied to test.PathForRequest() and
// an escaped copy of path_.
//
// The escaped copy is built the first time it is needed (whenever
// path_escaped_ is empty) and then reused: every backslash in path_ is
// doubled, and every '?' is prefixed with a backslash. Presumably this makes
// MatchPattern() treat those two characters literally -- confirm against
// MatchPattern()'s documentation.
bool URLPattern::MatchesPath(const GURL& test) {
  if (path_escaped_.empty()) {
    std::string escaped(path_);
    // Backslashes must be escaped first so the ones added for '?' below are
    // not doubled again.
    ReplaceSubstringsAfterOffset(&escaped, 0, "\\", "\\\\");
    ReplaceSubstringsAfterOffset(&escaped, 0, "?", "\\?");
    path_escaped_ = escaped;
  }

  return MatchPattern(test.PathForRequest(), path_escaped_);
}