summaryrefslogtreecommitdiffstats
path: root/url
diff options
context:
space:
mode:
authormkwst <mkwst@chromium.org>2015-07-14 15:41:06 -0700
committerCommit bot <commit-bot@chromium.org>2015-07-14 22:41:53 +0000
commit28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49 (patch)
treec5ee030e88775f54e19d94ed328fc28f6966c97a /url
parent6fdafbf5e1ca5fbe4e9357c25978906453557225 (diff)
downloadchromium_src-28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49.zip
chromium_src-28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49.tar.gz
chromium_src-28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49.tar.bz2
Introduce 'url::SchemeHostPort'.
BUG=490074 Review URL: https://codereview.chromium.org/1211253014 Cr-Commit-Position: refs/heads/master@{#338761}
Diffstat (limited to 'url')
-rw-r--r--url/BUILD.gn3
-rw-r--r--url/scheme_host_port.cc116
-rw-r--r--url/scheme_host_port.h132
-rw-r--r--url/scheme_host_port_unittest.cc153
-rw-r--r--url/url.gyp1
-rw-r--r--url/url_srcs.gypi2
6 files changed, 407 insertions, 0 deletions
diff --git a/url/BUILD.gn b/url/BUILD.gn
index f019d39..db5a6ad 100644
--- a/url/BUILD.gn
+++ b/url/BUILD.gn
@@ -24,6 +24,8 @@ component("url") {
"deprecated_serialized_origin.h",
"gurl.cc",
"gurl.h",
+ "scheme_host_port.cc",
+ "scheme_host_port.h",
"third_party/mozilla/url_parse.cc",
"third_party/mozilla/url_parse.h",
"url_canon.h",
@@ -95,6 +97,7 @@ if (!is_android) {
sources = [
"deprecated_serialized_origin_unittest.cc",
"gurl_unittest.cc",
+ "scheme_host_port_unittest.cc",
"url_canon_icu_unittest.cc",
"url_canon_unittest.cc",
"url_parse_unittest.cc",
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc
new file mode 100644
index 0000000..cb2d5cc
--- /dev/null
+++ b/url/scheme_host_port.cc
@@ -0,0 +1,116 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/scheme_host_port.h"
+
+#include <string.h>
+
+#include "base/logging.h"
+#include "base/strings/string_number_conversions.h"
+#include "url/gurl.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
+
+namespace url {
+
+SchemeHostPort::SchemeHostPort()
+    : port_(0) {}  // Empty scheme/host plus port 0 is the invalid tuple.
+
+SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
+ base::StringPiece host,
+ uint16 port)
+ : scheme_(scheme.data(), scheme.length()),
+ host_(host.data(), host.length()),
+ port_(port) {
+#if DCHECK_IS_ON()  // The checks below exist only when DCHECKs are enabled.
+ DCHECK(url::IsStandard(scheme.data(),
+ url::Component(0, static_cast<int>(scheme.length()))));
+
+ // Canonicalize |host| (copy/pasted from net/base. :( ) to compare it below.
+ const url::Component raw_host_component(0, static_cast<int>(host.length()));
+ std::string canon_host;
+ url::StdStringCanonOutput canon_host_output(&canon_host);
+ url::CanonHostInfo host_info;
+ url::CanonicalizeHostVerbose(host.data(), raw_host_component,
+ &canon_host_output, &host_info);
+
+ if (host_info.out_host.is_nonempty() &&
+ host_info.family != url::CanonHostInfo::BROKEN) {
+ // Success! The output must hold exactly the canonical host, nothing more.
+ canon_host_output.Complete();
+ DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
+ } else {
+ // Empty host, or canonicalization failed: compare against an empty string.
+ canon_host.clear();
+ }
+ DCHECK_EQ(host, canon_host);  // |host| must already be in canonical form.
+
+ DCHECK(scheme == kFileScheme ? port == 0 : port != 0);  // 'file' <=> port 0.
+ DCHECK(!host.empty() || port == 0);  // A nonzero port requires a host.
+#endif  // DCHECK_IS_ON()
+}
+
+SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) {
+  // Invalid and non-standard URLs are left as the invalid tuple.
+  if (!url.is_valid() || !url.IsStandard())
+    return;
+
+  // Blob and filesystem URLs do not follow the generic URL syntax, so they
+  // are treated as invalid (scheme, host, port) tuples too: their _Origin_
+  // might have such a tuple, but the URLs themselves do not.
+  if (url.SchemeIsBlob() || url.SchemeIsFileSystem())
+    return;
+
+  const int effective_port = url.EffectiveIntPort();
+  scheme_ = url.scheme();
+  host_ = url.host();
+  port_ = effective_port == url::PORT_UNSPECIFIED ? 0 : effective_port;
+}
+
+SchemeHostPort::~SchemeHostPort() {  // Nothing to release by hand.
+}
+
+bool SchemeHostPort::IsInvalid() const {
+  return port_ == 0 && host_.empty() && scheme_.empty();
+}
+
+std::string SchemeHostPort::Serialize() const {
+  // Invalid tuples serialize to the empty string.
+  if (IsInvalid())
+    return std::string();
+
+  const bool uses_default_port =
+      port_ == url::DefaultPortForScheme(scheme_.data(),
+                                         static_cast<int>(scheme_.length()));
+
+  // Always emit "scheme://host".
+  std::string serialized(scheme_);
+  serialized.append(kStandardSchemeSeparator);
+  serialized.append(host_);
+
+  // Append ":port" unless the scheme is 'file' or the port is the default.
+  if (!uses_default_port && scheme_ != kFileScheme) {
+    serialized.push_back(':');
+    serialized.append(base::IntToString(port_));
+  }
+  return serialized;
+}
+
+bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
+  return scheme_ == other.scheme_ && host_ == other.host_ &&
+         port_ == other.port_;
+}
+
+bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
+  // Lexicographic ordering over (port, scheme, host), in that order.
+  if (port_ == other.port_) {
+    if (scheme_ == other.scheme_)
+      return host_ < other.host_;
+    return scheme_ < other.scheme_;
+  }
+  return port_ < other.port_;
+}
+
+} // namespace url
diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h
new file mode 100644
index 0000000..227c1f1
--- /dev/null
+++ b/url/scheme_host_port.h
@@ -0,0 +1,132 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_SCHEME_HOST_PORT_H_
+#define URL_SCHEME_HOST_PORT_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/strings/string_piece.h"
+#include "url/url_export.h"
+
+class GURL;
+
+namespace url {
+
+// This class represents a (scheme, host, port) tuple extracted from a URL.
+//
+// The primary purpose of this class is to represent relevant network-authority
+// information for a URL. It is _not_ an Origin, as described in RFC 6454. In
+// particular, it is generally NOT the right thing to use for security
+// decisions.
+//
+// Instead, this class is a mechanism for simplifying URLs with standard schemes
+// (that is, those which follow the generic syntax of RFC 3986) down to the
+// uniquely identifying information necessary for network fetches. This makes it
+// suitable as a cache key for a collection of active connections, for instance.
+// It may, however, be inappropriate to use as a cache key for persistent
+// storage associated with a host.
+//
+// In particular, note that:
+//
+// * SchemeHostPort can only represent schemes which follow the RFC 3986 syntax
+// (e.g. those registered with GURL as "standard schemes"). Non-standard
+// schemes such as "blob", "filesystem", "data", and "javascript" can only be
+// represented as invalid SchemeHostPort objects.
+//
+// * The "file" scheme follows the standard syntax, but it is important to note
+// that the authority portion (host, port) is optional. URLs without an
+// authority portion will be represented with an empty string for the host,
+// and a port of 0 (e.g. "file:///etc/hosts" => ("file", "", 0)), and URLs
+// with a host-only authority portion will be represented with a port of 0
+// (e.g. "file://example.com/etc/hosts" => ("file", "example.com", 0)). See
+// Section 3 of RFC 3986 to better understand these constructs.
+//
+// * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in
+// particular, it has no notion of a "unique" Origin. If you need to take
+// uniqueness into account (and, if you're making security-relevant decisions
+// then you absolutely do), please use 'url::Origin' instead[1].
+//
+// [1]: // TODO(mkwst): Land 'url::Origin'. :)
+//
+// Usage:
+//
+// * SchemeHostPort objects are commonly created from GURL objects:
+//
+// GURL url("https://example.com/");
+// url::SchemeHostPort tuple(url);
+// tuple.scheme(); // "https"
+// tuple.host(); // "example.com"
+// tuple.port(); // 443
+//
+// * Objects may also be explicitly created and compared:
+//
+// url::SchemeHostPort tuple(url::kHttpsScheme, "example.com", 443);
+// tuple.scheme(); // "https"
+// tuple.host(); // "example.com"
+// tuple.port(); // 443
+//
+// GURL url("https://example.com/");
+// tuple.Equals(url::SchemeHostPort(url)); // true
+class URL_EXPORT SchemeHostPort {
+ public:
+ // Creates an invalid (scheme, host, port) tuple, which represents an invalid
+ // or non-standard URL.
+ SchemeHostPort();
+
+ // Creates a (scheme, host, port) tuple. |host| must be a canonicalized
+ // A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme|
+ // must be a standard scheme. |port| must not be 0, unless |scheme| does not
+ // support ports (e.g. 'file'). In that case, |port| must be 0.
+ //
+ // Copies the data in |scheme| and |host|.
+ SchemeHostPort(base::StringPiece scheme, base::StringPiece host, uint16 port);
+
+ // Creates a (scheme, host, port) tuple from |url|, as described at
+ // https://tools.ietf.org/html/rfc6454#section-4
+ //
+ // If |url| is invalid or non-standard, the result will be an invalid
+ // SchemeHostPort object.
+ explicit SchemeHostPort(const GURL& url);
+
+ ~SchemeHostPort();
+
+ // Returns the host component, in URL form. That is, all IDN domain names will
+ // be expressed as A-Labels ('☃.net' will be returned as 'xn--n3h.net'), and
+ // all IPv6 addresses will be enclosed in brackets ("[2001:db8::1]").
+ std::string host() const { return host_; }
+ std::string scheme() const { return scheme_; }
+ uint16 port() const { return port_; }
+ bool IsInvalid() const;  // True iff scheme and host are empty and port is 0.
+
+ // Serializes the SchemeHostPort tuple to a canonical form.
+ //
+ // While this string form resembles the Origin serialization specified in
+ // Section 6.2 of RFC 6454, it is important to note that invalid
+ // SchemeHostPort tuples serialize to the empty string, rather than being
+ // serialized as a unique Origin.
+ std::string Serialize() const;
+
+ // Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
+ // are exact matches.
+ //
+ // Note that this comparison is _not_ the same as an origin-based comparison.
+ // In particular, invalid SchemeHostPort objects match each other (and
+ // themselves). Unique origins, on the other hand, would not.
+ bool Equals(const SchemeHostPort& other) const;
+
+ // Allows SchemeHostPort to be used as a key in STL containers (for example,
+ // a std::set or std::map). Orders by port, then scheme, then host.
+ bool operator<(const SchemeHostPort& other) const;
+
+ private:
+ std::string scheme_;
+ std::string host_;
+ uint16 port_;
+};
+
+} // namespace url
+
+#endif // URL_SCHEME_HOST_PORT_H_
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc
new file mode 100644
index 0000000..3001d24
--- /dev/null
+++ b/url/scheme_host_port_unittest.cc
@@ -0,0 +1,153 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/scheme_host_port.h"
+
+namespace {
+
+TEST(SchemeHostPortTest, Invalid) {
+  url::SchemeHostPort empty_tuple;
+  EXPECT_EQ("", empty_tuple.scheme());
+  EXPECT_EQ("", empty_tuple.host());
+  EXPECT_EQ(0, empty_tuple.port());
+  EXPECT_TRUE(empty_tuple.IsInvalid());
+  EXPECT_TRUE(empty_tuple.Equals(empty_tuple));
+
+  const char* invalid_urls[] = {"data:text/html,Hello!",
+                                "javascript:alert(1)",
+                                "file://example.com:443/etc/passwd",
+                                "blob:https://example.com/uuid-goes-here",
+                                "filesystem:https://example.com/temporary/yay.png"};
+
+  for (size_t i = 0; i < arraysize(invalid_urls); ++i) {
+    SCOPED_TRACE(invalid_urls[i]);
+    GURL parsed(invalid_urls[i]);
+    url::SchemeHostPort from_url(parsed);
+    EXPECT_EQ("", from_url.scheme());
+    EXPECT_EQ("", from_url.host());
+    EXPECT_EQ(0, from_url.port());
+    EXPECT_TRUE(from_url.IsInvalid());
+    EXPECT_TRUE(from_url.Equals(from_url));
+    EXPECT_TRUE(from_url.Equals(empty_tuple));
+    EXPECT_TRUE(empty_tuple.Equals(from_url));
+  }
+}
+
+TEST(SchemeHostPortTest, ExplicitConstruction) {
+  struct Expectation {
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } expectations[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"https", "example.com", 443},
+      {"https", "example.com", 123},
+      {"file", "", 0},
+      {"file", "example.com", 0},
+  };
+
+  for (const auto& expected : expectations) {
+    SCOPED_TRACE(testing::Message() << expected.scheme << "://"
+                                    << expected.host << ":" << expected.port);
+    url::SchemeHostPort actual(expected.scheme, expected.host, expected.port);
+    EXPECT_EQ(expected.scheme, actual.scheme());
+    EXPECT_EQ(expected.host, actual.host());
+    EXPECT_EQ(expected.port, actual.port());
+    EXPECT_FALSE(actual.IsInvalid());
+    EXPECT_TRUE(actual.Equals(actual));
+  }
+}
+
+TEST(SchemeHostPortTest, GURLConstruction) {
+  struct Expectation {
+    const char* url;
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } expectations[] = {
+      {"http://192.168.9.1/", "http", "192.168.9.1", 80},
+      {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
+      {"http://☃.net/", "http", "xn--n3h.net", 80},
+      {"http://example.com/", "http", "example.com", 80},
+      {"http://example.com:123/", "http", "example.com", 123},
+      {"https://example.com/", "https", "example.com", 443},
+      {"https://example.com:123/", "https", "example.com", 123},
+      {"file:///etc/passwd", "file", "", 0},
+      {"file://example.com/etc/passwd", "file", "example.com", 0},
+      {"http://u:p@example.com/", "http", "example.com", 80},
+      {"http://u:p@example.com/path", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123#hash", "http", "example.com", 80},
+  };
+
+  for (const auto& expected : expectations) {
+    SCOPED_TRACE(expected.url);
+    GURL parsed(expected.url);
+    EXPECT_TRUE(parsed.is_valid());
+    url::SchemeHostPort actual(parsed);
+    EXPECT_EQ(expected.scheme, actual.scheme());
+    EXPECT_EQ(expected.host, actual.host());
+    EXPECT_EQ(expected.port, actual.port());
+    EXPECT_FALSE(actual.IsInvalid());
+    EXPECT_TRUE(actual.Equals(actual));
+  }
+}
+
+TEST(SchemeHostPortTest, Serialization) {
+  struct Expectation {
+    const char* url;
+    const char* serialized;
+  } expectations[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://"},
+      {"file://example.com/etc/passwd", "file://example.com"},
+  };
+
+  for (size_t i = 0; i < arraysize(expectations); ++i) {
+    SCOPED_TRACE(expectations[i].url);
+    GURL parsed(expectations[i].url);
+    url::SchemeHostPort from_url(parsed);
+    EXPECT_EQ(expectations[i].serialized, from_url.Serialize());
+  }
+}
+
+TEST(SchemeHostPortTest, Comparison) {
+  // Listed in strictly ascending order under SchemeHostPort::operator<:
+  struct Entry {
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } ordered[] = {
+      {"http", "a", 80},
+      {"http", "b", 80},
+      {"https", "a", 80},
+      {"https", "b", 80},
+      {"http", "a", 81},
+      {"http", "b", 81},
+      {"https", "a", 81},
+      {"https", "b", 81},
+  };
+
+  for (size_t i = 0; i < arraysize(ordered); ++i) {
+    url::SchemeHostPort lhs(ordered[i].scheme, ordered[i].host,
+                            ordered[i].port);
+    for (size_t j = i; j < arraysize(ordered); ++j) {
+      url::SchemeHostPort rhs(ordered[j].scheme, ordered[j].host,
+                              ordered[j].port);
+      EXPECT_EQ(i < j, lhs < rhs) << i << " < " << j;
+      EXPECT_EQ(j < i, rhs < lhs) << j << " < " << i;
+    }
+  }
+}
+
+} // namespace
diff --git a/url/url.gyp b/url/url.gyp
index 8a78744..198d448 100644
--- a/url/url.gyp
+++ b/url/url.gyp
@@ -48,6 +48,7 @@
'sources': [
'gurl_unittest.cc',
'deprecated_serialized_origin_unittest.cc',
+ 'scheme_host_port_unittest.cc',
'url_canon_icu_unittest.cc',
'url_canon_unittest.cc',
'url_parse_unittest.cc',
diff --git a/url/url_srcs.gypi b/url/url_srcs.gypi
index cd9a9bd..525598b 100644
--- a/url/url_srcs.gypi
+++ b/url/url_srcs.gypi
@@ -11,6 +11,8 @@
'deprecated_serialized_origin.h',
'gurl.cc',
'gurl.h',
+ 'scheme_host_port.cc',
+ 'scheme_host_port.h',
'third_party/mozilla/url_parse.cc',
'third_party/mozilla/url_parse.h',
'url_canon.h',