diff options
author | mkwst <mkwst@chromium.org> | 2015-07-14 15:41:06 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-07-14 22:41:53 +0000 |
commit | 28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49 (patch) | |
tree | c5ee030e88775f54e19d94ed328fc28f6966c97a /url | |
parent | 6fdafbf5e1ca5fbe4e9357c25978906453557225 (diff) | |
download | chromium_src-28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49.zip chromium_src-28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49.tar.gz chromium_src-28c7c11feb94dc2593e5a4bf2b7aeb9d15bd0e49.tar.bz2 |
Introduce 'url::SchemeHostPort'.
BUG=490074
Review URL: https://codereview.chromium.org/1211253014
Cr-Commit-Position: refs/heads/master@{#338761}
Diffstat (limited to 'url')
-rw-r--r-- | url/BUILD.gn | 3 | ||||
-rw-r--r-- | url/scheme_host_port.cc | 116 | ||||
-rw-r--r-- | url/scheme_host_port.h | 132 | ||||
-rw-r--r-- | url/scheme_host_port_unittest.cc | 153 | ||||
-rw-r--r-- | url/url.gyp | 1 | ||||
-rw-r--r-- | url/url_srcs.gypi | 2 |
6 files changed, 407 insertions, 0 deletions
diff --git a/url/BUILD.gn b/url/BUILD.gn index f019d39..db5a6ad 100644 --- a/url/BUILD.gn +++ b/url/BUILD.gn @@ -24,6 +24,8 @@ component("url") { "deprecated_serialized_origin.h", "gurl.cc", "gurl.h", + "scheme_host_port.cc", + "scheme_host_port.h", "third_party/mozilla/url_parse.cc", "third_party/mozilla/url_parse.h", "url_canon.h", @@ -95,6 +97,7 @@ if (!is_android) { sources = [ "deprecated_serialized_origin_unittest.cc", "gurl_unittest.cc", + "scheme_host_port_unittest.cc", "url_canon_icu_unittest.cc", "url_canon_unittest.cc", "url_parse_unittest.cc", diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc new file mode 100644 index 0000000..cb2d5cc --- /dev/null +++ b/url/scheme_host_port.cc @@ -0,0 +1,116 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "url/scheme_host_port.h" + +#include <string.h> + +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "url/gurl.h" +#include "url/url_canon.h" +#include "url/url_canon_stdstring.h" +#include "url/url_constants.h" +#include "url/url_util.h" + +namespace url { + +SchemeHostPort::SchemeHostPort() : port_(0) { +} + +SchemeHostPort::SchemeHostPort(base::StringPiece scheme, + base::StringPiece host, + uint16 port) + : scheme_(scheme.data(), scheme.length()), + host_(host.data(), host.length()), + port_(port) { +#if DCHECK_IS_ON() + DCHECK(url::IsStandard(scheme.data(), + url::Component(0, static_cast<int>(scheme.length())))); + + // Try to canonicalize the host (copy/pasted from net/base. :( ). 
+ const url::Component raw_host_component(0, static_cast<int>(host.length())); + std::string canon_host; + url::StdStringCanonOutput canon_host_output(&canon_host); + url::CanonHostInfo host_info; + url::CanonicalizeHostVerbose(host.data(), raw_host_component, + &canon_host_output, &host_info); + + if (host_info.out_host.is_nonempty() && + host_info.family != url::CanonHostInfo::BROKEN) { + // Success! Assert that there's no extra garbage. + canon_host_output.Complete(); + DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length())); + } else { + // Empty host, or canonicalization failed. + canon_host.clear(); + } + DCHECK_EQ(host, canon_host); + + DCHECK(scheme == kFileScheme ? port == 0 : port != 0); + DCHECK(!host.empty() || port == 0); +#endif +} + +SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) { + if (!url.is_valid() || !url.IsStandard()) + return; + + // These schemes do not follow the generic URL syntax, so we treat them as + // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might + // have a (scheme, host, port) tuple, they themselves do not). + if (url.SchemeIsBlob() || url.SchemeIsFileSystem()) + return; + + scheme_ = url.scheme(); + host_ = url.host(); + port_ = url.EffectiveIntPort() == url::PORT_UNSPECIFIED + ? 
0 + : url.EffectiveIntPort(); +} + +SchemeHostPort::~SchemeHostPort() { +} + +bool SchemeHostPort::IsInvalid() const { + return scheme_.empty() && host_.empty() && !port_; +} + +std::string SchemeHostPort::Serialize() const { + std::string result; + if (IsInvalid()) + return result; + + bool is_default_port = + port_ == url::DefaultPortForScheme(scheme_.data(), + static_cast<int>(scheme_.length())); + + result.append(scheme_); + result.append(kStandardSchemeSeparator); + result.append(host_); + + if (scheme_ != kFileScheme && !is_default_port) { + result.push_back(':'); + result.append(base::IntToString(port_)); + } + + return result; +} + +bool SchemeHostPort::Equals(const SchemeHostPort& other) const { + return port_ == other.port() && scheme_ == other.scheme() && + host_ == other.host(); +} + +bool SchemeHostPort::operator<(const SchemeHostPort& other) const { + if (port_ != other.port_) + return port_ < other.port_; + if (scheme_ != other.scheme_) + return scheme_ < other.scheme_; + if (host_ != other.host_) + return host_ < other.host_; + return false; +} + +} // namespace url diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h new file mode 100644 index 0000000..227c1f1 --- /dev/null +++ b/url/scheme_host_port.h @@ -0,0 +1,132 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef URL_SCHEME_HOST_PORT_H_ +#define URL_SCHEME_HOST_PORT_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/strings/string_piece.h" +#include "url/url_export.h" + +class GURL; + +namespace url { + +// This class represents a (scheme, host, port) tuple extracted from a URL. +// +// The primary purpose of this class is to represent relevant network-authority +// information for a URL. It is _not_ an Origin, as described in RFC 6454. In +// particular, it is generally NOT the right thing to use for security +// decisions. 
+// +// Instead, this class is a mechanism for simplifying URLs with standard schemes +// (that is, those which follow the generic syntax of RFC 3986) down to the +// uniquely identifying information necessary for network fetches. This makes it +// suitable as a cache key for a collection of active connections, for instance. +// It may, however, be inappropriate to use as a cache key for persistent +// storage associated with a host. +// +// In particular, note that: +// +// * SchemeHostPort can only represent schemes which follow the RFC 3986 syntax +// (e.g. those registered with GURL as "standard schemes"). Non-standard +// schemes such as "blob", "filesystem", "data", and "javascript" can only be +// represented as invalid SchemeHostPort objects. +// +// * The "file" scheme follows the standard syntax, but it is important to note +// that the authority portion (host, port) is optional. URLs without an +// authority portion will be represented with an empty string for the host, +// and a port of 0 (e.g. "file:///etc/hosts" => ("file", "", 0)), and URLs +// with a host-only authority portion will be represented with a port of 0 +// (e.g. "file://example.com/etc/hosts" => ("file", "example.com", 0)). See +// Section 3 of RFC 3986 to better understand these constructs. +// +// * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in +// particular, it has no notion of a "unique" Origin. If you need to take +// uniqueness into account (and, if you're making security-relevant decisions +// then you absolutely do), please use 'url::Origin' instead[1]. +// +// [1]: // TODO(mkwst): Land 'url::Origin'. 
:) +// +// Usage: +// +// * SchemeHostPort objects are commonly created from GURL objects: +// +// GURL url("https://example.com/"); +// url::SchemeHostPort tuple(url); +// tuple.scheme(); // "https" +// tuple.host(); // "example.com" +// tuple.port(); // 443 +// +// * Objects may also be explicitly created and compared: +// +// url::SchemeHostPort tuple(url::kHttpsScheme, "example.com", 443); +// tuple.scheme(); // "https" +// tuple.host(); // "example.com" +// tuple.port(); // 443 +// +// GURL url("https://example.com/"); +// tuple.Equals(url::SchemeHostPort(url)); // true +class URL_EXPORT SchemeHostPort { + public: + // Creates an invalid (scheme, host, port) tuple, which represents an invalid + // or non-standard URL. + SchemeHostPort(); + + // Creates a (scheme, host, port) tuple. |host| must be a canonicalized + // A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme| + // must be a standard scheme. |port| must not be 0, unless |scheme| does not + // support ports (e.g. 'file'). In that case, |port| must be 0. + // + // Copies the data in |scheme| and |host|. + SchemeHostPort(base::StringPiece scheme, base::StringPiece host, uint16 port); + + // Creates a (scheme, host, port) tuple from |url|, as described at + // https://tools.ietf.org/html/rfc6454#section-4 + // + // If |url| is invalid or non-standard, the result will be an invalid + // SchemeHostPort object. + explicit SchemeHostPort(const GURL& url); + + ~SchemeHostPort(); + + // Returns the host component, in URL form. That is all IDN domain names will + // be expressed as A-Labels ('☃.net' will be returned as 'xn--n3h.net'), and + // all IPv6 addresses will be enclosed in brackets ("[2001:db8::1]"). + std::string host() const { return host_; } + std::string scheme() const { return scheme_; } + uint16 port() const { return port_; } + bool IsInvalid() const; + + // Serializes the SchemeHostPort tuple to a canonical form. 
+ // + // While this string form resembles the Origin serialization specified in + // Section 6.2 of RFC 6454, it is important to note that invalid + // SchemeHostPort tuples serialize to the empty string, rather than being + // serialized as a unique Origin. + std::string Serialize() const; + + // Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports + // are exact matches. + // + // Note that this comparison is _not_ the same as an origin-based comparison. + // In particular, invalid SchemeHostPort objects match each other (and + // themselves). Unique origins, on the other hand, would not. + bool Equals(const SchemeHostPort& other) const; + + // Allows SchemeHostPort to be used as a key in STL (for example, a std::set + // or std::map). + bool operator<(const SchemeHostPort& other) const; + + private: + std::string scheme_; + std::string host_; + uint16 port_; +}; + +} // namespace url + +#endif // URL_SCHEME_HOST_PORT_H_ diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc new file mode 100644 index 0000000..3001d24 --- /dev/null +++ b/url/scheme_host_port_unittest.cc @@ -0,0 +1,153 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" +#include "url/scheme_host_port.h" + +namespace { + +TEST(SchemeHostPortTest, Invalid) { + url::SchemeHostPort invalid; + EXPECT_EQ("", invalid.scheme()); + EXPECT_EQ("", invalid.host()); + EXPECT_EQ(0, invalid.port()); + EXPECT_TRUE(invalid.IsInvalid()); + EXPECT_TRUE(invalid.Equals(invalid)); + + const char* urls[] = {"data:text/html,Hello!", + "javascript:alert(1)", + "file://example.com:443/etc/passwd", + "blob:https://example.com/uuid-goes-here", + "filesystem:https://example.com/temporary/yay.png"}; + + for (const auto& test : urls) { + SCOPED_TRACE(test); + GURL url(test); + url::SchemeHostPort tuple(url); + EXPECT_EQ("", tuple.scheme()); + EXPECT_EQ("", tuple.host()); + EXPECT_EQ(0, tuple.port()); + EXPECT_TRUE(tuple.IsInvalid()); + EXPECT_TRUE(tuple.Equals(tuple)); + EXPECT_TRUE(tuple.Equals(invalid)); + EXPECT_TRUE(invalid.Equals(tuple)); + } +} + +TEST(SchemeHostPortTest, ExplicitConstruction) { + struct TestCases { + const char* scheme; + const char* host; + uint16 port; + } cases[] = { + {"http", "example.com", 80}, + {"http", "example.com", 123}, + {"https", "example.com", 443}, + {"https", "example.com", 123}, + {"file", "", 0}, + {"file", "example.com", 0}, + }; + + for (const auto& test : cases) { + SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" + << test.port); + url::SchemeHostPort tuple(test.scheme, test.host, test.port); + EXPECT_EQ(test.scheme, tuple.scheme()); + EXPECT_EQ(test.host, tuple.host()); + EXPECT_EQ(test.port, tuple.port()); + EXPECT_FALSE(tuple.IsInvalid()); + EXPECT_TRUE(tuple.Equals(tuple)); + } +} + +TEST(SchemeHostPortTest, GURLConstruction) { + struct TestCases { + const char* url; + const char* scheme; + const char* host; + uint16 port; + } cases[] = { + {"http://192.168.9.1/", "http", "192.168.9.1", 80}, + {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80}, + {"http://☃.net/", "http", "xn--n3h.net", 80}, + 
{"http://example.com/", "http", "example.com", 80}, + {"http://example.com:123/", "http", "example.com", 123}, + {"https://example.com/", "https", "example.com", 443}, + {"https://example.com:123/", "https", "example.com", 123}, + {"file:///etc/passwd", "file", "", 0}, + {"file://example.com/etc/passwd", "file", "example.com", 0}, + {"http://u:p@example.com/", "http", "example.com", 80}, + {"http://u:p@example.com/path", "http", "example.com", 80}, + {"http://u:p@example.com/path?123", "http", "example.com", 80}, + {"http://u:p@example.com/path?123#hash", "http", "example.com", 80}, + }; + + for (const auto& test : cases) { + SCOPED_TRACE(test.url); + GURL url(test.url); + EXPECT_TRUE(url.is_valid()); + url::SchemeHostPort tuple(url); + EXPECT_EQ(test.scheme, tuple.scheme()); + EXPECT_EQ(test.host, tuple.host()); + EXPECT_EQ(test.port, tuple.port()); + EXPECT_FALSE(tuple.IsInvalid()); + EXPECT_TRUE(tuple.Equals(tuple)); + } +} + +TEST(SchemeHostPortTest, Serialization) { + struct TestCases { + const char* url; + const char* expected; + } cases[] = { + {"http://192.168.9.1/", "http://192.168.9.1"}, + {"http://[2001:db8::1]/", "http://[2001:db8::1]"}, + {"http://☃.net/", "http://xn--n3h.net"}, + {"http://example.com/", "http://example.com"}, + {"http://example.com:123/", "http://example.com:123"}, + {"https://example.com/", "https://example.com"}, + {"https://example.com:123/", "https://example.com:123"}, + {"file:///etc/passwd", "file://"}, + {"file://example.com/etc/passwd", "file://example.com"}, + }; + + for (const auto& test : cases) { + SCOPED_TRACE(test.url); + GURL url(test.url); + url::SchemeHostPort tuple(url); + EXPECT_EQ(test.expected, tuple.Serialize()); + } +} + +TEST(SchemeHostPortTest, Comparison) { + // These tuples are arranged in increasing order: + struct SchemeHostPorts { + const char* scheme; + const char* host; + uint16 port; + } tuples[] = { + {"http", "a", 80}, + {"http", "b", 80}, + {"https", "a", 80}, + {"https", "b", 80}, + {"http", "a", 
81}, + {"http", "b", 81}, + {"https", "a", 81}, + {"https", "b", 81}, + }; + + for (size_t i = 0; i < arraysize(tuples); i++) { + url::SchemeHostPort current(tuples[i].scheme, tuples[i].host, + tuples[i].port); + for (size_t j = i; j < arraysize(tuples); j++) { + url::SchemeHostPort to_compare(tuples[j].scheme, tuples[j].host, + tuples[j].port); + EXPECT_EQ(i < j, current < to_compare) << i << " < " << j; + EXPECT_EQ(j < i, to_compare < current) << j << " < " << i; + } + } +} + +} // namespace diff --git a/url/url.gyp b/url/url.gyp index 8a78744..198d448 100644 --- a/url/url.gyp +++ b/url/url.gyp @@ -48,6 +48,7 @@ 'sources': [ 'gurl_unittest.cc', 'deprecated_serialized_origin_unittest.cc', + 'scheme_host_port_unittest.cc', 'url_canon_icu_unittest.cc', 'url_canon_unittest.cc', 'url_parse_unittest.cc', diff --git a/url/url_srcs.gypi b/url/url_srcs.gypi index cd9a9bd..525598b 100644 --- a/url/url_srcs.gypi +++ b/url/url_srcs.gypi @@ -11,6 +11,8 @@ 'deprecated_serialized_origin.h', 'gurl.cc', 'gurl.h', + 'scheme_host_port.cc', + 'scheme_host_port.h', 'third_party/mozilla/url_parse.cc', 'third_party/mozilla/url_parse.h', 'url_canon.h', |