diff options
author | mkwst <mkwst@chromium.org> | 2015-07-21 23:03:25 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-07-22 06:03:58 +0000 |
commit | 9f2cc898386458c3fab414666eb205ecb5b88277 (patch) | |
tree | 20533a6844ab7d3257ea80872bac51bd2ee6aa0f | |
parent | 5b864132129fe01715e190d14aaa59e2f3c2a9d6 (diff) | |
download | chromium_src-9f2cc898386458c3fab414666eb205ecb5b88277.zip chromium_src-9f2cc898386458c3fab414666eb205ecb5b88277.tar.gz chromium_src-9f2cc898386458c3fab414666eb205ecb5b88277.tar.bz2 |
Introduce 'url::Origin'.
https://docs.google.com/document/d/19NACt9PXOUTJi60klT2ZGcFlgHM5wM1Owtcw2GQOKPI/edit
describes the plan.
BUG=490074
Review URL: https://codereview.chromium.org/1224293002
Cr-Commit-Position: refs/heads/master@{#339841}
-rw-r--r-- | url/BUILD.gn | 3 | ||||
-rw-r--r-- | url/origin.cc | 69 | ||||
-rw-r--r-- | url/origin.h | 125 | ||||
-rw-r--r-- | url/origin_unittest.cc | 160 | ||||
-rw-r--r-- | url/url.gyp | 3 | ||||
-rw-r--r-- | url/url_srcs.gypi | 2 |
6 files changed, 361 insertions, 1 deletions
diff --git a/url/BUILD.gn b/url/BUILD.gn index db5a6ad..2beed04 100644 --- a/url/BUILD.gn +++ b/url/BUILD.gn @@ -24,6 +24,8 @@ component("url") { "deprecated_serialized_origin.h", "gurl.cc", "gurl.h", + "origin.cc", + "origin.h", "scheme_host_port.cc", "scheme_host_port.h", "third_party/mozilla/url_parse.cc", @@ -97,6 +99,7 @@ if (!is_android) { sources = [ "deprecated_serialized_origin_unittest.cc", "gurl_unittest.cc", + "origin_unittest.cc", "scheme_host_port_unittest.cc", "url_canon_icu_unittest.cc", "url_canon_unittest.cc", diff --git a/url/origin.cc b/url/origin.cc new file mode 100644 index 0000000..e80eb72 --- /dev/null +++ b/url/origin.cc @@ -0,0 +1,69 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "url/origin.h" + +#include <string.h> + +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "url/gurl.h" +#include "url/url_canon.h" +#include "url/url_canon_stdstring.h" +#include "url/url_constants.h" +#include "url/url_util.h" + +namespace url { + +Origin::Origin() : unique_(true) { +} + +Origin::Origin(const GURL& url) : unique_(true) { + if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob())) + return; + + if (url.SchemeIsFileSystem()) { + tuple_ = SchemeHostPort(*url.inner_url()); + } else if (url.SchemeIsBlob()) { + // TODO(mkwst): This relies on the fact that GURL pushes the unparseable + // bits and pieces of a non-standard scheme into the GURL's path. It seems + // fairly fragile, so it might be worth teaching GURL about blobs' data in + // the same way it's been taught about filesystems' inner URLs. 
+ tuple_ = SchemeHostPort(GURL(url.path())); + } else { + tuple_ = SchemeHostPort(url); + } + + unique_ = tuple_.IsInvalid(); +} + +Origin::~Origin() { +} + +std::string Origin::Serialize() const { + if (unique()) + return "null"; + + if (scheme() == kFileScheme) + return "file://"; + + return tuple_.Serialize(); +} + +bool Origin::IsSameOriginWith(const Origin& other) const { + if (unique_ || other.unique_) + return false; + + return tuple_.Equals(other.tuple_); +} + +bool Origin::operator<(const Origin& other) const { + return tuple_ < other.tuple_; +} + +std::ostream& operator<<(std::ostream& out, const url::Origin& origin) { + return out << origin.Serialize(); +} + +} // namespace url diff --git a/url/origin.h b/url/origin.h new file mode 100644 index 0000000..15fe0ea --- /dev/null +++ b/url/origin.h @@ -0,0 +1,125 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef URL_ORIGIN_H_ +#define URL_ORIGIN_H_ + +#include <string> + +#include "base/strings/string16.h" +#include "url/scheme_host_port.h" +#include "url/third_party/mozilla/url_parse.h" +#include "url/url_canon.h" +#include "url/url_constants.h" +#include "url/url_export.h" + +class GURL; + +namespace url { + +// An Origin is a tuple of (scheme, host, port), as described in RFC 6454. +// +// TL;DR: If you need to make a security-relevant decision, use 'url::Origin'. +// If you only need to extract the bits of a URL which are relevant for a +// network connection, use 'url::SchemeHostPort'. +// +// STL;SDR: If you aren't making actual network connections, use 'url::Origin'. +// +// 'Origin', like 'SchemeHostPort', is composed of a tuple of (scheme, host, +// port), but contains a number of additional concepts which make it appropriate +// for use as a security boundary and access control mechanism between contexts. 
+// +// This class ought to be used when code needs to determine if two resources +// are "same-origin", and when a canonical serialization of an origin is +// required. Note that some origins are "unique", meaning that they are not +// same-origin with any other origin (including themselves). +// +// There are a few subtleties to note: +// +// * Invalid and non-standard GURLs are parsed as unique origins. This includes +// non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'. +// +// * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the +// internals of the URL. That is, 'filesystem:https://example.com/temporary/f' +// is parsed as ('https', 'example.com', 443). +// +// * Unique origins all serialize to the string "null"; this means that the +// serializations of two unique origins are identical to each other, though +// the origins themselves are not "the same". This means that origins' +// serializations must not be relied upon for security checks. +// +// * GURLs with a 'file' scheme are tricky. They are parsed as ('file', '', 0), +// but their behavior may differ from embedder to embedder. +// +// * The host component of an IPv6 address includes brackets, just like the URL +// representation. +// +// Usage: +// +// * Origins are generally constructed from an already-canonicalized GURL: +// +// GURL url("https://example.com/"); +// url::Origin origin(url); +// origin.scheme(); // "https" +// origin.host(); // "example.com" +// origin.port(); // 443 +// origin.unique(); // false +// +// * To answer the question "Are |this| and |that| "same-origin" with each +// other?", use |Origin::IsSameOriginWith|: +// +// if (this.IsSameOriginWith(that)) { +// // Amazingness goes here. +// } +class URL_EXPORT Origin { + public: + // Creates a unique Origin. + Origin(); + + // Creates an Origin from |url|, as described at + // https://url.spec.whatwg.org/#origin, with the following additions: + // + // 1. 
If |url| is invalid or non-standard, a unique Origin is constructed. + // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed + // out of everything in the URL which follows the scheme). + // 3. 'file' URLs all parse as ("file", "", 0). + explicit Origin(const GURL& url); + + ~Origin(); + + // For unique origins, these return ("", "", 0). + // + // TODO(mkwst): These should be 'const std::string&', along with their + // 'url::SchemeHostPort' analogs. + std::string scheme() const { return tuple_.scheme(); } + std::string host() const { return tuple_.host(); } + uint16 port() const { return tuple_.port(); } + + bool unique() const { return unique_; } + + // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with + // the addition that all Origins with a 'file' scheme serialize to "file://". + std::string Serialize() const; + + // Two Origins are "same-origin" if their schemes, hosts, and ports are exact + // matches; and neither is unique. + bool IsSameOriginWith(const Origin& other) const; + + // Allows Origin to be used as a key in STL (for example, a std::set or + // std::map). + bool operator<(const Origin& other) const; + + private: + SchemeHostPort tuple_; + bool unique_; + + DISALLOW_COPY_AND_ASSIGN(Origin); +}; + +URL_EXPORT std::ostream& operator<<(std::ostream& out, + const Origin& origin); + +} // namespace url + +#endif // URL_ORIGIN_H_ diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc new file mode 100644 index 0000000..a774c62 --- /dev/null +++ b/url/origin_unittest.cc @@ -0,0 +1,160 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "url/origin.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" + +namespace { + +TEST(OriginTest, UniqueOriginComparison) { + url::Origin unique_origin; + EXPECT_EQ("", unique_origin.scheme()); + EXPECT_EQ("", unique_origin.host()); + EXPECT_EQ(0, unique_origin.port()); + EXPECT_TRUE(unique_origin.unique()); + EXPECT_FALSE(unique_origin.IsSameOriginWith(unique_origin)); + + const char* const urls[] = {"data:text/html,Hello!", + "javascript:alert(1)", + "file://example.com:443/etc/passwd", + "yay", + "http::///invalid.example.com/"}; + + for (const auto& test_url : urls) { + SCOPED_TRACE(test_url); + GURL url(test_url); + url::Origin origin(url); + EXPECT_EQ("", origin.scheme()); + EXPECT_EQ("", origin.host()); + EXPECT_EQ(0, origin.port()); + EXPECT_TRUE(origin.unique()); + EXPECT_FALSE(origin.IsSameOriginWith(origin)); + EXPECT_FALSE(unique_origin.IsSameOriginWith(origin)); + EXPECT_FALSE(origin.IsSameOriginWith(unique_origin)); + } +} + +TEST(OriginTest, ConstructFromGURL) { + url::Origin different_origin(GURL("https://not-in-the-list.test/")); + + struct TestCases { + const char* const url; + const char* const expected_scheme; + const char* const expected_host; + const uint16 expected_port; + } cases[] = { + // IP Addresses + {"http://192.168.9.1/", "http", "192.168.9.1", 80}, + {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80}, + + // Punycode + {"http://☃.net/", "http", "xn--n3h.net", 80}, + {"blob:http://☃.net/", "http", "xn--n3h.net", 80}, + + // Generic URLs + {"http://example.com/", "http", "example.com", 80}, + {"http://example.com:123/", "http", "example.com", 123}, + {"https://example.com/", "https", "example.com", 443}, + {"https://example.com:123/", "https", "example.com", 123}, + {"http://user:pass@example.com/", "http", "example.com", 80}, + {"http://example.com:123/?query", "http", "example.com", 123}, + {"https://example.com/#1234", "https", "example.com", 443}, + {"https://u:p@example.com:123/?query#1234", 
"https", "example.com", 123}, + + // Registered URLs + {"ftp://example.com/", "ftp", "example.com", 21}, + {"gopher://example.com/", "gopher", "example.com", 70}, + {"ws://example.com/", "ws", "example.com", 80}, + {"wss://example.com/", "wss", "example.com", 443}, + + // file: URLs + {"file:///etc/passwd", "file", "", 0}, + {"file://example.com/etc/passwd", "file", "example.com", 0}, + + // Filesystem: + {"filesystem:http://example.com/type/", "http", "example.com", 80}, + {"filesystem:http://example.com:123/type/", "http", "example.com", 123}, + {"filesystem:https://example.com/type/", "https", "example.com", 443}, + {"filesystem:https://example.com:123/type/", "https", "example.com", 123}, + + // Blob: + {"blob:http://example.com/guid-goes-here", "http", "example.com", 80}, + {"blob:http://example.com:123/guid-goes-here", "http", "example.com", 123}, + {"blob:https://example.com/guid-goes-here", "https", "example.com", 443}, + {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80}, + }; + + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.url); + GURL url(test_case.url); + EXPECT_TRUE(url.is_valid()); + url::Origin origin(url); + EXPECT_EQ(test_case.expected_scheme, origin.scheme()); + EXPECT_EQ(test_case.expected_host, origin.host()); + EXPECT_EQ(test_case.expected_port, origin.port()); + EXPECT_FALSE(origin.unique()); + EXPECT_TRUE(origin.IsSameOriginWith(origin)); + EXPECT_FALSE(different_origin.IsSameOriginWith(origin)); + EXPECT_FALSE(origin.IsSameOriginWith(different_origin)); + } +} + +TEST(OriginTest, Serialization) { + struct TestCases { + const char* const url; + const char* const expected; + } cases[] = { + {"http://192.168.9.1/", "http://192.168.9.1"}, + {"http://[2001:db8::1]/", "http://[2001:db8::1]"}, + {"http://☃.net/", "http://xn--n3h.net"}, + {"http://example.com/", "http://example.com"}, + {"http://example.com:123/", "http://example.com:123"}, + {"https://example.com/", "https://example.com"}, + 
{"https://example.com:123/", "https://example.com:123"}, + {"file:///etc/passwd", "file://"}, + {"file://example.com/etc/passwd", "file://"}, + }; + + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.url); + GURL url(test_case.url); + EXPECT_TRUE(url.is_valid()); + url::Origin origin(url); + EXPECT_EQ(test_case.expected, origin.Serialize()); + + // The '<<' operator should produce the same serialization as Serialize(). + std::stringstream out; + out << origin; + EXPECT_EQ(test_case.expected, out.str()); + } +} + +TEST(OriginTest, Comparison) { + // These URLs are arranged in increasing order: + const char* const urls[] = { + "data:uniqueness", + "http://a:80", + "http://b:80", + "https://a:80", + "https://b:80", + "http://a:81", + "http://b:81", + "https://a:81", + "https://b:81", + }; + + for (size_t i = 0; i < arraysize(urls); i++) { + GURL current_url(urls[i]); + url::Origin current(current_url); + for (size_t j = i; j < arraysize(urls); j++) { + GURL compare_url(urls[j]); + url::Origin to_compare(compare_url); + EXPECT_EQ(i < j, current < to_compare) << i << " < " << j; + EXPECT_EQ(j < i, to_compare < current) << j << " < " << i; + } + } +} + +} // namespace diff --git a/url/url.gyp b/url/url.gyp index 198d448..b8355a8 100644 --- a/url/url.gyp +++ b/url/url.gyp @@ -46,8 +46,9 @@ 'url_lib', ], 'sources': [ - 'gurl_unittest.cc', 'deprecated_serialized_origin_unittest.cc', + 'gurl_unittest.cc', + 'origin_unittest.cc', 'scheme_host_port_unittest.cc', 'url_canon_icu_unittest.cc', 'url_canon_unittest.cc', diff --git a/url/url_srcs.gypi b/url/url_srcs.gypi index 525598b..21d2fe1 100644 --- a/url/url_srcs.gypi +++ b/url/url_srcs.gypi @@ -11,6 +11,8 @@ 'deprecated_serialized_origin.h', 'gurl.cc', 'gurl.h', + 'origin.cc', + 'origin.h', 'scheme_host_port.cc', 'scheme_host_port.h', 'third_party/mozilla/url_parse.cc', |