From 11a7c9fe93850341043844ab48bf379c485d05b1 Mon Sep 17 00:00:00 2001 From: tyoshino Date: Wed, 19 Aug 2015 01:51:46 -0700 Subject: Allow url::SchemeHostPort to hold non-file scheme without port WebSockets use url::Origin to pass origin info between renderer and browser. Currently, it cannot hold an origin with non-file scheme and no port. Chrome extensions have been using such origins, so we need to keep the channel to convey origin info work for such origins. BUG=516971 R=sleevi,brettw Committed: https://crrev.com/1ac9ec7bccd1b5178b18338b10149f36292f5fb6 Cr-Commit-Position: refs/heads/master@{#343895} Review URL: https://codereview.chromium.org/1272113002 Cr-Commit-Position: refs/heads/master@{#344181} --- url/scheme_host_port.cc | 153 +++++++++++++++++++++++++++++++---------------- url/scheme_host_port.h | 19 +++--- url/url_util.cc | 67 ++++++++++++++------- url/url_util.h | 33 +++++++++- url/url_util_unittest.cc | 34 ++++++++++- 5 files changed, 220 insertions(+), 86 deletions(-) (limited to 'url') diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc index c2fe830..9c12295 100644 --- a/url/scheme_host_port.cc +++ b/url/scheme_host_port.cc @@ -7,6 +7,7 @@ #include #include "base/logging.h" +#include "base/numerics/safe_conversions.h" #include "base/strings/string_number_conversions.h" #include "url/gurl.h" #include "url/url_canon.h" @@ -16,25 +17,21 @@ namespace url { -SchemeHostPort::SchemeHostPort() : port_(0) { -} +namespace { -SchemeHostPort::SchemeHostPort(base::StringPiece scheme, - base::StringPiece host, - uint16 port) - : scheme_(scheme.data(), scheme.length()), - host_(host.data(), host.length()), - port_(port) { - // Try to canonicalize the host (copy/pasted from net/base. :( ). - const url::Component raw_host_component(0, static_cast(host.length())); +bool IsCanonicalHost(const base::StringPiece& host) { std::string canon_host; - url::StdStringCanonOutput canon_host_output(&canon_host); - url::CanonHostInfo host_info; - url::CanonicalizeHostVerbose(host.data(), raw_host_component, - &canon_host_output, &host_info); + + // Try to canonicalize the host (copy/pasted from net/base. :( ). + const Component raw_host_component(0, + base::checked_cast(host.length())); + StdStringCanonOutput canon_host_output(&canon_host); + CanonHostInfo host_info; + CanonicalizeHostVerbose(host.data(), raw_host_component, + &canon_host_output, &host_info); if (host_info.out_host.is_nonempty() && - host_info.family != url::CanonHostInfo::BROKEN) { + host_info.family != CanonHostInfo::BROKEN) { // Success! Assert that there's no extra garbage. canon_host_output.Complete(); DCHECK_EQ(host_info.out_host.len, static_cast(canon_host.length())); @@ -43,44 +40,95 @@ SchemeHostPort::SchemeHostPort(base::StringPiece scheme, canon_host.clear(); } - // Return an invalid SchemeHostPort object if any of the following conditions - // hold: - // - // 1. The provided scheme is non-standard, 'blob:', or 'filesystem:'. - // 2. The provided host is non-canonical. - // 3. The scheme is 'file' and the port is non-zero. - // 4. The scheme is not 'file', and the port is zero or the host is empty. - bool isUnsupportedScheme = - !url::IsStandard(scheme.data(), - url::Component(0, static_cast(scheme.length()))) || - scheme == kFileSystemScheme || scheme == kBlobScheme; - bool isNoncanonicalHost = host != canon_host; - bool isFileSchemeWithPort = scheme == kFileScheme && port != 0; - bool isNonFileSchemeWithoutPortOrHost = - scheme != kFileScheme && (port == 0 || host.empty()); - if (isUnsupportedScheme || isNoncanonicalHost || isFileSchemeWithPort || - isNonFileSchemeWithoutPortOrHost) { - scheme_.clear(); - host_.clear(); - port_ = 0; - } + return host == canon_host; } -SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) { - if (!url.is_valid() || !url.IsStandard()) - return; +bool IsValidInput(const base::StringPiece& scheme, + const base::StringPiece& host, + uint16 port) { + SchemeType scheme_type = SCHEME_WITH_PORT; + bool is_standard = GetStandardSchemeType( + scheme.data(), + Component(0, base::checked_cast(scheme.length())), + &scheme_type); + if (!is_standard) + return false; // These schemes do not follow the generic URL syntax, so we treat them as // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might // have a (scheme, host, port) tuple, they themselves do not). - if (url.SchemeIsBlob() || url.SchemeIsFileSystem()) + if (scheme == kFileSystemScheme || scheme == kBlobScheme) + return false; + + switch (scheme_type) { + case SCHEME_WITH_PORT: + // A URL with |scheme| is required to have the host and port (may be + // omitted in a serialization if it's the same as the default value). + // Return an invalid instance if either of them is not given. + if (host.empty() || port == 0) + return false; + + if (!IsCanonicalHost(host)) + return false; + + return true; + + case SCHEME_WITHOUT_PORT: + if (port != 0) { + // Return an invalid object if a URL with the scheme never represents + // the port data but the given |port| is non-zero. + return false; + } + + if (!IsCanonicalHost(host)) + return false; + + return true; + + case SCHEME_WITHOUT_AUTHORITY: + return false; + + default: + NOTREACHED(); + return false; + } +} + +} // namespace + +SchemeHostPort::SchemeHostPort() : port_(0) { +} + +SchemeHostPort::SchemeHostPort(base::StringPiece scheme, + base::StringPiece host, + uint16 port) + : port_(0) { + if (!IsValidInput(scheme, host, port)) return; - scheme_ = url.scheme(); - host_ = url.host(); - port_ = url.EffectiveIntPort() == url::PORT_UNSPECIFIED - ? 0 - : url.EffectiveIntPort(); + scheme.CopyToString(&scheme_); + host.CopyToString(&host_); + port_ = port; +} + +SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) { + if (!url.is_valid()) + return; + + const std::string& scheme = url.scheme(); + const std::string& host = url.host(); + + // A valid GURL never returns PORT_INVALID. + int port = url.EffectiveIntPort(); + if (port == PORT_UNSPECIFIED) + port = 0; + + if (!IsValidInput(scheme, host, port)) + return; + + scheme_ = scheme; + host_ = host; + port_ = port; } SchemeHostPort::~SchemeHostPort() { @@ -95,15 +143,20 @@ std::string SchemeHostPort::Serialize() const { if (IsInvalid()) return result; - bool is_default_port = - port_ == url::DefaultPortForScheme(scheme_.data(), - static_cast(scheme_.length())); - result.append(scheme_); result.append(kStandardSchemeSeparator); result.append(host_); - if (scheme_ != kFileScheme && !is_default_port) { + if (port_ == 0) + return result; + + // Omit the port component if the port matches with the default port + // defined for the scheme, if any. + int default_port = DefaultPortForScheme(scheme_.data(), + static_cast(scheme_.length())); + if (default_port == PORT_UNSPECIFIED) + return result; + if (port_ != default_port) { result.push_back(':'); result.append(base::IntToString(port_)); } diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h index 2cc9e07..6e35a25 100644 --- a/url/scheme_host_port.h +++ b/url/scheme_host_port.h @@ -36,20 +36,19 @@ namespace url { // schemes such as "blob", "filesystem", "data", and "javascript" can only be // represented as invalid SchemeHostPort objects. // -// * The "file" scheme follows the standard syntax, but it is important to note -// that the authority portion (host, port) is optional. URLs without an -// authority portion will be represented with an empty string for the host, -// and a port of 0 (e.g. "file:///etc/hosts" => ("file", "", 0)), and URLs -// with a host-only authority portion will be represented with a port of 0 -// (e.g. "file://example.com/etc/hosts" => ("file", "example.com", 0)). See -// Section 3 of RFC 3986 to better understand these constructs. +// * For example, the "file" scheme follows the standard syntax, but it is +// important to note that the authority portion (host, port) is optional. +// URLs without an authority portion will be represented with an empty string +// for the host, and a port of 0 (e.g. "file:///etc/hosts" => +// ("file", "", 0)), and URLs with a host-only authority portion will be +// represented with a port of 0 (e.g. "file://example.com/etc/hosts" => +// ("file", "example.com", 0)). See Section 3 of RFC 3986 to better understand +// these constructs. // // * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in // particular, it has no notion of a "unique" Origin. If you need to take // uniqueness into account (and, if you're making security-relevant decisions -// then you absolutely do), please use 'url::Origin' instead[1]. -// -// [1]: // TODO(mkwst): Land 'url::Origin'. :) +// then you absolutely do), please use 'url::Origin' instead. // // Usage: // diff --git a/url/url_util.cc b/url/url_util.cc index 279ab7e..21bf3cc 100644 --- a/url/url_util.cc +++ b/url/url_util.cc @@ -19,21 +19,24 @@ namespace url { namespace { const int kNumStandardURLSchemes = 8; -const char* kStandardURLSchemes[kNumStandardURLSchemes] = { - kHttpScheme, - kHttpsScheme, - kFileScheme, // Yes, file URLs can have a hostname! - kFtpScheme, - kGopherScheme, - kWsScheme, // WebSocket. - kWssScheme, // WebSocket secure. - kFileSystemScheme, +const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { + {kHttpScheme, SCHEME_WITH_PORT}, + {kHttpsScheme, SCHEME_WITH_PORT}, + // Yes, file URLs can have a hostname, so file URLs should be handled as + // "standard". File URLs never have a port as specified by the SchemeType + // field. + {kFileScheme, SCHEME_WITHOUT_PORT}, + {kFtpScheme, SCHEME_WITH_PORT}, + {kGopherScheme, SCHEME_WITH_PORT}, + {kWsScheme, SCHEME_WITH_PORT}, // WebSocket. + {kWssScheme, SCHEME_WITH_PORT}, // WebSocket secure. + {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY}, }; // List of the currently installed standard schemes. This list is lazily // initialized by InitStandardSchemes and is leaked on shutdown to prevent // any destructors from being called that will slow us down or cause problems. -std::vector* standard_schemes = NULL; +std::vector* standard_schemes = NULL; // See the LockStandardSchemes declaration in the header. bool standard_schemes_locked = false; @@ -54,7 +57,7 @@ template<> struct CharToStringPiece { void InitStandardSchemes() { if (standard_schemes) return; - standard_schemes = new std::vector; + standard_schemes = new std::vector; for (int i = 0; i < kNumStandardURLSchemes; i++) standard_schemes->push_back(kStandardURLSchemes[i]); } @@ -73,10 +76,13 @@ inline bool DoCompareSchemeComponent(const CHAR* spec, compare_to); } -// Returns true if the given scheme identified by |scheme| within |spec| is one -// of the registered "standard" schemes. +// Returns true and sets |type| to the SchemeType of the given scheme +// identified by |scheme| within |spec| if the scheme is one of the registered +// "standard" schemes. template -bool DoIsStandard(const CHAR* spec, const Component& scheme) { +bool DoIsStandard(const CHAR* spec, + const Component& scheme, + SchemeType* type) { if (!scheme.is_nonempty()) return false; // Empty or invalid schemes are non-standard. @@ -85,8 +91,10 @@ bool DoIsStandard(const CHAR* spec, const Component& scheme) { if (base::LowerCaseEqualsASCII( typename CharToStringPiece::Piece( &spec[scheme.begin], scheme.len), - standard_schemes->at(i))) + standard_schemes->at(i).scheme)) { + *type = standard_schemes->at(i).type; return true; + } } return false; } @@ -156,6 +164,7 @@ bool DoCanonicalize(const CHAR* in_spec, // This is the parsed version of the input URL, we have to canonicalize it // before storing it in our object. bool success; + SchemeType unused_scheme_type = SCHEME_WITH_PORT; if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) { // File URLs are special. ParseFileURL(spec, spec_len, &parsed_input); @@ -168,7 +177,7 @@ bool DoCanonicalize(const CHAR* in_spec, charset_converter, output, output_parsed); - } else if (DoIsStandard(spec, scheme)) { + } else if (DoIsStandard(spec, scheme, &unused_scheme_type)) { // All "normal" URLs. ParseStandardURL(spec, spec_len, &parsed_input); success = CanonicalizeStandardURL(spec, spec_len, parsed_input, @@ -217,9 +226,10 @@ bool DoResolveRelative(const char* base_spec, base_is_hierarchical = num_slashes > 0; } + SchemeType unused_scheme_type = SCHEME_WITH_PORT; bool standard_base_scheme = base_parsed.scheme.is_nonempty() && - DoIsStandard(base_spec, base_parsed.scheme); + DoIsStandard(base_spec, base_parsed.scheme, &unused_scheme_type); bool is_relative; Component relative_component; @@ -340,7 +350,8 @@ bool DoReplaceComponents(const char* spec, return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter, output, out_parsed); } - if (DoIsStandard(spec, parsed.scheme)) { + SchemeType unused_scheme_type = SCHEME_WITH_PORT; + if (DoIsStandard(spec, parsed.scheme, &unused_scheme_type)) { return ReplaceStandardURL(spec, parsed, replacements, charset_converter, output, out_parsed); } @@ -365,7 +376,8 @@ void Shutdown() { } } -void AddStandardScheme(const char* new_scheme) { +void AddStandardScheme(const char* new_scheme, + SchemeType type) { // If this assert triggers, it means you've called AddStandardScheme after // LockStandardSchemes have been called (see the header file for // LockStandardSchemes for more). @@ -388,7 +400,10 @@ void AddStandardScheme(const char* new_scheme) { memcpy(dup_scheme, new_scheme, scheme_len + 1); InitStandardSchemes(); - standard_schemes->push_back(dup_scheme); + SchemeWithType scheme_with_type; + scheme_with_type.scheme = dup_scheme; + scheme_with_type.type = type; + standard_schemes->push_back(scheme_with_type); } void LockStandardSchemes() { @@ -396,11 +411,19 @@ void LockStandardSchemes() { } bool IsStandard(const char* spec, const Component& scheme) { - return DoIsStandard(spec, scheme); + SchemeType unused_scheme_type; + return DoIsStandard(spec, scheme, &unused_scheme_type); +} + +bool GetStandardSchemeType(const char* spec, + const Component& scheme, + SchemeType* type) { + return DoIsStandard(spec, scheme, type); } bool IsStandard(const base::char16* spec, const Component& scheme) { - return DoIsStandard(spec, scheme); + SchemeType unused_scheme_type; + return DoIsStandard(spec, scheme, &unused_scheme_type); } bool FindAndCompareScheme(const char* str, diff --git a/url/url_util.h b/url/url_util.h index 5817044..36e7814 100644 --- a/url/url_util.h +++ b/url/url_util.h @@ -37,6 +37,25 @@ URL_EXPORT void Shutdown(); // Schemes -------------------------------------------------------------------- +// Types of a scheme representing the requirements on the data represented by +// the authority component of a URL with the scheme. +enum URL_EXPORT SchemeType { + // The authority component of a URL with the scheme, if any, has the port + // (the default values may be omitted in a serialization). + SCHEME_WITH_PORT, + // The authority component of a URL with the scheme, if any, doesn't have a + // port. + SCHEME_WITHOUT_PORT, + // A URL with the scheme doesn't have the authority component. + SCHEME_WITHOUT_AUTHORITY, +}; + +// A pair for representing a standard scheme name and the SchemeType for it. +struct URL_EXPORT SchemeWithType { + const char* scheme; + SchemeType type; +}; + // Adds an application-defined scheme to the internal list of "standard-format" // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). @@ -44,7 +63,8 @@ URL_EXPORT void Shutdown(); // This function is not threadsafe and can not be called concurrently with any // other url_util function. It will assert if the list of standard schemes has // been locked (see LockStandardSchemes). -URL_EXPORT void AddStandardScheme(const char* new_scheme); +URL_EXPORT void AddStandardScheme(const char* new_scheme, + SchemeType scheme_type); // Sets a flag to prevent future calls to AddStandardScheme from succeeding. // @@ -87,11 +107,18 @@ inline bool FindAndCompareScheme(const base::string16& str, compare, found_scheme); } -// Returns true if the given string represents a URL whose scheme is in the list -// of known standard-format schemes (see AddStandardScheme). +// Returns true if the given scheme identified by |scheme| within |spec| is in +// the list of known standard-format schemes (see AddStandardScheme). URL_EXPORT bool IsStandard(const char* spec, const Component& scheme); URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme); +// Returns true and sets |type| to the SchemeType of the given scheme +// identified by |scheme| within |spec| if the scheme is in the list of known +// standard-format schemes (see AddStandardScheme). +URL_EXPORT bool GetStandardSchemeType(const char* spec, + const Component& scheme, + SchemeType* type); + // URL library wrappers ------------------------------------------------------- // Parses the given spec according to the extracted scheme type. Normal users diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc index 9297765..b89bfa1 100644 --- a/url/url_util_unittest.cc +++ b/url/url_util_unittest.cc @@ -61,6 +61,38 @@ TEST(URLUtilTest, FindAndCompareScheme) { EXPECT_TRUE(found_scheme == Component(1, 11)); } +TEST(URLUtilTest, IsStandard) { + const char kHTTPScheme[] = "http"; + EXPECT_TRUE(IsStandard(kHTTPScheme, Component(0, strlen(kHTTPScheme)))); + + const char kFooScheme[] = "foo"; + EXPECT_FALSE(IsStandard(kFooScheme, Component(0, strlen(kFooScheme)))); +} + +TEST(URLUtilTest, GetStandardSchemeType) { + url::SchemeType scheme_type; + + const char kHTTPScheme[] = "http"; + scheme_type = url::SCHEME_WITHOUT_AUTHORITY; + EXPECT_TRUE(GetStandardSchemeType(kHTTPScheme, + Component(0, strlen(kHTTPScheme)), + &scheme_type)); + EXPECT_EQ(url::SCHEME_WITH_PORT, scheme_type); + + const char kFilesystemScheme[] = "filesystem"; + scheme_type = url::SCHEME_WITH_PORT; + EXPECT_TRUE(GetStandardSchemeType(kFilesystemScheme, + Component(0, strlen(kFilesystemScheme)), + &scheme_type)); + EXPECT_EQ(url::SCHEME_WITHOUT_AUTHORITY, scheme_type); + + const char kFooScheme[] = "foo"; + scheme_type = url::SCHEME_WITH_PORT; + EXPECT_FALSE(GetStandardSchemeType(kFooScheme, + Component(0, strlen(kFooScheme)), + &scheme_type)); +} + TEST(URLUtilTest, ReplaceComponents) { Parsed parsed; RawCanonOutputT output; @@ -220,7 +252,7 @@ TEST(URLUtilTest, TestEncodeURIComponent) { } TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) { - // This tests non-standard (in the sense that GIsStandard() == false) + // This tests non-standard (in the sense that IsStandard() == false) // hierarchical schemes. struct ResolveRelativeCase { const char* base; -- cgit v1.1