Preparing to support fragment resolution against non-hierarchical schemes

Adds a new GURL::GetContent() to retrieve the text after scheme: specifically intended for use in non-hierarchical schemes (data: etc). Doing the first part of https://codereview.chromium.org/23835019/ in a preparation CL. BUG=291747 Review URL: https://codereview.chromium.org/23549039 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@230781 0039d316-1c4b-4281-b951-d872f2087c98
author: kristianm@chromium.org <kristianm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-10-24 19:05:17 +0000
committer: kristianm@chromium.org <kristianm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-10-24 19:05:17 +0000
commit: 5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3 (patch)
tree: 91055362c58de7699dea1022efe8ca9732f7c01d /url
parent: 22abbd82c93a9c85f2364586919a6e3485586bcc (diff)
download: chromium_src-5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3.zip
chromium_src-5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3.tar.gz
chromium_src-5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3.tar.bz2
4 files changed, 29 insertions, 4 deletions
diff --git a/url/gurl.cc b/url/gurl.cc
index 559b386..60850e6 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -422,6 +422,10 @@ std::string GURL::HostNoBrackets() const {
   return ComponentString(h);
 }
 
+std::string GURL::GetContent() const {
+  return is_valid_ ? ComponentString(parsed_.GetContent()) : std::string();
+}
+
 bool GURL::HostIsIPAddress() const {
   if (!is_valid_ || spec_.empty())
      return false;
diff --git a/url/gurl.h b/url/gurl.h
index 254f64b..f750c3a 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -227,6 +227,11 @@ class URL_EXPORT GURL {
         (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure());
   }
 
+  // The "content" of the URL is everything after the scheme (skipping the
+  // scheme delimiting colon). It is an error to get the content of an invalid
+  // URL: the result will be an empty string.
+  std::string GetContent() const;
+
   // Returns true if the hostname is an IP address. Note: this function isn't
   // as cheap as a simple getter because it re-parses the hostname to verify.
   // This currently identifies only IPv4 addresses (bug 822685).
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc
index 52c6196..fbc8a9b 100644
--- a/url/third_party/mozilla/url_parse.cc
+++ b/url/third_party/mozilla/url_parse.cc
@@ -792,6 +792,15 @@ int Parsed::CountCharactersBefore(ComponentType type,
   return cur;
 }
 
+Component Parsed::GetContent() const {
+  const int begin = CountCharactersBefore(USERNAME, false);
+  const int len = Length() - begin;
+  // For compatibility with the standard URL parser, we treat no content as
+  // -1, rather than having a length of 0 (we normally wouldn't care so
+  // much for these non-standard URLs).
+  return len ? Component(begin, len) : Component();
+}
+
 bool ExtractScheme(const char* url, int url_len, Component* scheme) {
   return DoExtractScheme(url, url_len, scheme);
 }
diff --git a/url/third_party/mozilla/url_parse.h b/url/third_party/mozilla/url_parse.h
index fd974f8..5fa9322 100644
--- a/url/third_party/mozilla/url_parse.h
+++ b/url/third_party/mozilla/url_parse.h
@@ -159,10 +159,11 @@ struct URL_EXPORT Parsed {
   // Port number.
   Component port;
 
-  // Path, this is everything following the host name. Length will be -1 if
-  // unspecified. This includes the preceeding slash, so the path on
-  // http://www.google.com/asdf" is "/asdf". As a result, it is impossible to
-  // have a 0 length path, it will be -1 in cases like "http://host?foo".
+  // Path, this is everything following the host name, stopping at the query or
+  // ref delimiter (if any). Length will be -1 if unspecified. This includes
+  // the preceding slash, so the path on "http://www.google.com/asdf" is
+  // "/asdf". As a result, it is impossible to have a 0 length path, it will
+  // be -1 in cases like "http://host?foo".
   // Note that we treat backslashes the same as slashes.
   Component path;
 
@@ -177,6 +178,12 @@ struct URL_EXPORT Parsed {
   // nothing follows it.
   Component ref;
 
+  // The URL spec from the character after the scheme: until the end of the
+  // URL, regardless of the scheme. This is mostly useful for 'opaque' non-
+  // hierarchical schemes like data: and javascript: as a convenient way to
+  // get the string with the scheme stripped off.
+  Component GetContent() const;
+
   // This is used for nested URL types, currently only filesystem.  If you
   // parse a filesystem URL, the resulting Parsed will have a nested
   // inner_parsed_ to hold the parsed inner URL's component information.
author	kristianm@chromium.org <kristianm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-10-24 19:05:17 +0000
committer	kristianm@chromium.org <kristianm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-10-24 19:05:17 +0000
commit	5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3 (patch)
tree	91055362c58de7699dea1022efe8ca9732f7c01d /url
parent	22abbd82c93a9c85f2364586919a6e3485586bcc (diff)
download	chromium_src-5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3.zip chromium_src-5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3.tar.gz chromium_src-5f50c5dbbad30f25ee6f983f555237c3f4e0a1a3.tar.bz2