Support URL fragment resolution against non-hierarchical schemes

Support URL fragment resolution against non-hierarchical schemes. As a result, data:, about:, etc. now have their 'query' and 'ref' components parsed; consequently, a new GURL::GetContent() convenience method is added to retrieve the spec with the scheme stripped off. A complication in supporting this is that we now need to allow trailing whitespace to be preserved when transferring url_parse::Parsed structs between KURL and GURL. Without this, the URL prior to the #fragment can change (i.e. its whitespace is stripped) when following an anchor link, which breaks the page (it causes a reload from source). See http://crbug.com/291747 for more details on this. R=brettw@chromium.org TBR=cbentzel@chromium.org BUG=291747 Review URL: https://codereview.chromium.org/23835019 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@236917 0039d316-1c4b-4281-b951-d872f2087c98
author: joth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-11-23 01:53:52 +0000
committer: joth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-11-23 01:53:52 +0000
commit: 369e84f70d256d188a1866d8cef52edf4468cd9b (patch)
tree: e7e90408125f4831ce7983fd37414ad763b965b1 /url/third_party/mozilla
parent: a7e3691579181327dc65b02d043e7c01d4b06cb9 (diff)
download: chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.zip
chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.gz
chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.bz2
2 files changed, 39 insertions, 21 deletions
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc
index fbc8a9b..84a7558 100644
--- a/url/third_party/mozilla/url_parse.cc
+++ b/url/third_party/mozilla/url_parse.cc
@@ -455,45 +455,53 @@ void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {
 // Initializes a path URL which is merely a scheme followed by a path. Examples
 // include "about:foo" and "javascript:alert('bar');"
 template<typename CHAR>
-void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) {
+void DoParsePathURL(const CHAR* spec, int spec_len,
+                    bool trim_path_end,
+                    Parsed* parsed) {
   // Get the non-path and non-scheme parts of the URL out of the way, we never
   // use them.
   parsed->username.reset();
   parsed->password.reset();
   parsed->host.reset();
   parsed->port.reset();
+  parsed->path.reset();
   parsed->query.reset();
   parsed->ref.reset();
 
   // Strip leading & trailing spaces and control characters.
-  int begin = 0;
-  TrimURL(spec, &begin, &spec_len);
+  int scheme_begin = 0;
+  TrimURL(spec, &scheme_begin, &spec_len, trim_path_end);
 
   // Handle empty specs or ones that contain only whitespace or control chars.
-  if (begin == spec_len) {
+  if (scheme_begin == spec_len) {
     parsed->scheme.reset();
     parsed->path.reset();
     return;
   }
 
+  int path_begin;
   // Extract the scheme, with the path being everything following. We also
   // handle the case where there is no scheme.
-  if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
+  if (ExtractScheme(&spec[scheme_begin], spec_len - scheme_begin,
+                    &parsed->scheme)) {
     // Offset the results since we gave ExtractScheme a substring.
-    parsed->scheme.begin += begin;
-
-    // For compatability with the standard URL parser, we treat no path as
-    // -1, rather than having a length of 0 (we normally wouldn't care so
-    // much for these non-standard URLs).
-    if (parsed->scheme.end() == spec_len - 1)
-      parsed->path.reset();
-    else
-      parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len);
+    parsed->scheme.begin += scheme_begin;
+    path_begin = parsed->scheme.end() + 1;
   } else {
-    // No scheme found, just path.
+    // No scheme case.
     parsed->scheme.reset();
-    parsed->path = MakeRange(begin, spec_len);
+    path_begin = scheme_begin;
   }
+
+  if (path_begin == spec_len)
+    return;
+  DCHECK_LT(path_begin, spec_len);
+
+  ParsePath(spec,
+            MakeRange(path_begin, spec_len),
+            &parsed->path,
+            &parsed->query,
+            &parsed->ref);
 }
 
 template<typename CHAR>
@@ -875,12 +883,18 @@ void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) {
   DoParseStandardURL(url, url_len, parsed);
 }
 
-void ParsePathURL(const char* url, int url_len, Parsed* parsed) {
-  DoParsePathURL(url, url_len, parsed);
+void ParsePathURL(const char* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed) {
+  DoParsePathURL(url, url_len, trim_path_end, parsed);
 }
 
-void ParsePathURL(const base::char16* url, int url_len, Parsed* parsed) {
-  DoParsePathURL(url, url_len, parsed);
+void ParsePathURL(const base::char16* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed) {
+  DoParsePathURL(url, url_len, trim_path_end, parsed);
 }
 
 void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
diff --git a/url/third_party/mozilla/url_parse.h b/url/third_party/mozilla/url_parse.h
index 5fa9322..9d943e5 100644
--- a/url/third_party/mozilla/url_parse.h
+++ b/url/third_party/mozilla/url_parse.h
@@ -238,9 +238,13 @@ URL_EXPORT void ParseStandardURL(const base::char16* url,
 // section but that aren't file URLs either. The scheme is parsed, and
 // everything after the scheme is considered as the path. This is used for
 // things like "about:" and "javascript:"
-URL_EXPORT void ParsePathURL(const char* url, int url_len, Parsed* parsed);
+URL_EXPORT void ParsePathURL(const char* url,
+                             int url_len,
+                             bool trim_path_end,
+                             Parsed* parsed);
 URL_EXPORT void ParsePathURL(const base::char16* url,
                              int url_len,
+                             bool trim_path_end,
                              Parsed* parsed);
 
 // FileURL is for file URLs. There are some special rules for interpreting
author	joth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-11-23 01:53:52 +0000
committer	joth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-11-23 01:53:52 +0000
commit	369e84f70d256d188a1866d8cef52edf4468cd9b (patch)
tree	e7e90408125f4831ce7983fd37414ad763b965b1 /url/third_party/mozilla
parent	a7e3691579181327dc65b02d043e7c01d4b06cb9 (diff)
download	chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.zip chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.gz chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.bz2