Make relative file url parsing fail where there is a host:port in the relative URL.

BUG=285720 R=brettw@chromium.org, jar@chromium.org Committed: https://src.chromium.org/viewvc/chrome?view=rev&revision=223928 Review URL: https://codereview.chromium.org/23464046 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@223969 0039d316-1c4b-4281-b951-d872f2087c98
author: tsepez@chromium.org <tsepez@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-09-18 22:55:31 +0000
committer: tsepez@chromium.org <tsepez@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-09-18 22:55:31 +0000
commit: 598c8382c9e0556f16a486d758c61d108b4732e0 (patch)
tree: c3133ca8061e4eee70d10060d84fb1f04bd6621c
parent: b887a2620262a8b51be8973cbd366e03e5ddaed9 (diff)
download: chromium_src-598c8382c9e0556f16a486d758c61d108b4732e0.zip
chromium_src-598c8382c9e0556f16a486d758c61d108b4732e0.tar.gz
chromium_src-598c8382c9e0556f16a486d758c61d108b4732e0.tar.bz2
5 files changed, 105 insertions, 27 deletions
diff --git a/chrome/renderer/net/net_error_helper.cc b/chrome/renderer/net/net_error_helper.cc
index 73e63b7..364ff93 100644
--- a/chrome/renderer/net/net_error_helper.cc
+++ b/chrome/renderer/net/net_error_helper.cc
@@ -40,6 +40,8 @@ namespace {
 
 bool IsLoadingErrorPage(WebKit::WebFrame* frame) {
   GURL url = frame->provisionalDataSource()->request().url();
+  if (!url.is_valid())
+    return false;
   return url.spec() == kUnreachableWebDataURL;
 }
 
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc
index 84317f8..4edd6ced 100644
--- a/url/url_canon_relative.cc
+++ b/url/url_canon_relative.cc
@@ -475,11 +475,14 @@ bool DoResolveRelativeURL(const char* base_url,
 #else
   // Other platforms need explicit handling for file: URLs with multiple
   // slashes because the generic scheme parsing always extracts a host, but a
-  // file: URL only has a host if it has exactly 2 slashes. This also
-  // handles the special case where the URL is only slashes, since that
-  // doesn't have a host part either.
+  // file: URL only has a host if it has exactly 2 slashes. Even if it does
+  // have a host, we want to use the special host detection logic for file
+  // URLs provided by DoResolveAbsoluteFile(), as opposed to the generic host
+  // detection logic, for consistency with parsing file URLs from scratch.
+  // This also handles the special case where the URL is only slashes,
+  // since that doesn't have a host part either.
   if (base_is_file &&
-      (num_slashes > 2 || num_slashes == relative_component.len)) {
+      (num_slashes >= 2 || num_slashes == relative_component.len)) {
     return DoResolveAbsoluteFile(relative_url, relative_component,
                                  query_converter, output, out_parsed);
   }
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index f6cd3a2..2e93f92 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -2001,6 +2001,11 @@ TEST(URLCanonTest, ResolveRelativeURL) {
       // is not file.
     {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
     {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
+      // Ensure that ports aren't allowed for hosts relative to a file url.
+      // Although the result string shows a host:port portion, the call to
+      // resolve the relative URL returns false, indicating parse failure,
+      // which is what is required.
+    {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
       // Filesystem URL tests; filesystem URLs are only valid and relative if
       // they have no scheme, e.g. "./index.html".  There's no valid equivalent
       // to http:index.html.
diff --git a/url/url_parse_file.cc b/url/url_parse_file.cc
index a8907e7..a2f7ce3 100644
--- a/url/url_parse_file.cc
+++ b/url/url_parse_file.cc
@@ -110,8 +110,9 @@ void DoParseLocalFile(const CHAR* spec,
 }
 
 // Backend for the external functions that operates on either char type.
-// We are handed the character after the "file:" at the beginning of the spec.
-// Usually this is a slash, but needn't be; we allow paths like "file:c:\foo".
+// Handles cases where there is a scheme, but also when handed the first
+// character following the "file:" at the beginning of the spec. If so,
+// this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
 template<typename CHAR>
 void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   DCHECK(spec_len >= 0);
@@ -130,8 +131,8 @@ void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   int begin = 0;
   TrimURL(spec, &begin, &spec_len);
 
-  // Find the scheme.
-  int num_slashes;
+  // Find the scheme, if any.
+  int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
   int after_scheme;
   int after_slashes;
 #ifdef WIN32
@@ -140,7 +141,6 @@ void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   // links like "c:/foo/bar" or "//foo/bar". This is also called by the
   // relative URL resolver when it determines there is an absolute URL, which
   // may give us input like "/c:/foo".
-  num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
   after_slashes = begin + num_slashes;
   if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
     // Windows path, don't try to extract the scheme (for example, "c:\foo").
@@ -153,7 +153,12 @@ void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   } else
 #endif
   {
-    if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
+    // ExtractScheme doesn't understand the possibility of filenames with
+    // colons in them, in which case it returns the entire spec up to the
+    // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
+    // the foo.c: scheme.
+    if (!num_slashes &&
+        ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
       // Offset the results since we gave ExtractScheme a substring.
       parsed->scheme.begin += begin;
       after_scheme = parsed->scheme.end() + 1;
@@ -173,7 +178,6 @@ void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   }
 
   num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
-
   after_slashes = after_scheme + num_slashes;
 #ifdef WIN32
   // Check whether the input is a drive again. We checked above for windows
diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc
index a65ff85..ad47616 100644
--- a/url/url_parse_unittest.cc
+++ b/url/url_parse_unittest.cc
@@ -371,12 +371,9 @@ TEST(URLParser, PathURL) {
   }
 }
 
-#ifdef WIN32
-
-// WindowsFile ----------------------------------------------------------------
-
-// Various incarnations of file URLs. These are for Windows only.
+// Various incarnations of file URLs.
 static URLParseCase file_cases[] = {
+#ifdef WIN32
 {"file:server",              "file", NULL, NULL, "server", -1, NULL,          NULL, NULL},
 {"  file: server  \t",       "file", NULL, NULL, " server",-1, NULL,          NULL, NULL},
 {"FiLe:c|",                  "FiLe", NULL, NULL, NULL,     -1, "c|",          NULL, NULL},
@@ -404,29 +401,96 @@ static URLParseCase file_cases[] = {
   // Queries and refs are valid for file URLs as well.
 {"file:///C:/foo.html?#",   "file", NULL, NULL,  NULL,     -1, "/C:/foo.html",  "",   ""},
 {"file:///C:/foo.html?query=yes#ref", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "query=yes", "ref"},
+#else  // WIN32
+  // No slashes.
+  {"file:",                    "file", NULL, NULL, NULL,      -1, NULL,             NULL, NULL},
+  {"file:path",                "file", NULL, NULL, NULL,      -1, "path",           NULL, NULL},
+  {"file:path/",               "file", NULL, NULL, NULL,      -1, "path/",          NULL, NULL},
+  {"file:path/f.txt",          "file", NULL, NULL, NULL,      -1, "path/f.txt",     NULL, NULL},
+  // One slash.
+  {"file:/",                   "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:/path",               "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:/path/",              "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:/path/f.txt",         "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // Two slashes.
+  {"file://",                  "file", NULL, NULL, NULL,      -1, NULL,             NULL, NULL},
+  {"file://server",            "file", NULL, NULL, "server",  -1, NULL,             NULL, NULL},
+  {"file://server/",           "file", NULL, NULL, "server",  -1, "/",              NULL, NULL},
+  {"file://server/f.txt",      "file", NULL, NULL, "server",  -1, "/f.txt",         NULL, NULL},
+  // Three slashes.
+  {"file:///",                 "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:///path",             "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:///path/",            "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:///path/f.txt",       "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // More than three slashes.
+  {"file:////",                "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:////path",            "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:////path/",           "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:////path/f.txt",      "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // Schemeless URLs
+  {"path/f.txt",               NULL,   NULL, NULL, NULL,       -1, "path/f.txt",    NULL, NULL},
+  {"path:80/f.txt",            "path", NULL, NULL, NULL,       -1, "80/f.txt",      NULL, NULL},
+  {"path/f.txt:80",            "path/f.txt",NULL, NULL, NULL,  -1, "80",            NULL, NULL}, // Wrong.
+  {"/path/f.txt",              NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"/path:80/f.txt",           NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"/path/f.txt:80",           NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  {"//server/f.txt",           NULL,   NULL, NULL, "server",   -1, "/f.txt",        NULL, NULL},
+  {"//server:80/f.txt",        NULL,   NULL, NULL, "server:80",-1, "/f.txt",        NULL, NULL},
+  {"//server/f.txt:80",        NULL,   NULL, NULL, "server",   -1, "/f.txt:80",     NULL, NULL},
+  {"///path/f.txt",            NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"///path:80/f.txt",         NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"///path/f.txt:80",         NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  {"////path/f.txt",           NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"////path:80/f.txt",        NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"////path/f.txt:80",        NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  // Queries and refs are valid for file URLs as well.
+  {"file:///foo.html?#",       "file", NULL, NULL, NULL,       -1, "/foo.html",     "",   ""},
+  {"file:///foo.html?q=y#ref", "file", NULL, NULL, NULL,       -1, "/foo.html",    "q=y", "ref"},
+#endif  // WIN32
 };
 
-TEST(URLParser, WindowsFile) {
+TEST(URLParser, ParseFileURL) {
   // Declared outside for loop to try to catch cases in init() where we forget
   // to reset something that is reset by the construtor.
   url_parse::Parsed parsed;
-  for (int i = 0; i < arraysize(file_cases); i++) {
+  for (size_t i = 0; i < arraysize(file_cases); i++) {
     const char* url = file_cases[i].input;
     url_parse::ParseFileURL(url, static_cast<int>(strlen(url)), &parsed);
     int port = url_parse::ParsePort(url, parsed.port);
 
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].scheme, parsed.scheme));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].username, parsed.username));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].password, parsed.password));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].host, parsed.host));
-    EXPECT_EQ(file_cases[i].port, port);
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].path, parsed.path));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].query, parsed.query));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].ref, parsed.ref));
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].scheme, parsed.scheme))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.scheme.begin << ", " << parsed.scheme.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].username, parsed.username))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.username.begin << ", " << parsed.username.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].password, parsed.password))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.password.begin << ", " << parsed.password.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].host, parsed.host))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.host.begin << ", " << parsed.host.len;
+
+    EXPECT_EQ(file_cases[i].port, port)
+        << " for case #" << i << " [ " << url << "] " << port;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].path, parsed.path))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.path.begin << ", " << parsed.path.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].query, parsed.query))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.query.begin << ", " << parsed.query.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].ref, parsed.ref))
+        << " for case #" << i << " [ "<< url << "] "
+        << parsed.query.begin << ", " << parsed.scheme.len;
   }
 }
 
-#endif  // WIN32
 
 TEST(URLParser, ExtractFileName) {
   struct FileCase {
author	tsepez@chromium.org <tsepez@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-09-18 22:55:31 +0000
committer	tsepez@chromium.org <tsepez@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-09-18 22:55:31 +0000
commit	598c8382c9e0556f16a486d758c61d108b4732e0 (patch)
tree	c3133ca8061e4eee70d10060d84fb1f04bd6621c
parent	b887a2620262a8b51be8973cbd366e03e5ddaed9 (diff)
download	chromium_src-598c8382c9e0556f16a486d758c61d108b4732e0.zip chromium_src-598c8382c9e0556f16a486d758c61d108b4732e0.tar.gz chromium_src-598c8382c9e0556f16a486d758c61d108b4732e0.tar.bz2