summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-06-02 02:37:40 +0000
committerpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-06-02 02:37:40 +0000
commit79845effcd569cb61784e8a8c221f839e6e23525 (patch)
treed86e942097765499876b48a5d764592e2b9c376f /net
parent9902c201466fe4a701f43a124f4d22b6829c87e6 (diff)
downloadchromium_src-79845effcd569cb61784e8a8c221f839e6e23525.zip
chromium_src-79845effcd569cb61784e8a8c221f839e6e23525.tar.gz
chromium_src-79845effcd569cb61784e8a8c221f839e6e23525.tar.bz2
Strip the trailing slash from URLs like "http://google.com/". This especially helps when the scheme has also been stripped, as it makes the hostname look less unbalanced. We're careful to avoid stripping the slash when doing so would confuse the omnibox.
This also moves to more aggressive stripping and/or unescaping in several places. In general, it seems like we should be as aggressive as is feasible. BUG=43587 TEST=Visit google.com. There should be no slash in the address bar. Review URL: http://codereview.chromium.org/2389002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@48691 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r--net/base/net_util.cc48
-rw-r--r--net/base/net_util.h18
-rw-r--r--net/base/net_util_unittest.cc24
3 files changed, 66 insertions, 24 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 99b21b8..66c2dba 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -774,11 +774,12 @@ std::wstring FormatViewSourceUrl(const GURL& url,
namespace net {
-const FormatUrlType kFormatUrlOmitNothing = 0;
-const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
-const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
+const FormatUrlType kFormatUrlOmitNothing = 0;
+const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
+const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
+const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |
- kFormatUrlOmitHTTP;
+ kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;
std::set<int> explicitly_allowed_ports;
@@ -1441,21 +1442,17 @@ std::wstring FormatUrl(const GURL& url,
true),
std::back_inserter(url_string));
- const wchar_t* const kHTTP = L"http://";
- const char* const kFTP = "ftp.";
- const size_t kHTTPSize = std::wstring(kHTTP).size();
- // The omnibox treats ftp.foo.com as ftp://foo.com. This means that if we
- // trimmed http off a string that starts with http://ftp and the user tried to
- // reload the page the user would end up with a scheme of ftp://. For example,
- // 'http://ftp.foo.com' -> 'ftp.foo.com' -> 'ftp://foo.com'. For this reason
- // don't strip http off url's whose scheme is http and the host starts with
- // 'ftp.'.
+ const wchar_t kHTTP[] = L"http://";
+ const char kFTP[] = "ftp.";
+ // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
+ // means that if we trim "http://" off a URL whose host starts with "ftp." and
+ // the user inputs this into any field subject to fixup (which is basically
+ // all input fields), the meaning would be changed. (In fact, often the
+ // formatted URL is directly pre-filled into an input field.) For this reason
+ // we avoid stripping "http://" in this case.
bool omit_http =
- ((format_types & kFormatUrlOmitHTTP) != 0 &&
- url_string == kHTTP && (!parsed.host.is_valid() ||
- (parsed.host.is_nonempty() &&
- spec.compare(parsed.host.begin,
- std::string(kFTP).size(), kFTP))));
+ (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) &&
+ (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);
new_parsed->scheme = parsed.scheme;
@@ -1522,8 +1519,11 @@ std::wstring FormatUrl(const GURL& url,
}
// Path and query both get the same general unescape & convert treatment.
- AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
- &new_parsed->path, offset_for_adjustment);
+ if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
+ !CanStripTrailingSlash(url)) {
+ AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
+ &new_parsed->path, offset_for_adjustment);
+ }
if (parsed.query.is_valid())
url_string.push_back('?');
AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
@@ -1561,6 +1561,7 @@ std::wstring FormatUrl(const GURL& url,
// If we need to strip out http do it after the fact. This way we don't need
// to worry about how offset_for_adjustment is interpreted.
+ const size_t kHTTPSize = arraysize(kHTTP) - 1;
if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) {
url_string = url_string.substr(kHTTPSize);
if (*offset_for_adjustment != std::wstring::npos) {
@@ -1582,6 +1583,13 @@ std::wstring FormatUrl(const GURL& url,
return url_string;
}
+bool CanStripTrailingSlash(const GURL& url) {
+ // Omit the path only for standard, non-file URLs with nothing but "/" after
+ // the hostname.
+ return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() &&
+ !url.has_ref() && url.path() == "/";
+}
+
GURL SimplifyUrlForRequest(const GURL& url) {
DCHECK(url.is_valid());
GURL::Replacements replacements;
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 5c3e37e..a66b45c 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -49,6 +49,10 @@ extern const FormatUrlType kFormatUrlOmitUsernamePassword;
// If the scheme is 'http://', it's removed.
extern const FormatUrlType kFormatUrlOmitHTTP;
+// Omits the path if it is just a slash and there is no query or ref. This is
+// meaningful for non-file "standard" URLs.
+extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
+
// Convenience for omitting all unecessary types.
extern const FormatUrlType kFormatUrlOmitAll;
@@ -293,13 +297,19 @@ std::wstring FormatUrl(const GURL& url,
size_t* prefix_end,
size_t* offset_for_adjustment);
-// This is a convenience for FormatUrl with
-// format_types=kFormatUrlOmitUsernamePassword and unescape=SPACES.
+// This is a convenience function for FormatUrl() with
+// format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical
+// set of flags for "URLs to display to the user". You should be cautious about
+// using this for URLs which will be parsed or sent to other applications.
inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
- return FormatUrl(url, languages, kFormatUrlOmitUsernamePassword,
- UnescapeRule::SPACES, NULL, NULL, NULL);
+ return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES,
+ NULL, NULL, NULL);
}
+// Returns whether FormatUrl() would strip a trailing slash from |url|, given a
+// format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
+bool CanStripTrailingSlash(const GURL& url);
+
// Strip the portions of |url| that aren't core to the network request.
// - user name / password
// - reference section
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 3e1661d..44cdd06 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -1400,6 +1400,30 @@ TEST(NetUtilTest, FormatUrl) {
"http://ftp.google.com/", L"en", net::kFormatUrlOmitHTTP,
UnescapeRule::NORMAL, L"http://ftp.google.com/",
7},
+
+ // -------- omit trailing slash on bare hostname --------
+ {"omit slash when it's the entire path",
+ "http://www.google.com/", L"en",
+ net::kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com", 7},
+ {"omit slash when there's a ref",
+ "http://www.google.com/#ref", L"en",
+ net::kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com/#ref", 7},
+ {"omit slash when there's a query",
+ "http://www.google.com/?", L"en",
+ net::kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com/?", 7},
+ {"omit slash when it's not the entire path",
+ "http://www.google.com/foo", L"en",
+ net::kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com/foo", 7},
+ {"omit slash for nonstandard URLs",
+ "data:/", L"en", net::kFormatUrlOmitTrailingSlashOnBareHostname,
+ UnescapeRule::NORMAL, L"data:/", 5},
+ {"omit slash for file URLs",
+ "file:///", L"en", net::kFormatUrlOmitTrailingSlashOnBareHostname,
+ UnescapeRule::NORMAL, L"file:///", 7},
};
for (size_t i = 0; i < arraysize(tests); ++i) {