From ae8e3675d7d5ed8e243c3106b90d614a782a9979 Mon Sep 17 00:00:00 2001 From: "grt@chromium.org" Date: Wed, 20 Mar 2013 09:00:08 +0000 Subject: Convert http; into http: (and other valid schemes). Also: * Use the unnamed namespace instead of static functions. * Make ordering of functions in .cc file match that in .h file. BUG=176106 Review URL: https://chromiumcodereview.appspot.com/12393047 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@189213 0039d316-1c4b-4281-b951-d872f2087c98 --- .../autocomplete/autocomplete_input_unittest.cc | 2 +- chrome/browser/net/url_fixer_upper.cc | 165 ++++++++++++--------- chrome/browser/net/url_fixer_upper_unittest.cc | 15 +- 3 files changed, 108 insertions(+), 74 deletions(-) diff --git a/chrome/browser/autocomplete/autocomplete_input_unittest.cc b/chrome/browser/autocomplete/autocomplete_input_unittest.cc index b9a54d5..11d3658 100644 --- a/chrome/browser/autocomplete/autocomplete_input_unittest.cc +++ b/chrome/browser/autocomplete/autocomplete_input_unittest.cc @@ -34,7 +34,7 @@ TEST(AutocompleteInputTest, InputType) { { ASCIIToUTF16("foo/bar/"), AutocompleteInput::URL }, { ASCIIToUTF16("foo/bar baz\\"), AutocompleteInput::URL }, { ASCIIToUTF16("foo.com/bar"), AutocompleteInput::URL }, - { ASCIIToUTF16("foo;bar"), AutocompleteInput::QUERY }, + { ASCIIToUTF16("foo;bar"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("foo/bar baz"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("foo bar.com"), AutocompleteInput::QUERY }, { ASCIIToUTF16("foo bar"), AutocompleteInput::QUERY }, diff --git a/chrome/browser/net/url_fixer_upper.cc b/chrome/browser/net/url_fixer_upper.cc index f590702..1dadc5b 100644 --- a/chrome/browser/net/url_fixer_upper.cc +++ b/chrome/browser/net/url_fixer_upper.cc @@ -87,11 +87,9 @@ TrimPositions TrimWhitespaceUTF8(const std::string& input, return result; } -} // namespace - // does some basic fixes for input that we want to test for file-ness -static void PrepareStringForFileOps(const base::FilePath& text, - base::FilePath::StringType* output) { +void PrepareStringForFileOps(const base::FilePath& text, + base::FilePath::StringType* output) { #if defined(OS_WIN) TrimWhitespace(text.value(), TRIM_ALL, output); replace(output->begin(), output->end(), '/', '\\'); @@ -103,8 +101,8 @@ static void PrepareStringForFileOps(const base::FilePath& text, // Tries to create a full path from |text|. If the result is valid and the // file exists, returns true and sets |full_path| to the result. Otherwise, // returns false and leaves |full_path| unchanged. -static bool ValidPathForFile(const base::FilePath::StringType& text, - base::FilePath* full_path) { +bool ValidPathForFile(const base::FilePath::StringType& text, + base::FilePath* full_path) { base::FilePath file_path(text); if (!file_util::AbsolutePath(&file_path)) return false; @@ -119,7 +117,7 @@ static bool ValidPathForFile(const base::FilePath::StringType& text, #if defined(OS_POSIX) // Given a path that starts with ~, return a path that starts with an // expanded-out /user/foobar directory. -static std::string FixupHomedir(const std::string& text) { +std::string FixupHomedir(const std::string& text) { DCHECK(text.length() > 0 && text[0] == '~'); if (text.length() == 1 || text[1] == '/') { @@ -151,7 +149,7 @@ static std::string FixupHomedir(const std::string& text) { // (possibly invalid) file: URL in |fixed_up_url| for input beginning // with a drive specifier or "\\". Returns the unchanged input in other cases // (including file: URLs: these don't look like filenames). -static std::string FixupPath(const std::string& text) { +std::string FixupPath(const std::string& text) { DCHECK(!text.empty()); base::FilePath::StringType filename; @@ -183,8 +181,7 @@ static std::string FixupPath(const std::string& text) { // Checks |domain| to see if a valid TLD is already present. If not, appends // |desired_tld| to the domain, and prepends "www." unless it's already present. -static void AddDesiredTLD(const std::string& desired_tld, - std::string* domain) { +void AddDesiredTLD(const std::string& desired_tld, std::string* domain) { if (desired_tld.empty() || domain->empty()) return; @@ -218,9 +215,9 @@ static void AddDesiredTLD(const std::string& desired_tld, } } -static inline void FixupUsername(const std::string& text, - const url_parse::Component& part, - std::string* url) { +inline void FixupUsername(const std::string& text, + const url_parse::Component& part, + std::string* url) { if (!part.is_valid()) return; @@ -230,9 +227,9 @@ static inline void FixupUsername(const std::string& text, // password. FixupURL itself will append the '@' for us. } -static inline void FixupPassword(const std::string& text, - const url_parse::Component& part, - std::string* url) { +inline void FixupPassword(const std::string& text, + const url_parse::Component& part, + std::string* url) { if (!part.is_valid()) return; @@ -241,11 +238,11 @@ static inline void FixupPassword(const std::string& text, url->append(text, part.begin, part.len); } -static void FixupHost(const std::string& text, - const url_parse::Component& part, - bool has_scheme, - const std::string& desired_tld, - std::string* url) { +void FixupHost(const std::string& text, + const url_parse::Component& part, + bool has_scheme, + const std::string& desired_tld, + std::string* url) { if (!part.is_valid()) return; @@ -270,9 +267,9 @@ static void FixupHost(const std::string& text, url->append(domain); } -static void FixupPort(const std::string& text, - const url_parse::Component& part, - std::string* url) { +void FixupPort(const std::string& text, + const url_parse::Component& part, + std::string* url) { if (!part.is_valid()) return; @@ -281,9 +278,9 @@ static void FixupPort(const std::string& text, url->append(text, part.begin, part.len); } -static inline void FixupPath(const std::string& text, - const url_parse::Component& part, - std::string* url) { +inline void FixupPath(const std::string& text, + const url_parse::Component& part, + std::string* url) { if (!part.is_valid() || part.len == 0) { // We should always have a path. url->append("/"); @@ -294,9 +291,9 @@ static inline void FixupPath(const std::string& text, url->append(text, part.begin, part.len); } -static inline void FixupQuery(const std::string& text, - const url_parse::Component& part, - std::string* url) { +inline void FixupQuery(const std::string& text, + const url_parse::Component& part, + std::string* url) { if (!part.is_valid()) return; @@ -305,9 +302,9 @@ static inline void FixupQuery(const std::string& text, url->append(text, part.begin, part.len); } -static inline void FixupRef(const std::string& text, - const url_parse::Component& part, - std::string* url) { +inline void FixupRef(const std::string& text, + const url_parse::Component& part, + std::string* url) { if (!part.is_valid()) return; @@ -316,8 +313,8 @@ static inline void FixupRef(const std::string& text, url->append(text, part.begin, part.len); } -static bool HasPort(const std::string& original_text, - const url_parse::Component& scheme_component) { +bool HasPort(const std::string& original_text, + const url_parse::Component& scheme_component) { // Find the range between the ":" and the "/". size_t port_start = scheme_component.end() + 1; size_t port_end = port_start; @@ -340,13 +337,14 @@ static bool HasPort(const std::string& original_text, // If successful, set |scheme_component| to the text range where the scheme // was located, and fill |canon_scheme| with its canonicalized form. // Otherwise, return false and leave the outputs in an indeterminate state. -static bool GetValidScheme(const std::string &text, - url_parse::Component* scheme_component, - std::string* canon_scheme) { +bool GetValidScheme(const std::string &text, + url_parse::Component* scheme_component, + std::string* canon_scheme) { // Locate everything up to (but not including) the first ':' if (!url_parse::ExtractScheme(text.data(), static_cast(text.length()), - scheme_component)) + scheme_component)) { return false; + } // Make sure the scheme contains only valid characters, and convert // to lowercase. This also catches IPv6 literals like [::1], because @@ -377,13 +375,15 @@ static bool GetValidScheme(const std::string &text, return true; } -std::string URLFixerUpper::SegmentURL(const std::string& text, - url_parse::Parsed* parts) { +// Performs the work for URLFixerUpper::SegmentURL. |text| may be modified on +// output on success: a semicolon following a valid scheme is replaced with a +// colon. +std::string SegmentURLInternal(std::string* text, url_parse::Parsed* parts) { // Initialize the result. *parts = url_parse::Parsed(); std::string trimmed; - TrimWhitespaceUTF8(text, TRIM_ALL, &trimmed); + TrimWhitespaceUTF8(*text, TRIM_ALL, &trimmed); if (trimmed.empty()) return std::string(); // Nothing to segment. @@ -400,11 +400,24 @@ std::string URLFixerUpper::SegmentURL(const std::string& text, // Otherwise, we need to look at things carefully. std::string scheme; - if (!GetValidScheme(text, &parts->scheme, &scheme)) { - // Couldn't determine the scheme, so just pick one. - parts->scheme.reset(); - scheme.assign(StartsWithASCII(text, "ftp.", false) ? - chrome::kFtpScheme : chrome::kHttpScheme); + if (!GetValidScheme(*text, &parts->scheme, &scheme)) { + // Try again if there is a ';' in the text. If changing it to a ':' results + // in a scheme being found, continue processing with the modified text. + bool found_scheme = false; + size_t semicolon = text->find(';'); + if (semicolon != 0 && semicolon != std::string::npos) { + (*text)[semicolon] = ':'; + if (GetValidScheme(*text, &parts->scheme, &scheme)) + found_scheme = true; + else + (*text)[semicolon] = ';'; + } + if (!found_scheme) { + // Couldn't determine the scheme, so just pick one. + parts->scheme.reset(); + scheme.assign(StartsWithASCII(*text, "ftp.", false) ? + chrome::kFtpScheme : chrome::kHttpScheme); + } } // Proceed with about and chrome schemes, but not file or nonstandard schemes. @@ -415,30 +428,30 @@ std::string URLFixerUpper::SegmentURL(const std::string& text, if (scheme == chrome::kFileSystemScheme) { // Have the GURL parser do the heavy lifting for us. - url_parse::ParseFileSystemURL(text.data(), - static_cast(text.length()), parts); + url_parse::ParseFileSystemURL(text->data(), + static_cast(text->length()), parts); return scheme; } if (parts->scheme.is_valid()) { // Have the GURL parser do the heavy lifting for us. - url_parse::ParseStandardURL(text.data(), static_cast(text.length()), + url_parse::ParseStandardURL(text->data(), static_cast(text->length()), parts); return scheme; } // We need to add a scheme in order for ParseStandardURL to be happy. // Find the first non-whitespace character. - std::string::const_iterator first_nonwhite = text.begin(); - while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) + std::string::iterator first_nonwhite = text->begin(); + while ((first_nonwhite != text->end()) && IsWhitespace(*first_nonwhite)) ++first_nonwhite; // Construct the text to parse by inserting the scheme. std::string inserted_text(scheme); inserted_text.append(content::kStandardSchemeSeparator); - std::string text_to_parse(text.begin(), first_nonwhite); + std::string text_to_parse(text->begin(), first_nonwhite); text_to_parse.append(inserted_text); - text_to_parse.append(first_nonwhite, text.end()); + text_to_parse.append(first_nonwhite, text->end()); // Have the GURL parser do the heavy lifting for us. url_parse::ParseStandardURL(text_to_parse.data(), @@ -447,18 +460,35 @@ std::string URLFixerUpper::SegmentURL(const std::string& text, // Offset the results of the parse to match the original text. const int offset = -static_cast(inserted_text.length()); - OffsetComponent(offset, &parts->scheme); - OffsetComponent(offset, &parts->username); - OffsetComponent(offset, &parts->password); - OffsetComponent(offset, &parts->host); - OffsetComponent(offset, &parts->port); - OffsetComponent(offset, &parts->path); - OffsetComponent(offset, &parts->query); - OffsetComponent(offset, &parts->ref); + URLFixerUpper::OffsetComponent(offset, &parts->scheme); + URLFixerUpper::OffsetComponent(offset, &parts->username); + URLFixerUpper::OffsetComponent(offset, &parts->password); + URLFixerUpper::OffsetComponent(offset, &parts->host); + URLFixerUpper::OffsetComponent(offset, &parts->port); + URLFixerUpper::OffsetComponent(offset, &parts->path); + URLFixerUpper::OffsetComponent(offset, &parts->query); + URLFixerUpper::OffsetComponent(offset, &parts->ref); return scheme; } +} // namespace + +std::string URLFixerUpper::SegmentURL(const std::string& text, + url_parse::Parsed* parts) { + std::string mutable_text(text); + return SegmentURLInternal(&mutable_text, parts); +} + +string16 URLFixerUpper::SegmentURL(const string16& text, + url_parse::Parsed* parts) { + std::string text_utf8 = UTF16ToUTF8(text); + url_parse::Parsed parts_utf8; + std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8); + UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts); + return UTF8ToUTF16(scheme_utf8); +} + GURL URLFixerUpper::FixupURL(const std::string& text, const std::string& desired_tld) { std::string trimmed; @@ -468,7 +498,7 @@ GURL URLFixerUpper::FixupURL(const std::string& text, // Segment the URL. url_parse::Parsed parts; - std::string scheme(SegmentURL(trimmed, &parts)); + std::string scheme(SegmentURLInternal(&trimmed, &parts)); // For view-source: URLs, we strip "view-source:", do fixup, and stick it back // on. This allows us to handle things like "view-source:google.com". @@ -598,15 +628,6 @@ GURL URLFixerUpper::FixupRelativeFile(const base::FilePath& base_dir, return FixupURL(text_utf8, std::string()); } -string16 URLFixerUpper::SegmentURL(const string16& text, - url_parse::Parsed* parts) { - std::string text_utf8 = UTF16ToUTF8(text); - url_parse::Parsed parts_utf8; - std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8); - UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts); - return UTF8ToUTF16(scheme_utf8); -} - void URLFixerUpper::OffsetComponent(int offset, url_parse::Component* part) { DCHECK(part); diff --git a/chrome/browser/net/url_fixer_upper_unittest.cc b/chrome/browser/net/url_fixer_upper_unittest.cc index 8c6cfc8..cdfd77a 100644 --- a/chrome/browser/net/url_fixer_upper_unittest.cc +++ b/chrome/browser/net/url_fixer_upper_unittest.cc @@ -311,13 +311,26 @@ struct fixup_case { {"[::]:180/path", "", "http://[::]:180/path"}, // TODO(pmarks): Maybe we should parse bare IPv6 literals someday. {"::1", "", "::1"}, + // Semicolon as scheme separator for standard schemes. + {"http;//www.google.com/", "", "http://www.google.com/"}, + {"about;chrome", "", "chrome://chrome/"}, + // Semicolon left as-is for non-standard schemes. + {"whatsup;//fool", "", "whatsup://fool"}, + // Semicolon left as-is in URL itself. + {"http://host/port?query;moar", "", "http://host/port?query;moar"}, + // Fewer slashes than expected. + {"http;www.google.com/", "", "http://www.google.com/"}, + {"http;/www.google.com/", "", "http://www.google.com/"}, + // Semicolon at start. + {";http://www.google.com/", "", "http://%3Bhttp//www.google.com/"}, }; TEST(URLFixerUpperTest, FixupURL) { for (size_t i = 0; i < arraysize(fixup_cases); ++i) { fixup_case value = fixup_cases[i]; EXPECT_EQ(value.output, URLFixerUpper::FixupURL(value.input, - value.desired_tld).possibly_invalid_spec()); + value.desired_tld).possibly_invalid_spec()) + << "input: " << value.input; } // Check the TLD-appending functionality -- cgit v1.1