diff options
author | vitbar <vitbar@yandex-team.ru> | 2015-04-15 03:54:41 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-04-15 10:55:04 +0000 |
commit | 27804d10158be631d7519099cdd01480339db175 (patch) | |
tree | be0477f9f885fe8c353e090a4ee619256daf8aa5 /components | |
parent | 262c0ca3f9c20df38e5c55d9db0db2f234cde831 (diff) | |
download | chromium_src-27804d10158be631d7519099cdd01480339db175.zip chromium_src-27804d10158be631d7519099cdd01480339db175.tar.gz chromium_src-27804d10158be631d7519099cdd01480339db175.tar.bz2 |
Fixed ExtractSearchTermsFromURL for search engines with encoding != "UTF-8".
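Some search engines specify a non-UTF-8 input encoding; e.g., the "mail.ru" engine specifies "windows-1251". After this patch, the TemplateURL::ExtractSearchTermsFromURL function decodes the extracted search terms via SearchTermToString16, which tries the engine's declared input encodings, so it works correctly in such cases.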
R=pkasting@chromium.org
Review URL: https://codereview.chromium.org/1088523002
Cr-Commit-Position: refs/heads/master@{#325220}
Diffstat (limited to 'components')
-rw-r--r-- | components/search_engines/template_url.cc | 20 | ||||
-rw-r--r-- | components/search_engines/template_url_unittest.cc | 75 |
2 files changed, 85 insertions, 10 deletions
diff --git a/components/search_engines/template_url.cc b/components/search_engines/template_url.cc index f95acb7..cb4c6db 100644 --- a/components/search_engines/template_url.cc +++ b/components/search_engines/template_url.cc @@ -459,10 +459,12 @@ base::string16 TemplateURLRef::SearchTermToString16( const std::vector<std::string>& encodings = owner_->input_encodings(); base::string16 result; - std::string unescaped = net::UnescapeURLComponent( - term, - net::UnescapeRule::REPLACE_PLUS_WITH_SPACE | - net::UnescapeRule::URL_SPECIAL_CHARS); + net::UnescapeRule::Type unescape_rules = + net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS; + if (search_term_key_location_ != url::Parsed::PATH) + unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; + + std::string unescaped = net::UnescapeURLComponent(term, unescape_rules); for (size_t i = 0; i < encodings.size(); ++i) { if (base::CodepageToUTF16(unescaped, encodings[i].c_str(), base::OnStringConversionError::FAIL, &result)) @@ -478,7 +480,8 @@ base::string16 TemplateURLRef::SearchTermToString16( // encoding is. We need to substitute spaces for pluses ourselves since we're // not sending it through an unescaper. result = base::UTF8ToUTF16(term); - std::replace(result.begin(), result.end(), '+', ' '); + if (unescape_rules & net::UnescapeRule::REPLACE_PLUS_WITH_SPACE) + std::replace(result.begin(), result.end(), '+', ' '); return result; } @@ -524,8 +527,6 @@ bool TemplateURLRef::ExtractSearchTermsFromURL( std::string source; url::Component position; - net::UnescapeRule::Type unescape_rules = - net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS; if (search_term_key_location_ == url::Parsed::PATH) { source = url.path(); @@ -560,12 +561,11 @@ bool TemplateURLRef::ExtractSearchTermsFromURL( } if (!key_found) return false; - unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; } // Extract the search term. 
- *search_terms = net::UnescapeAndDecodeUTF8URLComponent( - source.substr(position.begin, position.len), unescape_rules); + *search_terms = SearchTermToString16( + source.substr(position.begin, position.len)); if (search_terms_component) *search_terms_component = search_term_key_location_; if (search_terms_position) diff --git a/components/search_engines/template_url_unittest.cc b/components/search_engines/template_url_unittest.cc index 6d09bf8..861ce79 100644 --- a/components/search_engines/template_url_unittest.cc +++ b/components/search_engines/template_url_unittest.cc @@ -1142,6 +1142,81 @@ TEST_F(TemplateURLTest, ExtractSearchTermsFromURLPath) { EXPECT_EQ(base::string16(), result); } +// Checks that the ExtractSearchTermsFromURL function works correctly +// for urls containing non-latin characters in UTF8 encoding. +TEST_F(TemplateURLTest, ExtractSearchTermsFromUTF8URL) { + TemplateURLData data; + data.SetURL("http://utf-8.ru/?q={searchTerms}"); + data.alternate_urls.push_back("http://utf-8.ru/#q={searchTerms}"); + data.alternate_urls.push_back("http://utf-8.ru/path/{searchTerms}"); + TemplateURL url(data); + base::string16 result; + + // Russian text encoded with UTF-8. 
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( + GURL("http://utf-8.ru/?q=\xD0\x97\xD0\xB4\xD1\x80\xD0\xB0\xD0\xB2\xD1\x81" + "\xD1\x82\xD0\xB2\xD1\x83\xD0\xB9,+\xD0\xBC\xD0\xB8\xD1\x80!"), + search_terms_data_, &result)); + EXPECT_EQ( + base::WideToUTF16( + L"\x0417\x0434\x0440\x0430\x0432\x0441\x0442\x0432\x0443\x0439, " + L"\x043C\x0438\x0440!"), + result); + + EXPECT_TRUE(url.ExtractSearchTermsFromURL( + GURL("http://utf-8.ru/#q=\xD0\xB4\xD0\xB2\xD0\xB0+\xD1\x81\xD0\xBB" + "\xD0\xBE\xD0\xB2\xD0\xB0"), + search_terms_data_, &result)); + EXPECT_EQ( + base::WideToUTF16(L"\x0434\x0432\x0430 \x0441\x043B\x043E\x0432\x0430"), + result); + + EXPECT_TRUE(url.ExtractSearchTermsFromURL( + GURL("http://utf-8.ru/path/\xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\x8B%20" + "\xD0\x90%20\xD0\xB8%20A"), + search_terms_data_, &result)); + EXPECT_EQ( + base::WideToUTF16(L"\x0431\x0443\x043A\x0432\x044B \x0410 \x0438 A"), + result); +} + +// Checks that the ExtractSearchTermsFromURL function works correctly +// for urls containing non-latin characters in non-UTF8 encoding. +TEST_F(TemplateURLTest, ExtractSearchTermsFromNonUTF8URL) { + TemplateURLData data; + data.SetURL("http://windows-1251.ru/?q={searchTerms}"); + data.alternate_urls.push_back("http://windows-1251.ru/#q={searchTerms}"); + data.alternate_urls.push_back("http://windows-1251.ru/path/{searchTerms}"); + data.input_encodings.push_back("windows-1251"); + TemplateURL url(data); + base::string16 result; + + // Russian text encoded with Windows-1251. 
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( + GURL("http://windows-1251.ru/?q=%C7%E4%F0%E0%E2%F1%F2%E2%F3%E9%2C+" + "%EC%E8%F0!"), + search_terms_data_, &result)); + EXPECT_EQ( + base::WideToUTF16( + L"\x0417\x0434\x0440\x0430\x0432\x0441\x0442\x0432\x0443\x0439, " + L"\x043C\x0438\x0440!"), + result); + + EXPECT_TRUE(url.ExtractSearchTermsFromURL( + GURL("http://windows-1251.ru/#q=%E4%E2%E0+%F1%EB%EE%E2%E0"), + search_terms_data_, &result)); + EXPECT_EQ( + base::WideToUTF16(L"\x0434\x0432\x0430 \x0441\x043B\x043E\x0432\x0430"), + result); + + EXPECT_TRUE(url.ExtractSearchTermsFromURL( + GURL("http://windows-1251.ru/path/%E1%F3%EA%E2%FB%20%C0%20%E8%20A"), + search_terms_data_, &result)); + EXPECT_EQ( + base::WideToUTF16(L"\x0431\x0443\x043A\x0432\x044B \x0410 \x0438 A"), + result); +} + TEST_F(TemplateURLTest, HasSearchTermsReplacementKey) { TemplateURLData data; data.SetURL("http://google.com/?q={searchTerms}"); |