summaryrefslogtreecommitdiffstats
path: root/components
diff options
context:
space:
mode:
authorvitbar <vitbar@yandex-team.ru>2015-04-15 03:54:41 -0700
committerCommit bot <commit-bot@chromium.org>2015-04-15 10:55:04 +0000
commit27804d10158be631d7519099cdd01480339db175 (patch)
treebe0477f9f885fe8c353e090a4ee619256daf8aa5 /components
parent262c0ca3f9c20df38e5c55d9db0db2f234cde831 (diff)
downloadchromium_src-27804d10158be631d7519099cdd01480339db175.zip
chromium_src-27804d10158be631d7519099cdd01480339db175.tar.gz
chromium_src-27804d10158be631d7519099cdd01480339db175.tar.bz2
Fixed ExtractSearchTermsFromURL for search engines with encoding != "UTF-8".
Some search engines specify a non-UTF-8 input encoding; e.g., the "mail.ru" engine specifies "windows-1251". After this patch, the TemplateURL::ExtractSearchTermsFromURL function works correctly in such cases.

R=pkasting@chromium.org

Review URL: https://codereview.chromium.org/1088523002

Cr-Commit-Position: refs/heads/master@{#325220}
Diffstat (limited to 'components')
-rw-r--r--components/search_engines/template_url.cc20
-rw-r--r--components/search_engines/template_url_unittest.cc75
2 files changed, 85 insertions, 10 deletions
diff --git a/components/search_engines/template_url.cc b/components/search_engines/template_url.cc
index f95acb7..cb4c6db 100644
--- a/components/search_engines/template_url.cc
+++ b/components/search_engines/template_url.cc
@@ -459,10 +459,12 @@ base::string16 TemplateURLRef::SearchTermToString16(
const std::vector<std::string>& encodings = owner_->input_encodings();
base::string16 result;
- std::string unescaped = net::UnescapeURLComponent(
- term,
- net::UnescapeRule::REPLACE_PLUS_WITH_SPACE |
- net::UnescapeRule::URL_SPECIAL_CHARS);
+ net::UnescapeRule::Type unescape_rules =
+ net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS;
+ if (search_term_key_location_ != url::Parsed::PATH)
+ unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
+
+ std::string unescaped = net::UnescapeURLComponent(term, unescape_rules);
for (size_t i = 0; i < encodings.size(); ++i) {
if (base::CodepageToUTF16(unescaped, encodings[i].c_str(),
base::OnStringConversionError::FAIL, &result))
@@ -478,7 +480,8 @@ base::string16 TemplateURLRef::SearchTermToString16(
// encoding is. We need to substitute spaces for pluses ourselves since we're
// not sending it through an unescaper.
result = base::UTF8ToUTF16(term);
- std::replace(result.begin(), result.end(), '+', ' ');
+ if (unescape_rules & net::UnescapeRule::REPLACE_PLUS_WITH_SPACE)
+ std::replace(result.begin(), result.end(), '+', ' ');
return result;
}
@@ -524,8 +527,6 @@ bool TemplateURLRef::ExtractSearchTermsFromURL(
std::string source;
url::Component position;
- net::UnescapeRule::Type unescape_rules =
- net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS;
if (search_term_key_location_ == url::Parsed::PATH) {
source = url.path();
@@ -560,12 +561,11 @@ bool TemplateURLRef::ExtractSearchTermsFromURL(
}
if (!key_found)
return false;
- unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
}
// Extract the search term.
- *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
- source.substr(position.begin, position.len), unescape_rules);
+ *search_terms = SearchTermToString16(
+ source.substr(position.begin, position.len));
if (search_terms_component)
*search_terms_component = search_term_key_location_;
if (search_terms_position)
diff --git a/components/search_engines/template_url_unittest.cc b/components/search_engines/template_url_unittest.cc
index 6d09bf8..861ce79 100644
--- a/components/search_engines/template_url_unittest.cc
+++ b/components/search_engines/template_url_unittest.cc
@@ -1142,6 +1142,81 @@ TEST_F(TemplateURLTest, ExtractSearchTermsFromURLPath) {
EXPECT_EQ(base::string16(), result);
}
+// Checks that ExtractSearchTermsFromURL recovers non-Latin search terms from
+// URLs carrying UTF-8 bytes; this test adds no entry to input_encodings.
+TEST_F(TemplateURLTest, ExtractSearchTermsFromUTF8URL) {
+ TemplateURLData data;
+ data.SetURL("http://utf-8.ru/?q={searchTerms}");  // terms in the query
+ data.alternate_urls.push_back("http://utf-8.ru/#q={searchTerms}");  // ref
+ data.alternate_urls.push_back("http://utf-8.ru/path/{searchTerms}");  // path
+ TemplateURL url(data);
+ base::string16 result;
+
+ // Query form: Russian text as UTF-8 bytes; '+' is expected back as a space.
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL(
+ GURL("http://utf-8.ru/?q=\xD0\x97\xD0\xB4\xD1\x80\xD0\xB0\xD0\xB2\xD1\x81"
+ "\xD1\x82\xD0\xB2\xD1\x83\xD0\xB9,+\xD0\xBC\xD0\xB8\xD1\x80!"),
+ search_terms_data_, &result));
+ EXPECT_EQ(
+ base::WideToUTF16(
+ L"\x0417\x0434\x0440\x0430\x0432\x0441\x0442\x0432\x0443\x0439, "
+ L"\x043C\x0438\x0440!"),
+ result);
+
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL(  // "#q=" form; '+' -> space
+ GURL("http://utf-8.ru/#q=\xD0\xB4\xD0\xB2\xD0\xB0+\xD1\x81\xD0\xBB"
+ "\xD0\xBE\xD0\xB2\xD0\xB0"),
+ search_terms_data_, &result));
+ EXPECT_EQ(
+ base::WideToUTF16(L"\x0434\x0432\x0430 \x0441\x043B\x043E\x0432\x0430"),
+ result);
+
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL(  // path form; "%20" -> space
+ GURL("http://utf-8.ru/path/\xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\x8B%20"
+ "\xD0\x90%20\xD0\xB8%20A"),
+ search_terms_data_, &result));
+ EXPECT_EQ(
+ base::WideToUTF16(L"\x0431\x0443\x043A\x0432\x044B \x0410 \x0438 A"),
+ result);
+}
+
+// Checks that ExtractSearchTermsFromURL recovers non-Latin search terms from
+// percent-escaped URLs when the engine lists a non-UTF-8 input encoding.
+TEST_F(TemplateURLTest, ExtractSearchTermsFromNonUTF8URL) {
+ TemplateURLData data;
+ data.SetURL("http://windows-1251.ru/?q={searchTerms}");  // terms in the query
+ data.alternate_urls.push_back("http://windows-1251.ru/#q={searchTerms}");
+ data.alternate_urls.push_back("http://windows-1251.ru/path/{searchTerms}");
+ data.input_encodings.push_back("windows-1251");  // the only encoding added
+ TemplateURL url(data);
+ base::string16 result;
+
+ // Query form: Russian text as escaped Windows-1251 bytes; '+' -> space.
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL(
+ GURL("http://windows-1251.ru/?q=%C7%E4%F0%E0%E2%F1%F2%E2%F3%E9%2C+"
+ "%EC%E8%F0!"),
+ search_terms_data_, &result));
+ EXPECT_EQ(
+ base::WideToUTF16(
+ L"\x0417\x0434\x0440\x0430\x0432\x0441\x0442\x0432\x0443\x0439, "
+ L"\x043C\x0438\x0440!"),
+ result);
+
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL(  // "#q=" form; '+' -> space
+ GURL("http://windows-1251.ru/#q=%E4%E2%E0+%F1%EB%EE%E2%E0"),
+ search_terms_data_, &result));
+ EXPECT_EQ(
+ base::WideToUTF16(L"\x0434\x0432\x0430 \x0441\x043B\x043E\x0432\x0430"),
+ result);
+
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL(  // path form; "%20" -> space
+ GURL("http://windows-1251.ru/path/%E1%F3%EA%E2%FB%20%C0%20%E8%20A"),
+ search_terms_data_, &result));
+ EXPECT_EQ(
+ base::WideToUTF16(L"\x0431\x0443\x043A\x0432\x044B \x0410 \x0438 A"),
+ result);
+}
+
TEST_F(TemplateURLTest, HasSearchTermsReplacementKey) {
TemplateURLData data;
data.SetURL("http://google.com/?q={searchTerms}");