diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-05-31 23:57:16 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-05-31 23:57:16 +0000 |
commit | c31a979c877a9ac5f0b90727df6a4e00ce47b705 (patch) | |
tree | 83d78d30775f2c3a9da29d89988ea913c9fe6ef4 | |
parent | 1ef3399b0a1c8b170cf53ff47c678e263980bc0d (diff) | |
download | chromium_src-c31a979c877a9ac5f0b90727df6a4e00ce47b705.zip chromium_src-c31a979c877a9ac5f0b90727df6a4e00ce47b705.tar.gz chromium_src-c31a979c877a9ac5f0b90727df6a4e00ce47b705.tar.bz2 |
Escape search terms correctly in the path portion of a custom search engine.
This regressed with the fix for bug 111923. This attempts to correct the problem without breaking that bug, which requires being able to encode path components using arbitrary encodings.
This also eliminates a variety of functions as cleanup:
* BrowserAccessibilityWin::Escape(); this was not actually used.
* The 4-arg form of EscapeQueryParamValue(); only template_url.cc used this, and the necessary implementation can just move there.
* EscapeQueryParamValueUTF8(); the few remaining callers using that can just manually convert on the caller side.
BUG=127475
TEST=Create custom search engine with url "http://google.com/%s", try using your custom engine to search for "foo/bar", and make sure you wind up at "google.com/foo/bar" and not "google.com/foo%2Fbar"
Review URL: https://chromiumcodereview.appspot.com/10444117
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@139929 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/search_engines/template_url.cc | 109 | ||||
-rw-r--r-- | chrome/browser/search_engines/template_url_unittest.cc | 18 | ||||
-rw-r--r-- | content/browser/accessibility/browser_accessibility_win.cc | 5 | ||||
-rw-r--r-- | content/browser/accessibility/browser_accessibility_win.h | 3 | ||||
-rw-r--r-- | net/base/escape.cc | 16 | ||||
-rw-r--r-- | net/base/escape.h | 33 | ||||
-rw-r--r-- | net/base/escape_icu.cc | 29 | ||||
-rw-r--r-- | net/base/escape_unittest.cc | 23 | ||||
-rw-r--r-- | net/net.gyp | 1 |
9 files changed, 94 insertions, 143 deletions
diff --git a/chrome/browser/search_engines/template_url.cc b/chrome/browser/search_engines/template_url.cc index ada51cc..8879948 100644 --- a/chrome/browser/search_engines/template_url.cc +++ b/chrome/browser/search_engines/template_url.cc @@ -21,59 +21,89 @@ #include "net/base/escape.h" #include "ui/base/l10n/l10n_util.h" +namespace { + // The TemplateURLRef has any number of terms that need to be replaced. Each of // the terms is enclosed in braces. If the character preceeding the final // brace is a ?, it indicates the term is optional and can be replaced with // an empty string. -static const char kStartParameter = '{'; -static const char kEndParameter = '}'; -static const char kOptional = '?'; +const char kStartParameter = '{'; +const char kEndParameter = '}'; +const char kOptional = '?'; // Known parameters found in the URL. -static const char kSearchTermsParameter[] = "searchTerms"; -static const char kSearchTermsParameterFull[] = "{searchTerms}"; -static const char kCountParameter[] = "count"; -static const char kStartIndexParameter[] = "startIndex"; -static const char kStartPageParameter[] = "startPage"; -static const char kLanguageParameter[] = "language"; -static const char kInputEncodingParameter[] = "inputEncoding"; -static const char kOutputEncodingParameter[] = "outputEncoding"; - -static const char kGoogleAcceptedSuggestionParameter[] = - "google:acceptedSuggestion"; +const char kSearchTermsParameter[] = "searchTerms"; +const char kSearchTermsParameterFull[] = "{searchTerms}"; +const char kCountParameter[] = "count"; +const char kStartIndexParameter[] = "startIndex"; +const char kStartPageParameter[] = "startPage"; +const char kLanguageParameter[] = "language"; +const char kInputEncodingParameter[] = "inputEncoding"; +const char kOutputEncodingParameter[] = "outputEncoding"; + +const char kGoogleAcceptedSuggestionParameter[] = "google:acceptedSuggestion"; // Host/Domain Google searches are relative to. -static const char kGoogleBaseURLParameter[] = "google:baseURL"; -static const char kGoogleBaseURLParameterFull[] = "{google:baseURL}"; +const char kGoogleBaseURLParameter[] = "google:baseURL"; +const char kGoogleBaseURLParameterFull[] = "{google:baseURL}"; // Like google:baseURL, but for the Search Suggest capability. -static const char kGoogleBaseSuggestURLParameter[] = - "google:baseSuggestURL"; -static const char kGoogleBaseSuggestURLParameterFull[] = - "{google:baseSuggestURL}"; -static const char kGoogleInstantEnabledParameter[] = - "google:instantEnabledParameter"; -static const char kGoogleOriginalQueryForSuggestionParameter[] = +const char kGoogleBaseSuggestURLParameter[] = "google:baseSuggestURL"; +const char kGoogleBaseSuggestURLParameterFull[] = "{google:baseSuggestURL}"; +const char kGoogleInstantEnabledParameter[] = "google:instantEnabledParameter"; +const char kGoogleOriginalQueryForSuggestionParameter[] = "google:originalQueryForSuggestion"; -static const char kGoogleRLZParameter[] = "google:RLZ"; +const char kGoogleRLZParameter[] = "google:RLZ"; // Same as kSearchTermsParameter, with no escaping. -static const char kGoogleSearchFieldtrialParameter[] = +const char kGoogleSearchFieldtrialParameter[] = "google:searchFieldtrialParameter"; -static const char kGoogleUnescapedSearchTermsParameter[] = +const char kGoogleUnescapedSearchTermsParameter[] = "google:unescapedSearchTerms"; -static const char kGoogleUnescapedSearchTermsParameterFull[] = +const char kGoogleUnescapedSearchTermsParameterFull[] = "{google:unescapedSearchTerms}"; // Display value for kSearchTermsParameter. -static const char kDisplaySearchTerms[] = "%s"; +const char kDisplaySearchTerms[] = "%s"; // Display value for kGoogleUnescapedSearchTermsParameter. -static const char kDisplayUnescapedSearchTerms[] = "%S"; +const char kDisplayUnescapedSearchTerms[] = "%S"; // Used if the count parameter is not optional. Indicates we want 10 search // results. -static const char kDefaultCount[] = "10"; +const char kDefaultCount[] = "10"; // Used if the parameter kOutputEncodingParameter is required. -static const char kOutputEncodingType[] = "UTF-8"; +const char kOutputEncodingType[] = "UTF-8"; + +// Attempts to encode |terms| and |original_query| in |encoding| and escape +// them. |terms| may be escaped as path or query depending on |is_in_query|; +// |original_query| is always escaped as query. Returns whether the encoding +// process succeeded. +bool TryEncoding(const string16& terms, + const string16& original_query, + const char* encoding, + bool is_in_query, + string16* escaped_terms, + string16* escaped_original_query) { + DCHECK(escaped_terms); + DCHECK(escaped_original_query); + std::string encoded_terms; + if (!base::UTF16ToCodepage(terms, encoding, + base::OnStringConversionError::SKIP, &encoded_terms)) + return false; + *escaped_terms = UTF8ToUTF16(is_in_query ? + net::EscapeQueryParamValue(encoded_terms, true) : + net::EscapePath(encoded_terms)); + if (original_query.empty()) + return true; + std::string encoded_original_query; + if (!base::UTF16ToCodepage(original_query, encoding, + base::OnStringConversionError::SKIP, &encoded_original_query)) + return false; + *escaped_original_query = + UTF8ToUTF16(net::EscapeQueryParamValue(encoded_original_query, true)); + return true; +} + +} // namespace // TemplateURLRef ------------------------------------------------------------- @@ -152,23 +182,18 @@ std::string TemplateURLRef::ReplaceSearchTermsUsingTermsData( for (std::vector<std::string>::const_iterator i( owner_->input_encodings().begin()); i != owner_->input_encodings().end(); ++i) { - if (net::EscapeQueryParamValue(terms, i->c_str(), is_in_query, - &encoded_terms)) { - if (is_in_query && !original_query_for_suggestion.empty()) { - net::EscapeQueryParamValue(original_query_for_suggestion, i->c_str(), - true, &encoded_original_query); - } + if (TryEncoding(terms, original_query_for_suggestion, i->c_str(), + is_in_query, &encoded_terms, &encoded_original_query)) { input_encoding = *i; break; } } if (input_encoding.empty()) { - encoded_terms = net::EscapeQueryParamValueUTF8(terms, is_in_query); - if (is_in_query && !original_query_for_suggestion.empty()) { - encoded_original_query = - net::EscapeQueryParamValueUTF8(original_query_for_suggestion, true); - } input_encoding = "UTF-8"; + if (!TryEncoding(terms, original_query_for_suggestion, + input_encoding.c_str(), is_in_query, &encoded_terms, + &encoded_original_query)) + NOTREACHED(); } std::string url = parsed_url_; diff --git a/chrome/browser/search_engines/template_url_unittest.cc b/chrome/browser/search_engines/template_url_unittest.cc index 656e7b8..42aa172 100644 --- a/chrome/browser/search_engines/template_url_unittest.cc +++ b/chrome/browser/search_engines/template_url_unittest.cc @@ -76,16 +76,15 @@ TEST_F(TemplateURLTest, URLRefTestSearchTerms) { const char* url; const string16 terms; const std::string output; - bool valid_url; } search_term_cases[] = { { "http://foo{searchTerms}", ASCIIToUTF16("sea rch/bar"), - "http://foosea%20rch%2Fbar", false }, + "http://foosea%20rch/bar" }, { "http://foo{searchTerms}?boo=abc", ASCIIToUTF16("sea rch/bar"), - "http://foosea%20rch%2Fbar?boo=abc", false }, + "http://foosea%20rch/bar?boo=abc" }, { "http://foo/?boo={searchTerms}", ASCIIToUTF16("sea rch/bar"), - "http://foo/?boo=sea+rch%2Fbar", true }, + "http://foo/?boo=sea+rch%2Fbar" }, { "http://en.wikipedia.org/{searchTerms}", ASCIIToUTF16("wiki/?"), - "http://en.wikipedia.org/wiki%2F%3F", true } + "http://en.wikipedia.org/wiki/%3F" } }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(search_term_cases); ++i) { const SearchTermsCase& value = search_term_cases[i]; @@ -94,11 +93,10 @@ TEST_F(TemplateURLTest, URLRefTestSearchTerms) { TemplateURL url(NULL, data); EXPECT_TRUE(url.url_ref().IsValid()); ASSERT_TRUE(url.url_ref().SupportsReplacement()); - std::string result = url.url_ref().ReplaceSearchTerms(value.terms, - TemplateURLRef::NO_SUGGESTIONS_AVAILABLE, string16()); - EXPECT_EQ(value.output, result); - GURL result_url(result); - EXPECT_EQ(value.valid_url, result_url.is_valid()); + GURL result(url.url_ref().ReplaceSearchTerms(value.terms, + TemplateURLRef::NO_SUGGESTIONS_AVAILABLE, string16())); + ASSERT_TRUE(result.is_valid()); + EXPECT_EQ(value.output, result.spec()); } } diff --git a/content/browser/accessibility/browser_accessibility_win.cc b/content/browser/accessibility/browser_accessibility_win.cc index ac6ffea..e481cdd 100644 --- a/content/browser/accessibility/browser_accessibility_win.cc +++ b/content/browser/accessibility/browser_accessibility_win.cc @@ -15,7 +15,6 @@ #include "base/win/windows_version.h" #include "content/browser/accessibility/browser_accessibility_manager_win.h" #include "content/common/accessibility_messages.h" -#include "net/base/escape.h" #include "ui/base/accessibility/accessible_text_utils.h" #include "ui/base/win/accessibility_misc_utils.h" @@ -2897,10 +2896,6 @@ void BrowserAccessibilityWin::IntAttributeToIA2( base::IntToString16(value)); } -string16 BrowserAccessibilityWin::Escape(const string16& str) { - return net::EscapeQueryParamValueUTF8(str, false); -} - const string16& BrowserAccessibilityWin::TextForIAccessibleText() { if (IsEditableText()) { return value_; diff --git a/content/browser/accessibility/browser_accessibility_win.h b/content/browser/accessibility/browser_accessibility_win.h index 43817fd..114770d 100644 --- a/content/browser/accessibility/browser_accessibility_win.h +++ b/content/browser/accessibility/browser_accessibility_win.h @@ -793,9 +793,6 @@ BrowserAccessibilityWin void IntAttributeToIA2( WebAccessibility::IntAttribute attribute, const char* ia2_attr); - // Escape a string like it would be escaped for a URL or HTML form. - string16 Escape(const string16& str); - // Get the text of this node for the purposes of IAccessibleText - it may // be the name, it may be the value, etc. depending on the role. const string16& TextForIAccessibleText(); diff --git a/net/base/escape.cc b/net/base/escape.cc index f206e98..bee60c1 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -247,6 +247,10 @@ static const Charmap kExternalHandlerCharmap = {{ } // namespace +std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { + return Escape(text, kQueryCharmap, use_plus); +} + std::string EscapePath(const std::string& path) { return Escape(path, kPathCharmap, false); } @@ -354,16 +358,6 @@ string16 UnescapeForHTML(const string16& input) { return text; } -std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { - return Escape(text, kQueryCharmap, use_plus); -} - -// Convert the string to a sequence of bytes and then % escape anything -// except alphanumerics and !'()*-._~ -string16 EscapeQueryParamValueUTF8(const string16& text, bool use_plus) { - return UTF8ToUTF16(Escape(UTF16ToUTF8(text), kQueryCharmap, use_plus)); -} - namespace internal { AdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments) diff --git a/net/base/escape.h b/net/base/escape.h index bd9dc39..45c90fd 100644 --- a/net/base/escape.h +++ b/net/base/escape.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -17,8 +17,17 @@ namespace net { // Escaping -------------------------------------------------------------------- -// Escapes a file. This includes: +// Escapes characters in text suitable for use as a query parameter value. +// We %XX everything except alphanumerics and -_.!~*'() +// Spaces change to "+" unless you pass usePlus=false. +// This is basically the same as encodeURIComponent in javascript. +NET_EXPORT std::string EscapeQueryParamValue(const std::string& text, + bool use_plus); + +// Escapes a partial or complete file/pathname. This includes: // non-printable, non-7bit, and (including space) "#%:<>?[\]^`{|} +// For the string16 version, we attempt a conversion to |codepage| before +// encoding the string. If this conversion fails, we return false. NET_EXPORT std::string EscapePath(const std::string& path); // Escapes application/x-www-form-urlencoded content. This includes: @@ -124,26 +133,6 @@ NET_EXPORT string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( // < > & " ' NET_EXPORT string16 UnescapeForHTML(const string16& text); -// Deprecated ------------------------------------------------------------------ - -// Escapes characters in text suitable for use as a query parameter value. -// We %XX everything except alphanumerics and -_.!~*'() -// Spaces change to "+" unless you pass usePlus=false. -// This is basically the same as encodeURIComponent in javascript. -// For the string16 version, we do a conversion to charset before encoding the -// string. If the charset doesn't exist, we return false. -NET_EXPORT std::string EscapeQueryParamValue(const std::string& text, - bool use_plus); -NET_EXPORT bool EscapeQueryParamValue(const string16& text, - const char* codepage, - bool use_plus, - string16* escaped); - -// A specialized version of EscapeQueryParamValue for string16s that -// assumes the codepage is UTF8. This is provided as a convenience. -NET_EXPORT string16 EscapeQueryParamValueUTF8(const string16& text, - bool use_plus); - namespace internal { // Private Functions (Exposed for Unit Testing) -------------------------------- diff --git a/net/base/escape_icu.cc b/net/base/escape_icu.cc deleted file mode 100644 index 2962fbc..0000000 --- a/net/base/escape_icu.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "net/base/escape.h" - -#include "base/i18n/icu_string_conversions.h" -#include "base/utf_string_conversions.h" - -// This file exists to avoid having escape.cc depend on ICU. - -namespace net { - -bool EscapeQueryParamValue(const string16& text, - const char* codepage, - bool use_plus, - string16* escaped) { - // TODO(brettw) bug 1201094: this function should be removed, this "SKIP" - // behavior is wrong when the character can't be encoded properly. - std::string encoded; - if (!base::UTF16ToCodepage(text, codepage, - base::OnStringConversionError::SKIP, &encoded)) - return false; - - escaped->assign(UTF8ToUTF16(EscapeQueryParamValue(encoded, use_plus))); - return true; -} - -} // namespace net diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc index a68c3ef..b34b38c 100644 --- a/net/base/escape_unittest.cc +++ b/net/base/escape_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -68,8 +68,7 @@ TEST(EscapeTest, EscapeTextForFormSubmission) { }; for (size_t i = 0; i < arraysize(escape_cases); ++i) { EscapeCase value = escape_cases[i]; - EXPECT_EQ(UTF8ToUTF16(value.output), - EscapeQueryParamValueUTF8(UTF8ToUTF16(value.input), true)); + EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true)); } const EscapeCase escape_cases_no_plus[] = { @@ -79,8 +78,7 @@ TEST(EscapeTest, EscapeTextForFormSubmission) { }; for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { EscapeCase value = escape_cases_no_plus[i]; - EXPECT_EQ(ASCIIToUTF16(value.output), - EscapeQueryParamValueUTF8(ASCIIToUTF16(value.input), false)); + EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false)); } // Test all the values in we're supposed to be escaping. @@ -107,21 +105,6 @@ TEST(EscapeTest, EscapeTextForFormSubmission) { EXPECT_EQ(out, in); } } - - // Check to see if EscapeQueryParamValueUTF8 is the same as - // EscapeQueryParamValue(..., kCodepageUTF8,) - string16 test_str; - test_str.reserve(5000); - for (int i = 1; i < 5000; ++i) { - test_str.push_back(i); - } - string16 utf16; - EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true, - &utf16)); - EXPECT_EQ(utf16, EscapeQueryParamValueUTF8(test_str, true)); - EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false, - &utf16)); - EXPECT_EQ(utf16, EscapeQueryParamValueUTF8(test_str, false)); } TEST(EscapeTest, EscapePath) { diff --git a/net/net.gyp b/net/net.gyp index 904fb4f..313e11d 100644 --- a/net/net.gyp +++ b/net/net.gyp @@ -113,7 +113,6 @@ 'base/dnssec_keyset.h', 'base/escape.cc', 'base/escape.h', - 'base/escape_icu.cc', 'base/ev_root_ca_metadata.cc', 'base/ev_root_ca_metadata.h', 'base/expiring_cache.h', |