diff options
author | brettw <brettw@chromium.org> | 2015-07-13 19:24:50 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-07-14 02:25:27 +0000 |
commit | a2027fb609835a3f95557b39611e3d11b89fca03 (patch) | |
tree | 588e2007b546480bd3db40ab21be838386315f15 | |
parent | 5b3151cd707dee82b947ae096b12967e3c4a7c26 (diff) | |
download | chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.zip chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.tar.gz chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.tar.bz2 |
Remove CaseInsensitiveCompare from string_util.h
There were a number of callers in net using this for HTTP headers. I think
these callers actually just need ASCII case-insensitive comparisons so these
were changed.
The omnibox code used this functor. I added a new omnibox-specific one which
does not have the locale issues of the old string_util one, but which still
has the UTF-16 and combining accent issues (described in great detail in
the comment for this).
The Windows installer code can't depend on ICU so it calls the Win32 function
to do case-insensitive comparisons. This should match the system comparison
for registry keys better anyway.
I also changed a caller of StartsWith to use this version. I wrote this
StartsWith call using ToLower in a previous patch, but it turns out that the
lengths of case-mapped strings do change in practice, making the offset
computations of the suyrrounding code incorrect. This new version will be
like the old version (will miss some cases of case-insensitive equality) but
will handle 0x80-0xFF properly.
BUG=24917
Review URL: https://codereview.chromium.org/1230583014
Cr-Commit-Position: refs/heads/master@{#338624}
-rw-r--r-- | base/i18n/case_conversion.h | 4 | ||||
-rw-r--r-- | base/strings/string_util.cc | 18 | ||||
-rw-r--r-- | base/strings/string_util.h | 22 | ||||
-rw-r--r-- | chrome/installer/util/shell_util.cc | 8 | ||||
-rw-r--r-- | components/omnibox/browser/autocomplete_i18n.h | 41 | ||||
-rw-r--r-- | components/omnibox/browser/search_suggestion_parser.cc | 3 | ||||
-rw-r--r-- | components/omnibox/browser/shortcuts_provider.cc | 9 | ||||
-rw-r--r-- | extensions/browser/api/declarative_webrequest/webrequest_action.cc | 7 | ||||
-rw-r--r-- | net/http/http_response_headers.cc | 11 | ||||
-rw-r--r-- | net/url_request/url_request_unittest.cc | 9 | ||||
-rw-r--r-- | win8/test/ui_automation_client.cc | 5 |
11 files changed, 92 insertions, 45 deletions
diff --git a/base/i18n/case_conversion.h b/base/i18n/case_conversion.h index de1f432..0631a80 100644 --- a/base/i18n/case_conversion.h +++ b/base/i18n/case_conversion.h @@ -26,6 +26,10 @@ namespace i18n { // locale. Use this when comparing general Unicode strings that don't // necessarily belong in the user's current locale (like commands, protocol // names, other strings from the web) for case-insensitive equality. +// +// Note that case conversions will change the length of the string in some +// not-uncommon cases. Never assume that the output is the same length as +// the input. // Returns the lower case equivalent of string. Uses ICU's current locale. BASE_I18N_EXPORT string16 ToLower(StringPiece16 string); diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc index fcd8ddf..ae5fb80 100644 --- a/base/strings/string_util.cc +++ b/base/strings/string_util.cc @@ -101,6 +101,18 @@ template<> struct NonASCIIMask<8, wchar_t> { }; #endif // WCHAR_T_IS_UTF32 +// DO NOT USE. http://crbug.com/24917 +// +// tolower() will given incorrect results for non-ASCII characters. Use the +// ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. This is here +// for backwards-compat for StartsWith until such calls can be updated. +struct CaseInsensitiveCompareDeprecated { + public: + bool operator()(char16 x, char16 y) const { + return tolower(x) == tolower(y); + } +}; + } // namespace namespace base { @@ -611,7 +623,7 @@ bool StartsWith(const string16& str, if (search.size() > str.size()) return false; return std::equal(search.begin(), search.end(), str.begin(), - CaseInsensitiveCompare<char16>()); + CaseInsensitiveCompareDeprecated()); } return StartsWith(StringPiece16(str), StringPiece16(search), CompareCase::SENSITIVE); @@ -667,10 +679,10 @@ bool EndsWith(const string16& str, return false; return std::equal(search.begin(), search.end(), str.begin() + (str.size() - search.size()), - CaseInsensitiveCompare<char16>()); + CaseInsensitiveCompareDeprecated()); } return EndsWith(StringPiece16(str), StringPiece16(search), - CompareCase::SENSITIVE); + CompareCase::SENSITIVE); } char HexDigitToInt(wchar_t c) { diff --git a/base/strings/string_util.h b/base/strings/string_util.h index 5d26f1c5..e4abce2 100644 --- a/base/strings/string_util.h +++ b/base/strings/string_util.h @@ -103,20 +103,14 @@ template <class Char> inline Char ToUpperASCII(Char c) { return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; } -// Function objects to aid in comparing/searching strings. - -// DO NOT USE. tolower() will given incorrect results for non-ASCII characters. -// Use the ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. -template<typename Char> struct CaseInsensitiveCompare { - public: - bool operator()(Char x, Char y) const { - // TODO(darin): Do we really want to do locale sensitive comparisons here? - // ANSWER(brettw): No. - // See http://crbug.com/24917 - return tolower(x) == tolower(y); - } -}; - +// Functor for case-insensitive ASCII comparisons for STL algorithms like +// std::search. +// +// Note that a full Unicode version of this functor is not possible to write +// because case mappings might change the number of characters, depend on +// context (combining accents), and require handling UTF-16. If you need +// proper Unicode support, use base::i18n::ToLower/FoldCase and then just +// use a normal operator== on the result. template<typename Char> struct CaseInsensitiveCompareASCII { public: bool operator()(Char x, Char y) const { diff --git a/chrome/installer/util/shell_util.cc b/chrome/installer/util/shell_util.cc index 87a0d58..655d2f8 100644 --- a/chrome/installer/util/shell_util.cc +++ b/chrome/installer/util/shell_util.cc @@ -746,8 +746,12 @@ class RegistryEntry { found = key.ReadValue(name_.c_str(), &read_value) == ERROR_SUCCESS; if (found) { correct_value = read_value.size() == value_.size() && - std::equal(value_.begin(), value_.end(), read_value.begin(), - base::CaseInsensitiveCompare<wchar_t>()); + ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE, + read_value.data(), + base::saturated_cast<int>(read_value.size()), + value_.data(), + base::saturated_cast<int>(value_.size())) + == CSTR_EQUAL; } } else { DWORD read_value; diff --git a/components/omnibox/browser/autocomplete_i18n.h b/components/omnibox/browser/autocomplete_i18n.h new file mode 100644 index 0000000..9733e17 --- /dev/null +++ b/components/omnibox/browser/autocomplete_i18n.h @@ -0,0 +1,41 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_ +#define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_ + +#include "base/strings/string16.h" +#include "third_party/icu/source/common/unicode/uchar.h" + +// Functor for a simple 16-bit Unicode case-insensitive comparison. This is +// designed for the autocomplete system where we would rather get prefix lenths +// correct than handle all possible case sensitivity issues. +// +// Any time this is used the result will be incorrect in some cases that +// certain users will be able to discern. Ideally, this class would be deleted +// and we would do full Unicode case-sensitivity mappings using +// base::i18n::ToLower. However, ToLower can change the lenghts of strings, +// making computations of offsets or prefix lengths difficult. Getting all +// edge cases correct will require careful implementation and testing. In the +// mean time, we use this simpler approach. +// +// This comparator will not handle combining accents properly since it compares +// 16-bit values in isolation. If the two strings use the same sequence of +// combining accents (this is the normal case) in both strings, it will work. +// +// Additionally, this comparator does not decode UTF sequences which is why it +// is called "UCS2". UTF-16 surrogates will be compared literally (i.e. "case- +// sensitively"). +// +// There are also a few cases where the lower-case version of a character +// expands to more than one code point that will not be handled properly. Such +// characters will be compared case-sensitively. +struct SimpleCaseInsensitiveCompareUCS2 { + public: + bool operator()(base::char16 x, base::char16 y) const { + return u_tolower(x) == u_tolower(y); + } +}; + +#endif // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_ diff --git a/components/omnibox/browser/search_suggestion_parser.cc b/components/omnibox/browser/search_suggestion_parser.cc index 9c411fc..11fbdbb 100644 --- a/components/omnibox/browser/search_suggestion_parser.cc +++ b/components/omnibox/browser/search_suggestion_parser.cc @@ -15,6 +15,7 @@ #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/values.h" +#include "components/omnibox/browser/autocomplete_i18n.h" #include "components/omnibox/browser/autocomplete_input.h" #include "components/omnibox/browser/url_prefix.h" #include "components/url_fixer/url_fixer.h" @@ -158,7 +159,7 @@ void SearchSuggestionParser::SuggestResult::ClassifyMatchContents( // Do a case-insensitive search for |lookup_text|. base::string16::const_iterator lookup_position = std::search( match_contents_.begin(), match_contents_.end(), lookup_text.begin(), - lookup_text.end(), base::CaseInsensitiveCompare<base::char16>()); + lookup_text.end(), SimpleCaseInsensitiveCompareUCS2()); if (!allow_bolding_all && (lookup_position == match_contents_.end())) { // Bail if the code below to update the bolding would bold the whole // string. Note that the string may already be entirely bolded; if diff --git a/components/omnibox/browser/shortcuts_provider.cc b/components/omnibox/browser/shortcuts_provider.cc index 962cc13..8d43be6 100644 --- a/components/omnibox/browser/shortcuts_provider.cc +++ b/components/omnibox/browser/shortcuts_provider.cc @@ -20,6 +20,7 @@ #include "base/time/time.h" #include "components/history/core/browser/history_service.h" #include "components/metrics/proto/omnibox_input_type.pb.h" +#include "components/omnibox/browser/autocomplete_i18n.h" #include "components/omnibox/browser/autocomplete_input.h" #include "components/omnibox/browser/autocomplete_match.h" #include "components/omnibox/browser/autocomplete_provider_client.h" @@ -218,9 +219,11 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch( // input of "foo.c" to autocomplete to "foo.com" for a fill_into_edit of // "http://foo.com". if (AutocompleteMatch::IsSearchType(match.type)) { - if (base::StartsWith(base::i18n::ToLower(match.fill_into_edit), - base::i18n::ToLower(input.text()), - base::CompareCase::SENSITIVE)) { + if (match.fill_into_edit.size() >= input.text().size() && + std::equal(match.fill_into_edit.begin(), + match.fill_into_edit.begin() + input.text().size(), + input.text().begin(), + SimpleCaseInsensitiveCompareUCS2())) { match.inline_autocompletion = match.fill_into_edit.substr(input.text().length()); match.allowed_to_be_default_match = diff --git a/extensions/browser/api/declarative_webrequest/webrequest_action.cc b/extensions/browser/api/declarative_webrequest/webrequest_action.cc index 1a777ce..7d028e7 100644 --- a/extensions/browser/api/declarative_webrequest/webrequest_action.cc +++ b/extensions/browser/api/declarative_webrequest/webrequest_action.cc @@ -970,13 +970,8 @@ WebRequestRemoveResponseHeaderAction::CreateDelta( void* iter = NULL; std::string current_value; while (headers->EnumerateHeader(&iter, name_, ¤t_value)) { - if (has_value_ && - (current_value.size() != value_.size() || - !std::equal(current_value.begin(), current_value.end(), - value_.begin(), - base::CaseInsensitiveCompare<char>()))) { + if (has_value_ && !base::EqualsCaseInsensitiveASCII(current_value, value_)) continue; - } result->deleted_response_headers.push_back(make_pair(name_, current_value)); } return result; diff --git a/net/http/http_response_headers.cc b/net/http/http_response_headers.cc index 7c0c5ad..84f1a22 100644 --- a/net/http/http_response_headers.cc +++ b/net/http/http_response_headers.cc @@ -603,9 +603,7 @@ bool HttpResponseHeaders::HasHeaderValue(const base::StringPiece& name, void* iter = NULL; std::string temp; while (EnumerateHeader(&iter, name, &temp)) { - if (value.size() == temp.size() && - std::equal(temp.begin(), temp.end(), value.begin(), - base::CaseInsensitiveCompare<char>())) + if (base::EqualsCaseInsensitiveASCII(value, temp)) return true; } return false; @@ -743,11 +741,8 @@ size_t HttpResponseHeaders::FindHeader(size_t from, for (size_t i = from; i < parsed_.size(); ++i) { if (parsed_[i].is_continuation()) continue; - const std::string::const_iterator& name_begin = parsed_[i].name_begin; - const std::string::const_iterator& name_end = parsed_[i].name_end; - if (static_cast<size_t>(name_end - name_begin) == search.size() && - std::equal(name_begin, name_end, search.begin(), - base::CaseInsensitiveCompare<char>())) + base::StringPiece name(parsed_[i].name_begin, parsed_[i].name_end); + if (base::EqualsCaseInsensitiveASCII(search, name)) return i; } diff --git a/net/url_request/url_request_unittest.cc b/net/url_request/url_request_unittest.cc index 2c0af0b..5582f5a 100644 --- a/net/url_request/url_request_unittest.cc +++ b/net/url_request/url_request_unittest.cc @@ -258,12 +258,9 @@ void TestLoadTimingNoHttpResponse( // Do a case-insensitive search through |haystack| for |needle|. bool ContainsString(const std::string& haystack, const char* needle) { - std::string::const_iterator it = - std::search(haystack.begin(), - haystack.end(), - needle, - needle + strlen(needle), - base::CaseInsensitiveCompare<char>()); + std::string::const_iterator it = std::search( + haystack.begin(), haystack.end(), needle, needle + strlen(needle), + base::CaseInsensitiveCompareASCII<char>()); return it != haystack.end(); } diff --git a/win8/test/ui_automation_client.cc b/win8/test/ui_automation_client.cc index 9aa4237..22c1af1 100644 --- a/win8/test/ui_automation_client.cc +++ b/win8/test/ui_automation_client.cc @@ -323,10 +323,11 @@ void UIAutomationClient::Context::HandleWindowOpen( base::string16 class_name(V_BSTR(var.ptr())); - // Window class names are atoms, which are case-insensitive. + // Window class names are atoms, which are case-insensitive. Assume that + // the window in question only needs ASCII case-insensitivity. if (class_name.size() == class_name_.size() && std::equal(class_name.begin(), class_name.end(), class_name_.begin(), - base::CaseInsensitiveCompare<wchar_t>())) { + base::CaseInsensitiveCompareASCII<wchar_t>())) { RemoveWindowObserver(); ProcessWindow(window); } |