summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbrettw <brettw@chromium.org>2015-07-13 19:24:50 -0700
committerCommit bot <commit-bot@chromium.org>2015-07-14 02:25:27 +0000
commita2027fb609835a3f95557b39611e3d11b89fca03 (patch)
tree588e2007b546480bd3db40ab21be838386315f15
parent5b3151cd707dee82b947ae096b12967e3c4a7c26 (diff)
downloadchromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.zip
chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.tar.gz
chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.tar.bz2
Remove CaseInsensitiveCompare from string_util.h
There were a number of callers in net using this for HTTP headers. I think these callers actually just need ASCII case-insensitive comparisons so these were changed. The omnibox code used this functor. I added a new omnibox-specific one which does not have the locale issues of the old string_util one, but which still has the UTF-16 and combining accent issues (described in great detail in the comment for this). The Windows installer code can't depend on ICU so it calls the Win32 function to do case-insensitive comparisons. This should match the system comparison for registry keys better anyway. I also changed a caller of StartsWith to use this version. I wrote this StartsWith call using ToLower in a previous patch, but it turns out that the lengths of case-mapped strings do change in practice, making the offset computations of the suyrrounding code incorrect. This new version will be like the old version (will miss some cases of case-insensitive equality) but will handle 0x80-0xFF properly. BUG=24917 Review URL: https://codereview.chromium.org/1230583014 Cr-Commit-Position: refs/heads/master@{#338624}
-rw-r--r--base/i18n/case_conversion.h4
-rw-r--r--base/strings/string_util.cc18
-rw-r--r--base/strings/string_util.h22
-rw-r--r--chrome/installer/util/shell_util.cc8
-rw-r--r--components/omnibox/browser/autocomplete_i18n.h41
-rw-r--r--components/omnibox/browser/search_suggestion_parser.cc3
-rw-r--r--components/omnibox/browser/shortcuts_provider.cc9
-rw-r--r--extensions/browser/api/declarative_webrequest/webrequest_action.cc7
-rw-r--r--net/http/http_response_headers.cc11
-rw-r--r--net/url_request/url_request_unittest.cc9
-rw-r--r--win8/test/ui_automation_client.cc5
11 files changed, 92 insertions, 45 deletions
diff --git a/base/i18n/case_conversion.h b/base/i18n/case_conversion.h
index de1f432..0631a80 100644
--- a/base/i18n/case_conversion.h
+++ b/base/i18n/case_conversion.h
@@ -26,6 +26,10 @@ namespace i18n {
// locale. Use this when comparing general Unicode strings that don't
// necessarily belong in the user's current locale (like commands, protocol
// names, other strings from the web) for case-insensitive equality.
+//
+// Note that case conversions will change the length of the string in some
+// not-uncommon cases. Never assume that the output is the same length as
+// the input.
// Returns the lower case equivalent of string. Uses ICU's current locale.
BASE_I18N_EXPORT string16 ToLower(StringPiece16 string);
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
index fcd8ddf..ae5fb80 100644
--- a/base/strings/string_util.cc
+++ b/base/strings/string_util.cc
@@ -101,6 +101,18 @@ template<> struct NonASCIIMask<8, wchar_t> {
};
#endif // WCHAR_T_IS_UTF32
+// DO NOT USE. http://crbug.com/24917
+//
+// tolower() will given incorrect results for non-ASCII characters. Use the
+// ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. This is here
+// for backwards-compat for StartsWith until such calls can be updated.
+struct CaseInsensitiveCompareDeprecated {
+ public:
+ bool operator()(char16 x, char16 y) const {
+ return tolower(x) == tolower(y);
+ }
+};
+
} // namespace
namespace base {
@@ -611,7 +623,7 @@ bool StartsWith(const string16& str,
if (search.size() > str.size())
return false;
return std::equal(search.begin(), search.end(), str.begin(),
- CaseInsensitiveCompare<char16>());
+ CaseInsensitiveCompareDeprecated());
}
return StartsWith(StringPiece16(str), StringPiece16(search),
CompareCase::SENSITIVE);
@@ -667,10 +679,10 @@ bool EndsWith(const string16& str,
return false;
return std::equal(search.begin(), search.end(),
str.begin() + (str.size() - search.size()),
- CaseInsensitiveCompare<char16>());
+ CaseInsensitiveCompareDeprecated());
}
return EndsWith(StringPiece16(str), StringPiece16(search),
- CompareCase::SENSITIVE);
+ CompareCase::SENSITIVE);
}
char HexDigitToInt(wchar_t c) {
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index 5d26f1c5..e4abce2 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -103,20 +103,14 @@ template <class Char> inline Char ToUpperASCII(Char c) {
return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
}
-// Function objects to aid in comparing/searching strings.
-
-// DO NOT USE. tolower() will given incorrect results for non-ASCII characters.
-// Use the ASCII version, base::i18n::ToLower, or base::i18n::FoldCase.
-template<typename Char> struct CaseInsensitiveCompare {
- public:
- bool operator()(Char x, Char y) const {
- // TODO(darin): Do we really want to do locale sensitive comparisons here?
- // ANSWER(brettw): No.
- // See http://crbug.com/24917
- return tolower(x) == tolower(y);
- }
-};
-
+// Functor for case-insensitive ASCII comparisons for STL algorithms like
+// std::search.
+//
+// Note that a full Unicode version of this functor is not possible to write
+// because case mappings might change the number of characters, depend on
+// context (combining accents), and require handling UTF-16. If you need
+// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
+// use a normal operator== on the result.
template<typename Char> struct CaseInsensitiveCompareASCII {
public:
bool operator()(Char x, Char y) const {
diff --git a/chrome/installer/util/shell_util.cc b/chrome/installer/util/shell_util.cc
index 87a0d58..655d2f8 100644
--- a/chrome/installer/util/shell_util.cc
+++ b/chrome/installer/util/shell_util.cc
@@ -746,8 +746,12 @@ class RegistryEntry {
found = key.ReadValue(name_.c_str(), &read_value) == ERROR_SUCCESS;
if (found) {
correct_value = read_value.size() == value_.size() &&
- std::equal(value_.begin(), value_.end(), read_value.begin(),
- base::CaseInsensitiveCompare<wchar_t>());
+ ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE,
+ read_value.data(),
+ base::saturated_cast<int>(read_value.size()),
+ value_.data(),
+ base::saturated_cast<int>(value_.size()))
+ == CSTR_EQUAL;
}
} else {
DWORD read_value;
diff --git a/components/omnibox/browser/autocomplete_i18n.h b/components/omnibox/browser/autocomplete_i18n.h
new file mode 100644
index 0000000..9733e17
--- /dev/null
+++ b/components/omnibox/browser/autocomplete_i18n.h
@@ -0,0 +1,41 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_
+#define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_
+
+#include "base/strings/string16.h"
+#include "third_party/icu/source/common/unicode/uchar.h"
+
+// Functor for a simple 16-bit Unicode case-insensitive comparison. This is
+// designed for the autocomplete system where we would rather get prefix lenths
+// correct than handle all possible case sensitivity issues.
+//
+// Any time this is used the result will be incorrect in some cases that
+// certain users will be able to discern. Ideally, this class would be deleted
+// and we would do full Unicode case-sensitivity mappings using
+// base::i18n::ToLower. However, ToLower can change the lenghts of strings,
+// making computations of offsets or prefix lengths difficult. Getting all
+// edge cases correct will require careful implementation and testing. In the
+// mean time, we use this simpler approach.
+//
+// This comparator will not handle combining accents properly since it compares
+// 16-bit values in isolation. If the two strings use the same sequence of
+// combining accents (this is the normal case) in both strings, it will work.
+//
+// Additionally, this comparator does not decode UTF sequences which is why it
+// is called "UCS2". UTF-16 surrogates will be compared literally (i.e. "case-
+// sensitively").
+//
+// There are also a few cases where the lower-case version of a character
+// expands to more than one code point that will not be handled properly. Such
+// characters will be compared case-sensitively.
+struct SimpleCaseInsensitiveCompareUCS2 {
+ public:
+ bool operator()(base::char16 x, base::char16 y) const {
+ return u_tolower(x) == u_tolower(y);
+ }
+};
+
+#endif // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_
diff --git a/components/omnibox/browser/search_suggestion_parser.cc b/components/omnibox/browser/search_suggestion_parser.cc
index 9c411fc..11fbdbb 100644
--- a/components/omnibox/browser/search_suggestion_parser.cc
+++ b/components/omnibox/browser/search_suggestion_parser.cc
@@ -15,6 +15,7 @@
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/values.h"
+#include "components/omnibox/browser/autocomplete_i18n.h"
#include "components/omnibox/browser/autocomplete_input.h"
#include "components/omnibox/browser/url_prefix.h"
#include "components/url_fixer/url_fixer.h"
@@ -158,7 +159,7 @@ void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
// Do a case-insensitive search for |lookup_text|.
base::string16::const_iterator lookup_position = std::search(
match_contents_.begin(), match_contents_.end(), lookup_text.begin(),
- lookup_text.end(), base::CaseInsensitiveCompare<base::char16>());
+ lookup_text.end(), SimpleCaseInsensitiveCompareUCS2());
if (!allow_bolding_all && (lookup_position == match_contents_.end())) {
// Bail if the code below to update the bolding would bold the whole
// string. Note that the string may already be entirely bolded; if
diff --git a/components/omnibox/browser/shortcuts_provider.cc b/components/omnibox/browser/shortcuts_provider.cc
index 962cc13..8d43be6 100644
--- a/components/omnibox/browser/shortcuts_provider.cc
+++ b/components/omnibox/browser/shortcuts_provider.cc
@@ -20,6 +20,7 @@
#include "base/time/time.h"
#include "components/history/core/browser/history_service.h"
#include "components/metrics/proto/omnibox_input_type.pb.h"
+#include "components/omnibox/browser/autocomplete_i18n.h"
#include "components/omnibox/browser/autocomplete_input.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/autocomplete_provider_client.h"
@@ -218,9 +219,11 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(
// input of "foo.c" to autocomplete to "foo.com" for a fill_into_edit of
// "http://foo.com".
if (AutocompleteMatch::IsSearchType(match.type)) {
- if (base::StartsWith(base::i18n::ToLower(match.fill_into_edit),
- base::i18n::ToLower(input.text()),
- base::CompareCase::SENSITIVE)) {
+ if (match.fill_into_edit.size() >= input.text().size() &&
+ std::equal(match.fill_into_edit.begin(),
+ match.fill_into_edit.begin() + input.text().size(),
+ input.text().begin(),
+ SimpleCaseInsensitiveCompareUCS2())) {
match.inline_autocompletion =
match.fill_into_edit.substr(input.text().length());
match.allowed_to_be_default_match =
diff --git a/extensions/browser/api/declarative_webrequest/webrequest_action.cc b/extensions/browser/api/declarative_webrequest/webrequest_action.cc
index 1a777ce..7d028e7 100644
--- a/extensions/browser/api/declarative_webrequest/webrequest_action.cc
+++ b/extensions/browser/api/declarative_webrequest/webrequest_action.cc
@@ -970,13 +970,8 @@ WebRequestRemoveResponseHeaderAction::CreateDelta(
void* iter = NULL;
std::string current_value;
while (headers->EnumerateHeader(&iter, name_, &current_value)) {
- if (has_value_ &&
- (current_value.size() != value_.size() ||
- !std::equal(current_value.begin(), current_value.end(),
- value_.begin(),
- base::CaseInsensitiveCompare<char>()))) {
+ if (has_value_ && !base::EqualsCaseInsensitiveASCII(current_value, value_))
continue;
- }
result->deleted_response_headers.push_back(make_pair(name_, current_value));
}
return result;
diff --git a/net/http/http_response_headers.cc b/net/http/http_response_headers.cc
index 7c0c5ad..84f1a22 100644
--- a/net/http/http_response_headers.cc
+++ b/net/http/http_response_headers.cc
@@ -603,9 +603,7 @@ bool HttpResponseHeaders::HasHeaderValue(const base::StringPiece& name,
void* iter = NULL;
std::string temp;
while (EnumerateHeader(&iter, name, &temp)) {
- if (value.size() == temp.size() &&
- std::equal(temp.begin(), temp.end(), value.begin(),
- base::CaseInsensitiveCompare<char>()))
+ if (base::EqualsCaseInsensitiveASCII(value, temp))
return true;
}
return false;
@@ -743,11 +741,8 @@ size_t HttpResponseHeaders::FindHeader(size_t from,
for (size_t i = from; i < parsed_.size(); ++i) {
if (parsed_[i].is_continuation())
continue;
- const std::string::const_iterator& name_begin = parsed_[i].name_begin;
- const std::string::const_iterator& name_end = parsed_[i].name_end;
- if (static_cast<size_t>(name_end - name_begin) == search.size() &&
- std::equal(name_begin, name_end, search.begin(),
- base::CaseInsensitiveCompare<char>()))
+ base::StringPiece name(parsed_[i].name_begin, parsed_[i].name_end);
+ if (base::EqualsCaseInsensitiveASCII(search, name))
return i;
}
diff --git a/net/url_request/url_request_unittest.cc b/net/url_request/url_request_unittest.cc
index 2c0af0b..5582f5a 100644
--- a/net/url_request/url_request_unittest.cc
+++ b/net/url_request/url_request_unittest.cc
@@ -258,12 +258,9 @@ void TestLoadTimingNoHttpResponse(
// Do a case-insensitive search through |haystack| for |needle|.
bool ContainsString(const std::string& haystack, const char* needle) {
- std::string::const_iterator it =
- std::search(haystack.begin(),
- haystack.end(),
- needle,
- needle + strlen(needle),
- base::CaseInsensitiveCompare<char>());
+ std::string::const_iterator it = std::search(
+ haystack.begin(), haystack.end(), needle, needle + strlen(needle),
+ base::CaseInsensitiveCompareASCII<char>());
return it != haystack.end();
}
diff --git a/win8/test/ui_automation_client.cc b/win8/test/ui_automation_client.cc
index 9aa4237..22c1af1 100644
--- a/win8/test/ui_automation_client.cc
+++ b/win8/test/ui_automation_client.cc
@@ -323,10 +323,11 @@ void UIAutomationClient::Context::HandleWindowOpen(
base::string16 class_name(V_BSTR(var.ptr()));
- // Window class names are atoms, which are case-insensitive.
+ // Window class names are atoms, which are case-insensitive. Assume that
+ // the window in question only needs ASCII case-insensitivity.
if (class_name.size() == class_name_.size() &&
std::equal(class_name.begin(), class_name.end(), class_name_.begin(),
- base::CaseInsensitiveCompare<wchar_t>())) {
+ base::CaseInsensitiveCompareASCII<wchar_t>())) {
RemoveWindowObserver();
ProcessWindow(window);
}