From a2027fb609835a3f95557b39611e3d11b89fca03 Mon Sep 17 00:00:00 2001 From: brettw Date: Mon, 13 Jul 2015 19:24:50 -0700 Subject: Remove CaseInsensitiveCompare from string_util.h There were a number of callers in net using this for HTTP headers. I think these callers actually just need ASCII case-insensitive comparisons so these were changed. The omnibox code used this functor. I added a new omnibox-specific one which does not have the locale issues of the old string_util one, but which still has the UTF-16 and combining accent issues (described in great detail in the comment for this). The Windows installer code can't depend on ICU so it calls the Win32 function to do case-insensitive comparisons. This should match the system comparison for registry keys better anyway. I also changed a caller of StartsWith to use this version. I wrote this StartsWith call using ToLower in a previous patch, but it turns out that the lengths of case-mapped strings do change in practice, making the offset computations of the surrounding code incorrect. This new version will be like the old version (will miss some cases of case-insensitive equality) but will handle 0x80-0xFF properly. BUG=24917 Review URL: https://codereview.chromium.org/1230583014 Cr-Commit-Position: refs/heads/master@{#338624} --- base/strings/string_util.cc | 18 +++++++++++++++--- base/strings/string_util.h | 22 ++++++++-------------- 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'base/strings') diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc index fcd8ddf..ae5fb80 100644 --- a/base/strings/string_util.cc +++ b/base/strings/string_util.cc @@ -101,6 +101,18 @@ template<> struct NonASCIIMask<8, wchar_t> { }; #endif // WCHAR_T_IS_UTF32 +// DO NOT USE. http://crbug.com/24917 +// +// tolower() will given incorrect results for non-ASCII characters. Use the +// ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. 
This is here +// for backwards-compat for StartsWith until such calls can be updated. +struct CaseInsensitiveCompareDeprecated { + public: + bool operator()(char16 x, char16 y) const { + return tolower(x) == tolower(y); + } +}; + } // namespace namespace base { @@ -611,7 +623,7 @@ bool StartsWith(const string16& str, if (search.size() > str.size()) return false; return std::equal(search.begin(), search.end(), str.begin(), - CaseInsensitiveCompare<char16>()); + CaseInsensitiveCompareDeprecated()); } return StartsWith(StringPiece16(str), StringPiece16(search), CompareCase::SENSITIVE); @@ -667,10 +679,10 @@ bool EndsWith(const string16& str, return false; return std::equal(search.begin(), search.end(), str.begin() + (str.size() - search.size()), - CaseInsensitiveCompare<char16>()); + CaseInsensitiveCompareDeprecated()); } return EndsWith(StringPiece16(str), StringPiece16(search), - CompareCase::SENSITIVE); + CompareCase::SENSITIVE); } char HexDigitToInt(wchar_t c) { diff --git a/base/strings/string_util.h b/base/strings/string_util.h index 5d26f1c5..e4abce2 100644 --- a/base/strings/string_util.h +++ b/base/strings/string_util.h @@ -103,20 +103,14 @@ template <class Char> inline Char ToUpperASCII(Char c) { return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; } -// Function objects to aid in comparing/searching strings. - -// DO NOT USE. tolower() will given incorrect results for non-ASCII characters. -// Use the ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. -template<typename Char> struct CaseInsensitiveCompare { - public: - bool operator()(Char x, Char y) const { - // TODO(darin): Do we really want to do locale sensitive comparisons here? - // ANSWER(brettw): No. - // See http://crbug.com/24917 - return tolower(x) == tolower(y); - } -}; - +// Functor for case-insensitive ASCII comparisons for STL algorithms like +// std::search. 
+// +// Note that a full Unicode version of this functor is not possible to write +// because case mappings might change the number of characters, depend on +// context (combining accents), and require handling UTF-16. If you need +// proper Unicode support, use base::i18n::ToLower/FoldCase and then just +// use a normal operator== on the result. template<typename Char> struct CaseInsensitiveCompareASCII { public: bool operator()(Char x, Char y) const { -- cgit v1.1