diff options
author | brettw <brettw@chromium.org> | 2015-07-13 19:24:50 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-07-14 02:25:27 +0000 |
commit | a2027fb609835a3f95557b39611e3d11b89fca03 (patch) | |
tree | 588e2007b546480bd3db40ab21be838386315f15 /components/omnibox | |
parent | 5b3151cd707dee82b947ae096b12967e3c4a7c26 (diff) | |
download | chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.zip chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.tar.gz chromium_src-a2027fb609835a3f95557b39611e3d11b89fca03.tar.bz2 |
Remove CaseInsensitiveCompare from string_util.h
There were a number of callers in net using this for HTTP headers. I think
these callers actually just need ASCII case-insensitive comparisons so these
were changed.
The omnibox code used this functor. I added a new omnibox-specific one which
does not have the locale issues of the old string_util one, but which still
has the UTF-16 and combining accent issues (described in great detail in
the comment for this).
The Windows installer code can't depend on ICU so it calls the Win32 function
to do case-insensitive comparisons. This should match the system comparison
for registry keys better anyway.
I also changed a caller of StartsWith to use this version. I wrote this
StartsWith call using ToLower in a previous patch, but it turns out that the
lengths of case-mapped strings do change in practice, making the offset
computations of the surrounding code incorrect. This new version will be
like the old version (will miss some cases of case-insensitive equality) but
will handle 0x80-0xFF properly.
BUG=24917
Review URL: https://codereview.chromium.org/1230583014
Cr-Commit-Position: refs/heads/master@{#338624}
Diffstat (limited to 'components/omnibox')
-rw-r--r-- | components/omnibox/browser/autocomplete_i18n.h | 41 | ||||
-rw-r--r-- | components/omnibox/browser/search_suggestion_parser.cc | 3 | ||||
-rw-r--r-- | components/omnibox/browser/shortcuts_provider.cc | 9 |
3 files changed, 49 insertions, 4 deletions
diff --git a/components/omnibox/browser/autocomplete_i18n.h b/components/omnibox/browser/autocomplete_i18n.h new file mode 100644 index 0000000..9733e17 --- /dev/null +++ b/components/omnibox/browser/autocomplete_i18n.h @@ -0,0 +1,41 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_ +#define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_ + +#include "base/strings/string16.h" +#include "third_party/icu/source/common/unicode/uchar.h" + +// Functor for a simple 16-bit Unicode case-insensitive comparison. This is +// designed for the autocomplete system where we would rather get prefix lengths +// correct than handle all possible case sensitivity issues. +// +// Any time this is used the result will be incorrect in some cases that +// certain users will be able to discern. Ideally, this class would be deleted +// and we would do full Unicode case-sensitivity mappings using +// base::i18n::ToLower. However, ToLower can change the lengths of strings, +// making computations of offsets or prefix lengths difficult. Getting all +// edge cases correct will require careful implementation and testing. In the +// mean time, we use this simpler approach. +// +// This comparator will not handle combining accents properly since it compares +// 16-bit values in isolation. If the two strings use the same sequence of +// combining accents (this is the normal case) in both strings, it will work. +// +// Additionally, this comparator does not decode UTF sequences which is why it +// is called "UCS2". UTF-16 surrogates will be compared literally (i.e. "case- +// sensitively"). +// +// There are also a few cases where the lower-case version of a character +// expands to more than one code point that will not be handled properly. Such +// characters will be compared case-sensitively. 
+struct SimpleCaseInsensitiveCompareUCS2 { + public: + bool operator()(base::char16 x, base::char16 y) const { + return u_tolower(x) == u_tolower(y); + } +}; + +#endif // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_I18N_H_ diff --git a/components/omnibox/browser/search_suggestion_parser.cc b/components/omnibox/browser/search_suggestion_parser.cc index 9c411fc..11fbdbb 100644 --- a/components/omnibox/browser/search_suggestion_parser.cc +++ b/components/omnibox/browser/search_suggestion_parser.cc @@ -15,6 +15,7 @@ #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/values.h" +#include "components/omnibox/browser/autocomplete_i18n.h" #include "components/omnibox/browser/autocomplete_input.h" #include "components/omnibox/browser/url_prefix.h" #include "components/url_fixer/url_fixer.h" @@ -158,7 +159,7 @@ void SearchSuggestionParser::SuggestResult::ClassifyMatchContents( // Do a case-insensitive search for |lookup_text|. base::string16::const_iterator lookup_position = std::search( match_contents_.begin(), match_contents_.end(), lookup_text.begin(), - lookup_text.end(), base::CaseInsensitiveCompare<base::char16>()); + lookup_text.end(), SimpleCaseInsensitiveCompareUCS2()); if (!allow_bolding_all && (lookup_position == match_contents_.end())) { // Bail if the code below to update the bolding would bold the whole // string. 
Note that the string may already be entirely bolded; if diff --git a/components/omnibox/browser/shortcuts_provider.cc b/components/omnibox/browser/shortcuts_provider.cc index 962cc13..8d43be6 100644 --- a/components/omnibox/browser/shortcuts_provider.cc +++ b/components/omnibox/browser/shortcuts_provider.cc @@ -20,6 +20,7 @@ #include "base/time/time.h" #include "components/history/core/browser/history_service.h" #include "components/metrics/proto/omnibox_input_type.pb.h" +#include "components/omnibox/browser/autocomplete_i18n.h" #include "components/omnibox/browser/autocomplete_input.h" #include "components/omnibox/browser/autocomplete_match.h" #include "components/omnibox/browser/autocomplete_provider_client.h" @@ -218,9 +219,11 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch( // input of "foo.c" to autocomplete to "foo.com" for a fill_into_edit of // "http://foo.com". if (AutocompleteMatch::IsSearchType(match.type)) { - if (base::StartsWith(base::i18n::ToLower(match.fill_into_edit), - base::i18n::ToLower(input.text()), - base::CompareCase::SENSITIVE)) { + if (match.fill_into_edit.size() >= input.text().size() && + std::equal(match.fill_into_edit.begin(), + match.fill_into_edit.begin() + input.text().size(), + input.text().begin(), + SimpleCaseInsensitiveCompareUCS2())) { match.inline_autocompletion = match.fill_into_edit.substr(input.text().length()); match.allowed_to_be_default_match = |