summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--base/base.gyp3
-rw-r--r--base/i18n/string_search.cc45
-rw-r--r--base/i18n/string_search.h27
-rw-r--r--base/i18n/string_search_unittest.cc154
-rw-r--r--content/browser/download/download_item.cc18
5 files changed, 236 insertions, 11 deletions
diff --git a/base/base.gyp b/base/base.gyp
index 30dd274..8bca61bee 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -56,6 +56,8 @@
'i18n/number_formatting.h',
'i18n/rtl.cc',
'i18n/rtl.h',
+ 'i18n/string_search.cc',
+ 'i18n/string_search.h',
'i18n/time_formatting.cc',
'i18n/time_formatting.h',
],
@@ -142,6 +144,7 @@
'i18n/icu_string_conversions_unittest.cc',
'i18n/number_formatting_unittest.cc',
'i18n/rtl_unittest.cc',
+ 'i18n/string_search_unittest.cc',
'i18n/time_formatting_unittest.cc',
'json/json_reader_unittest.cc',
'json/json_writer_unittest.cc',
diff --git a/base/i18n/string_search.cc b/base/i18n/string_search.cc
new file mode 100644
index 0000000..b2b29677
--- /dev/null
+++ b/base/i18n/string_search.cc
@@ -0,0 +1,45 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <unicode/usearch.h>
+
+#include "base/i18n/string_search.h"
+
+namespace {
+
+// Returns true if |in_this| contains |find_this| according to an ICU
+// collation-aware search that uses ICU's default locale, with the collator
+// set to |strength|. Falls back to a plain substring search when the ICU
+// search object cannot be created.
+bool CollationSensitiveStringSearch(const string16& find_this,
+                                    const string16& in_this,
+                                    UCollationStrength strength) {
+  UErrorCode status = U_ZERO_ERROR;
+
+  // Pass explicit lengths rather than -1 so the search neither depends on
+  // data() being NUL-terminated nor stops at an embedded NUL.
+  UStringSearch* search = usearch_open(
+      find_this.data(), static_cast<int32_t>(find_this.size()),
+      in_this.data(), static_cast<int32_t>(in_this.size()),
+      uloc_getDefault(), NULL, &status);
+
+  // Default to basic substring search if usearch fails. According to
+  // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
+  // if either |find_this| or |in_this| are empty. In either case basic
+  // substring search will give the correct return value.
+  if (!U_SUCCESS(status)) {
+    // No handle is expected on failure; release it defensively if one exists.
+    if (search)
+      usearch_close(search);
+    return in_this.find(find_this) != string16::npos;
+  }
+
+  UCollator* collator = usearch_getCollator(search);
+  ucol_setStrength(collator, strength);
+  // Reset the search after changing the collator's strength.
+  usearch_reset(search);
+
+  const bool found = usearch_first(search, &status) != USEARCH_DONE;
+
+  // The handle returned by usearch_open must be released with usearch_close;
+  // returning without closing it would leak one search object per call.
+  usearch_close(search);
+  return found;
+}
+
+} // namespace
+
+namespace base {
+namespace i18n {
+
+// Substring test that only looks at base letters: delegates to the collation
+// search helper at ICU's primary strength.
+bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
+                                        const string16& in_this) {
+  const UCollationStrength base_letters_only = UCOL_PRIMARY;
+  return CollationSensitiveStringSearch(find_this, in_this, base_letters_only);
+}
+
+} // namespace i18n
+} // namespace base
+
diff --git a/base/i18n/string_search.h b/base/i18n/string_search.h
new file mode 100644
index 0000000..6602451
--- /dev/null
+++ b/base/i18n/string_search.h
@@ -0,0 +1,27 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_STRING_SEARCH_H_
+#define BASE_I18N_STRING_SEARCH_H_
+#pragma once
+
+#include "base/i18n/base_i18n_export.h"
+#include "base/string16.h"
+
+namespace base {
+namespace i18n {
+
+// Returns true if |in_this| contains |find_this|, comparing base letters only
+// (ICU 'primary level'; case and accent differences are ignored) under ICU's
+// default locale, so results can vary by locale. For details see
+// http://userguide.icu-project.org/collation/concepts.
+BASE_I18N_EXPORT
+ bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
+ const string16& in_this);
+
+} // namespace i18n
+} // namespace base
+
+#endif // BASE_I18N_STRING_SEARCH_H_
+
diff --git a/base/i18n/string_search_unittest.cc b/base/i18n/string_search_unittest.cc
new file mode 100644
index 0000000..fe877c0
--- /dev/null
+++ b/base/i18n/string_search_unittest.cc
@@ -0,0 +1,154 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+#include <unicode/usearch.h>
+
+#include "base/i18n/rtl.h"
+#include "base/i18n/string_search.h"
+#include "base/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace base {
+namespace i18n {
+
+// Empty fixture; it exists so the cases below can be declared with TEST_F.
+class StringSearchTest : public testing::Test {};
+
+// Note on setting default locale for testing: The current default locale on
+// the Mac trybot is en_US_POSIX, with which primary-level collation strength
+// string search is case-sensitive, when normally it should be
+// case-insensitive. In other locales (including en_US which English speakers
+// in the U.S. use), this search would be case-insensitive as expected.
+
+// Exercises plain ASCII inputs: substring hits, whitespace sensitivity, empty
+// operands and case-insensitivity.
+TEST_F(StringSearchTest, ASCII) {
+  // Under en_US_POSIX primary-strength search is case-sensitive, so switch to
+  // en_US for the duration of the test and restore the original at the end.
+  const std::string default_locale(uloc_getDefault());
+  const bool locale_is_posix = (default_locale == "en_US_POSIX");
+  if (locale_is_posix)
+    SetICUDefaultLocale("en_US");
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world")));
+
+  // The number of spaces matters: a needle with four spaces after 'h' must
+  // not be found in a haystack that has only three there. (With identical
+  // strings this expectation could never hold.)
+  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
+      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o")));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa")));
+
+  // Empty haystack: nothing non-empty can be found in it.
+  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
+      ASCIIToUTF16("searching within empty string"), ASCIIToUTF16("")));
+
+  // Empty needle: treated as contained in any string.
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      ASCIIToUTF16(""), ASCIIToUTF16("searching for empty string")));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY")));
+
+  // c_str() (not data()) is the accessor that yields a C string.
+  if (locale_is_posix)
+    SetICUDefaultLocale(default_locale.c_str());
+}
+
+// Checks matches that are expected to hold at primary strength regardless of
+// composed vs. decomposed form, kind of accent, or case, and that different
+// base letters ('a' vs. 'e') still do not match.
+TEST_F(StringSearchTest, UnicodeLocaleIndependent) {
+  // Base characters
+  const string16 e_base = WideToUTF16(L"e");
+  const string16 E_base = WideToUTF16(L"E");
+
+  // Composed characters
+  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
+  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
+  const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
+  const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
+  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
+
+  // Decomposed characters
+  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
+  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
+  const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
+  const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
+  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
+
+  // Under en_US_POSIX primary-strength search is case-sensitive, so switch to
+  // en_US for the duration of the test and restore the original at the end.
+  const std::string default_locale(uloc_getDefault());
+  const bool locale_is_posix = (default_locale == "en_US_POSIX");
+  if (locale_is_posix)
+    SetICUDefaultLocale("en_US");
+
+  // Base letter vs. accented letter, in both directions.
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_base, e_with_acute_accent));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_acute_accent, e_base));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_base, e_with_acute_combining_mark));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_acute_combining_mark, e_base));
+
+  // Composed vs. decomposed forms of the same accented letter.
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_acute_combining_mark, e_with_acute_accent));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_acute_accent, e_with_acute_combining_mark));
+
+  // Different accents on the same base letter.
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_acute_combining_mark, e_with_grave_combining_mark));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_grave_combining_mark, e_with_acute_combining_mark));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_acute_combining_mark, e_with_grave_accent));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      e_with_grave_accent, e_with_acute_combining_mark));
+
+  // Case differences combined with accent and form differences.
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      E_with_acute_accent, e_with_acute_accent));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      E_with_grave_accent, e_with_acute_accent));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      E_with_acute_combining_mark, e_with_grave_accent));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      E_with_grave_combining_mark, e_with_acute_accent));
+
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
+      E_base, e_with_grave_accent));
+
+  // A different base letter must not match even when the accent is the same.
+  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
+      a_with_acute_accent, e_with_acute_accent));
+
+  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
+      a_with_acute_combining_mark, e_with_acute_combining_mark));
+
+  // c_str() (not data()) is the accessor that yields a C string.
+  if (locale_is_posix)
+    SetICUDefaultLocale(default_locale.c_str());
+}
+
+// Shows that the outcome depends on ICU's default locale: 'a' is expected to
+// be found in U+00E5 under the ambient default locale, but not once the
+// default locale has been switched to "da".
+TEST_F(StringSearchTest, UnicodeLocaleDependent) {
+  const string16 plain_a = WideToUTF16(L"a");
+  const string16 a_ring = WideToUTF16(L"\u00e5");  // Composed a-with-ring.
+
+  // Ambient default locale: the ring is expected to be ignored.
+  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(plain_a, a_ring));
+
+  // Remember the current default so it can be put back afterwards.
+  const char* saved_locale = uloc_getDefault();
+  SetICUDefaultLocale("da");
+
+  // With "da" as the default, the same search is expected to fail.
+  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(plain_a, a_ring));
+
+  SetICUDefaultLocale(saved_locale);
+}
+
+} // namespace i18n
+} // namespace base
+
diff --git a/content/browser/download/download_item.cc b/content/browser/download/download_item.cc
index 0809872..d37cd51 100644
--- a/content/browser/download/download_item.cc
+++ b/content/browser/download/download_item.cc
@@ -8,6 +8,7 @@
#include "base/file_util.h"
#include "base/format_macros.h"
#include "base/i18n/case_conversion.h"
+#include "base/i18n/string_search.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/stringprintf.h"
@@ -614,8 +615,8 @@ bool DownloadItem::MatchesQuery(const string16& query) const {
DCHECK_EQ(query, base::i18n::ToLower(query));
- string16 url_raw(base::i18n::ToLower(UTF8ToUTF16(GetURL().spec())));
- if (url_raw.find(query) != string16::npos)
+ string16 url_raw(UTF8ToUTF16(GetURL().spec()));
+ if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_raw))
return true;
// TODO(phajdan.jr): write a test case for the following code.
@@ -627,17 +628,12 @@ bool DownloadItem::MatchesQuery(const string16& query) const {
TabContents* tab = request_handle_.GetTabContents();
if (tab)
languages = content::GetContentClient()->browser()->GetAcceptLangs(tab);
- string16 url_formatted(
- base::i18n::ToLower(net::FormatUrl(GetURL(), languages)));
- if (url_formatted.find(query) != string16::npos)
+ string16 url_formatted(net::FormatUrl(GetURL(), languages));
+ if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_formatted))
return true;
- string16 path(base::i18n::ToLower(full_path().LossyDisplayName()));
- // This shouldn't just do a substring match; it is wrong for Unicode
- // due to normalization and we have a fancier search-query system
- // used elsewhere.
- // http://code.google.com/p/chromium/issues/detail?id=71982
- return (path.find(query) != string16::npos);
+ string16 path(full_path().LossyDisplayName());
+ return base::i18n::StringSearchIgnoringCaseAndAccents(query, path);
}
void DownloadItem::SetFileCheckResults(const DownloadStateInfo& state) {