summary refs log tree commit diff stats
diff options
context:
space:
mode:
author derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-09-09 16:54:50 +0000
committer derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-09-09 16:54:50 +0000
commit 4de885681dfb6e305fdd58f2d9107d8df406d297 (patch)
tree c37820bd04f1482ea84609523922e568ebc1402c
parent 02e09be3d38307d02e63491521288a7448bb9d4a (diff)
download chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.zip
chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.gz
chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.bz2
base::i18n::StringSearch...() returns match index and length
Update base::i18n::StringSearchIgnoringCaseAndAccents() to take additional out-params for the index and length of the matched text.

BUG=none
TEST=none
Review URL: https://chromiumcodereview.appspot.com/10910116

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@155629 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- ash/shell/app_list.cc 4
-rw-r--r-- base/i18n/string_search.cc 50
-rw-r--r-- base/i18n/string_search.h 14
-rw-r--r-- base/i18n/string_search_unittest.cc 115
-rw-r--r-- chrome/browser/bookmarks/bookmark_utils.cc 4
-rw-r--r-- content/browser/download/download_item_impl.cc 11
6 files changed, 147 insertions, 51 deletions
diff --git a/ash/shell/app_list.cc b/ash/shell/app_list.cc
index 0ad18aa..cd3a0fa 100644
--- a/ash/shell/app_list.cc
+++ b/ash/shell/app_list.cc
@@ -262,8 +262,10 @@ class ExampleAppListViewDelegate : public app_list::AppListViewDelegate {
static_cast<WindowTypeLauncherItem::Type>(i);
string16 title = UTF8ToUTF16(WindowTypeLauncherItem::GetTitle(type));
- if (base::i18n::StringSearchIgnoringCaseAndAccents(query, title))
+ if (base::i18n::StringSearchIgnoringCaseAndAccents(
+ query, title, NULL, NULL)) {
model_->results()->Add(new ExampleSearchResult(type, query));
+ }
}
}
diff --git a/base/i18n/string_search.cc b/base/i18n/string_search.cc
index f5fe95c..9dc84cad 100644
--- a/base/i18n/string_search.cc
+++ b/base/i18n/string_search.cc
@@ -3,6 +3,7 @@
// found in the LICENSE file.
#include "base/i18n/string_search.h"
+#include "base/logging.h"
#include "unicode/usearch.h"
@@ -10,26 +11,51 @@ namespace {
bool CollationSensitiveStringSearch(const string16& find_this,
const string16& in_this,
- UCollationStrength strength) {
+ UCollationStrength strength,
+ size_t* match_index,
+ size_t* match_length) {
UErrorCode status = U_ZERO_ERROR;
- UStringSearch* search = usearch_open(find_this.data(), -1, in_this.data(), -1,
- uloc_getDefault(), NULL, &status);
+ UStringSearch* search = usearch_open(find_this.data(), -1,
+ in_this.data(), -1,
+ uloc_getDefault(),
+ NULL, // breakiter
+ &status);
// Default to basic substring search if usearch fails. According to
// http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
// if either |find_this| or |in_this| are empty. In either case basic
// substring search will give the correct return value.
- if (!U_SUCCESS(status))
- return in_this.find(find_this) != string16::npos;
+ if (!U_SUCCESS(status)) {
+ size_t index = in_this.find(find_this);
+ if (index == string16::npos) {
+ return false;
+ } else {
+ if (match_index)
+ *match_index = index;
+ if (match_length)
+ *match_length = find_this.size();
+ return true;
+ }
+ }
UCollator* collator = usearch_getCollator(search);
ucol_setStrength(collator, strength);
usearch_reset(search);
- bool result = usearch_first(search, &status) != USEARCH_DONE;
+ int32_t index = usearch_first(search, &status);
+ if (!U_SUCCESS(status) || index == USEARCH_DONE) {
+ usearch_close(search);
+ return false;
+ }
+
+ if (match_index)
+ *match_index = static_cast<size_t>(index);
+ if (match_length)
+ *match_length = static_cast<size_t>(usearch_getMatchedLength(search));
+
usearch_close(search);
- return result;
+ return true;
}
} // namespace
@@ -38,8 +64,14 @@ namespace base {
namespace i18n {
bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
- const string16& in_this) {
- return CollationSensitiveStringSearch(find_this, in_this, UCOL_PRIMARY);
+ const string16& in_this,
+ size_t* match_index,
+ size_t* match_length) {
+ return CollationSensitiveStringSearch(find_this,
+ in_this,
+ UCOL_PRIMARY,
+ match_index,
+ match_length);
}
} // namespace i18n
diff --git a/base/i18n/string_search.h b/base/i18n/string_search.h
index e198890..2069b0f 100644
--- a/base/i18n/string_search.h
+++ b/base/i18n/string_search.h
@@ -11,16 +11,20 @@
namespace base {
namespace i18n {
-// Returns true if |in_this| contains |find_this|. Only differences between base
-// letters are taken into consideration. Case and accent differences are
-// ignored. Please refer to 'primary level' in
+// Returns true if |in_this| contains |find_this|. If |match_index| or
+// |match_length| are non-NULL, they are assigned the start position and total
+// length of the match.
+//
+// Only differences between base letters are taken into consideration. Case and
+// accent differences are ignored. Please refer to 'primary level' in
// http://userguide.icu-project.org/collation/concepts for additional details.
BASE_I18N_EXPORT
bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
- const string16& in_this);
+ const string16& in_this,
+ size_t* match_index,
+ size_t* match_length);
} // namespace i18n
} // namespace base
#endif // BASE_I18N_STRING_SEARCH_H_
-
diff --git a/base/i18n/string_search_unittest.cc b/base/i18n/string_search_unittest.cc
index 9198ea4..e6ca1c5 100644
--- a/base/i18n/string_search_unittest.cc
+++ b/base/i18n/string_search_unittest.cc
@@ -26,23 +26,37 @@ TEST(StringSearchTest, ASCII) {
if (locale_is_posix)
SetICUDefaultLocale("en_US");
+ size_t index = 0;
+ size_t length = 0;
+
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- ASCIIToUTF16("hello"), ASCIIToUTF16("hello world")));
+ ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(5U, length);
EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
- ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o")));
+ ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"),
+ &index, &length));
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa")));
+ ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
+ EXPECT_EQ(4U, index);
+ EXPECT_EQ(6U, length);
EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
- ASCIIToUTF16("searching within empty string"), string16()));
+ ASCIIToUTF16("searching within empty string"), string16(),
+ &index, &length));
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- string16(), ASCIIToUTF16("searching for empty string")));
+ string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(0U, length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY")));
+ ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
+ &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(18U, length);
if (locale_is_posix)
SetICUDefaultLocale(default_locale.data());
@@ -55,74 +69,112 @@ TEST(StringSearchTest, UnicodeLocaleIndependent) {
const string16 a_base = WideToUTF16(L"a");
// Composed characters
- const string16 e_with_accute_accent = WideToUTF16(L"\u00e9");
- const string16 E_with_accute_accent = WideToUTF16(L"\u00c9");
+ const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
+ const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
- const string16 a_with_accute_accent = WideToUTF16(L"\u00e1");
+ const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
// Decomposed characters
- const string16 e_with_accute_combining_mark = WideToUTF16(L"e\u0301");
- const string16 E_with_accute_combining_mark = WideToUTF16(L"E\u0301");
+ const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
+ const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
- const string16 a_with_accute_combining_mark = WideToUTF16(L"a\u0301");
+ const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
std::string default_locale(uloc_getDefault());
bool locale_is_posix = (default_locale == "en_US_POSIX");
if (locale_is_posix)
SetICUDefaultLocale("en_US");
+ size_t index = 0;
+ size_t length = 0;
+
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_base, e_with_accute_accent));
+ e_base, e_with_acute_accent, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_accent.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_accute_accent, e_base));
+ e_with_acute_accent, e_base, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_base.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_base, e_with_accute_combining_mark));
+ e_base, e_with_acute_combining_mark, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_combining_mark.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_accute_combining_mark, e_base));
+ e_with_acute_combining_mark, e_base, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_base.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_accute_combining_mark, e_with_accute_accent));
+ e_with_acute_combining_mark, e_with_acute_accent,
+ &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_accent.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_accute_accent, e_with_accute_combining_mark));
+ e_with_acute_accent, e_with_acute_combining_mark,
+ &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_combining_mark.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_accute_combining_mark, e_with_grave_combining_mark));
+ e_with_acute_combining_mark, e_with_grave_combining_mark,
+ &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_grave_combining_mark.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_grave_combining_mark, e_with_accute_combining_mark));
+ e_with_grave_combining_mark, e_with_acute_combining_mark,
+ &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_combining_mark.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_accute_combining_mark, e_with_grave_accent));
+ e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_grave_accent.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- e_with_grave_accent, e_with_accute_combining_mark));
+ e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_combining_mark.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- E_with_accute_accent, e_with_accute_accent));
+ E_with_acute_accent, e_with_acute_accent, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_accent.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- E_with_grave_accent, e_with_accute_accent));
+ E_with_grave_accent, e_with_acute_accent, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_accent.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- E_with_accute_combining_mark, e_with_grave_accent));
+ E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_grave_accent.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- E_with_grave_combining_mark, e_with_accute_accent));
+ E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_acute_accent.size(), length);
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- E_base, e_with_grave_accent));
+ E_base, e_with_grave_accent, &index, &length));
+ EXPECT_EQ(0U, index);
+ EXPECT_EQ(e_with_grave_accent.size(), length);
EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
- a_with_accute_accent, e_with_accute_accent));
+ a_with_acute_accent, e_with_acute_accent, &index, &length));
EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
- a_with_accute_combining_mark, e_with_accute_combining_mark));
+ a_with_acute_combining_mark, e_with_acute_combining_mark,
+ &index, &length));
if (locale_is_posix)
SetICUDefaultLocale(default_locale.data());
@@ -136,17 +188,16 @@ TEST(StringSearchTest, UnicodeLocaleDependent) {
const string16 a_with_ring = WideToUTF16(L"\u00e5");
EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
- a_base, a_with_ring));
+ a_base, a_with_ring, NULL, NULL));
const char* default_locale = uloc_getDefault();
SetICUDefaultLocale("da");
EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
- a_base, a_with_ring));
+ a_base, a_with_ring, NULL, NULL));
SetICUDefaultLocale(default_locale);
}
} // namespace i18n
} // namespace base
-
diff --git a/chrome/browser/bookmarks/bookmark_utils.cc b/chrome/browser/bookmarks/bookmark_utils.cc
index a97bba2..b972189 100644
--- a/chrome/browser/bookmarks/bookmark_utils.cc
+++ b/chrome/browser/bookmarks/bookmark_utils.cc
@@ -155,8 +155,10 @@ bool MoreRecentlyModified(const BookmarkNode* n1, const BookmarkNode* n2) {
bool DoesBookmarkTextContainWords(const string16& text,
const std::vector<string16>& words) {
for (size_t i = 0; i < words.size(); ++i) {
- if (!base::i18n::StringSearchIgnoringCaseAndAccents(words[i], text))
+ if (!base::i18n::StringSearchIgnoringCaseAndAccents(
+ words[i], text, NULL, NULL)) {
return false;
+ }
}
return true;
}
diff --git a/content/browser/download/download_item_impl.cc b/content/browser/download/download_item_impl.cc
index a4de17e..0534069 100644
--- a/content/browser/download/download_item_impl.cc
+++ b/content/browser/download/download_item_impl.cc
@@ -706,8 +706,10 @@ bool DownloadItemImpl::MatchesQuery(const string16& query) const {
DCHECK_EQ(query, base::i18n::ToLower(query));
string16 url_raw(UTF8ToUTF16(GetURL().spec()));
- if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_raw))
+ if (base::i18n::StringSearchIgnoringCaseAndAccents(
+ query, url_raw, NULL, NULL)) {
return true;
+ }
// TODO(phajdan.jr): write a test case for the following code.
// A good test case would be:
@@ -718,13 +720,16 @@ bool DownloadItemImpl::MatchesQuery(const string16& query) const {
languages = content::GetContentClient()->browser()->GetAcceptLangs(
GetBrowserContext());
string16 url_formatted(net::FormatUrl(GetURL(), languages));
- if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_formatted))
+ if (base::i18n::StringSearchIgnoringCaseAndAccents(
+ query, url_formatted, NULL, NULL)) {
return true;
+ }
// TODO(asanka): Change this to GetTargetFilePath() once DownloadQuery has
// been modified to work with target paths.
string16 path(GetFullPath().LossyDisplayName());
- return base::i18n::StringSearchIgnoringCaseAndAccents(query, path);
+ return base::i18n::StringSearchIgnoringCaseAndAccents(
+ query, path, NULL, NULL);
}
DownloadPersistentStoreInfo DownloadItemImpl::GetPersistentStoreInfo() const {