diff options
author | derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-09-09 16:54:50 +0000 |
---|---|---|
committer | derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-09-09 16:54:50 +0000 |
commit | 4de885681dfb6e305fdd58f2d9107d8df406d297 (patch) | |
tree | c37820bd04f1482ea84609523922e568ebc1402c | |
parent | 02e09be3d38307d02e63491521288a7448bb9d4a (diff) | |
download | chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.zip chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.gz chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.bz2 |
base::i18n::StringSearch...() returns match index and length
Update base::i18n::StringSearchIgnoringCaseAndAccents() to
take additional out-params for the index and length of the
matched text.
BUG=none
TEST=none
Review URL: https://chromiumcodereview.appspot.com/10910116
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@155629 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | ash/shell/app_list.cc | 4 | ||||
-rw-r--r-- | base/i18n/string_search.cc | 50 | ||||
-rw-r--r-- | base/i18n/string_search.h | 14 | ||||
-rw-r--r-- | base/i18n/string_search_unittest.cc | 115 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_utils.cc | 4 | ||||
-rw-r--r-- | content/browser/download/download_item_impl.cc | 11 |
6 files changed, 147 insertions, 51 deletions
diff --git a/ash/shell/app_list.cc b/ash/shell/app_list.cc index 0ad18aa..cd3a0fa 100644 --- a/ash/shell/app_list.cc +++ b/ash/shell/app_list.cc @@ -262,8 +262,10 @@ class ExampleAppListViewDelegate : public app_list::AppListViewDelegate { static_cast<WindowTypeLauncherItem::Type>(i); string16 title = UTF8ToUTF16(WindowTypeLauncherItem::GetTitle(type)); - if (base::i18n::StringSearchIgnoringCaseAndAccents(query, title)) + if (base::i18n::StringSearchIgnoringCaseAndAccents( + query, title, NULL, NULL)) { model_->results()->Add(new ExampleSearchResult(type, query)); + } } } diff --git a/base/i18n/string_search.cc b/base/i18n/string_search.cc index f5fe95c..9dc84cad 100644 --- a/base/i18n/string_search.cc +++ b/base/i18n/string_search.cc @@ -3,6 +3,7 @@ // found in the LICENSE file. #include "base/i18n/string_search.h" +#include "base/logging.h" #include "unicode/usearch.h" @@ -10,26 +11,51 @@ namespace { bool CollationSensitiveStringSearch(const string16& find_this, const string16& in_this, - UCollationStrength strength) { + UCollationStrength strength, + size_t* match_index, + size_t* match_length) { UErrorCode status = U_ZERO_ERROR; - UStringSearch* search = usearch_open(find_this.data(), -1, in_this.data(), -1, - uloc_getDefault(), NULL, &status); + UStringSearch* search = usearch_open(find_this.data(), -1, + in_this.data(), -1, + uloc_getDefault(), + NULL, // breakiter + &status); // Default to basic substring search if usearch fails. According to // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail // if either |find_this| or |in_this| are empty. In either case basic // substring search will give the correct return value. - if (!U_SUCCESS(status)) - return in_this.find(find_this) != string16::npos; + if (!U_SUCCESS(status)) { + size_t index = in_this.find(find_this); + if (index == string16::npos) { + return false; + } else { + if (match_index) + *match_index = index; + if (match_length) + *match_length = find_this.size(); + return true; + } + } UCollator* collator = usearch_getCollator(search); ucol_setStrength(collator, strength); usearch_reset(search); - bool result = usearch_first(search, &status) != USEARCH_DONE; + int32_t index = usearch_first(search, &status); + if (!U_SUCCESS(status) || index == USEARCH_DONE) { + usearch_close(search); + return false; + } + + if (match_index) + *match_index = static_cast<size_t>(index); + if (match_length) + *match_length = static_cast<size_t>(usearch_getMatchedLength(search)); + usearch_close(search); - return result; + return true; } } // namespace @@ -38,8 +64,14 @@ namespace base { namespace i18n { bool StringSearchIgnoringCaseAndAccents(const string16& find_this, - const string16& in_this) { - return CollationSensitiveStringSearch(find_this, in_this, UCOL_PRIMARY); + const string16& in_this, + size_t* match_index, + size_t* match_length) { + return CollationSensitiveStringSearch(find_this, + in_this, + UCOL_PRIMARY, + match_index, + match_length); } } // namespace i18n diff --git a/base/i18n/string_search.h b/base/i18n/string_search.h index e198890..2069b0f 100644 --- a/base/i18n/string_search.h +++ b/base/i18n/string_search.h @@ -11,16 +11,20 @@ namespace base { namespace i18n { -// Returns true if |in_this| contains |find_this|. Only differences between base -// letters are taken into consideration. Case and accent differences are -// ignored. Please refer to 'primary level' in +// Returns true if |in_this| contains |find_this|. If |match_index| or +// |match_length| are non-NULL, they are assigned the start position and total +// length of the match. +// +// Only differences between base letters are taken into consideration. Case and +// accent differences are ignored. Please refer to 'primary level' in // http://userguide.icu-project.org/collation/concepts for additional details. BASE_I18N_EXPORT bool StringSearchIgnoringCaseAndAccents(const string16& find_this, - const string16& in_this); + const string16& in_this, + size_t* match_index, + size_t* match_length); } // namespace i18n } // namespace base #endif // BASE_I18N_STRING_SEARCH_H_ - diff --git a/base/i18n/string_search_unittest.cc b/base/i18n/string_search_unittest.cc index 9198ea4..e6ca1c5 100644 --- a/base/i18n/string_search_unittest.cc +++ b/base/i18n/string_search_unittest.cc @@ -26,23 +26,37 @@ TEST(StringSearchTest, ASCII) { if (locale_is_posix) SetICUDefaultLocale("en_US"); + size_t index = 0; + size_t length = 0; + EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"))); + ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(5U, length); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( - ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"))); + ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"), + &index, &length)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"))); + ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length)); + EXPECT_EQ(4U, index); + EXPECT_EQ(6U, length); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( - ASCIIToUTF16("searching within empty string"), string16())); + ASCIIToUTF16("searching within empty string"), string16(), + &index, &length)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - string16(), ASCIIToUTF16("searching for empty string"))); + string16(), ASCIIToUTF16("searching for empty string"), &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(0U, length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"))); + ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"), + &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(18U, length); if (locale_is_posix) SetICUDefaultLocale(default_locale.data()); @@ -55,74 +69,112 @@ TEST(StringSearchTest, UnicodeLocaleIndependent) { const string16 a_base = WideToUTF16(L"a"); // Composed characters - const string16 e_with_accute_accent = WideToUTF16(L"\u00e9"); - const string16 E_with_accute_accent = WideToUTF16(L"\u00c9"); + const string16 e_with_acute_accent = WideToUTF16(L"\u00e9"); + const string16 E_with_acute_accent = WideToUTF16(L"\u00c9"); const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); - const string16 a_with_accute_accent = WideToUTF16(L"\u00e1"); + const string16 a_with_acute_accent = WideToUTF16(L"\u00e1"); // Decomposed characters - const string16 e_with_accute_combining_mark = WideToUTF16(L"e\u0301"); - const string16 E_with_accute_combining_mark = WideToUTF16(L"E\u0301"); + const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301"); + const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301"); const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); - const string16 a_with_accute_combining_mark = WideToUTF16(L"a\u0301"); + const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301"); std::string default_locale(uloc_getDefault()); bool locale_is_posix = (default_locale == "en_US_POSIX"); if (locale_is_posix) SetICUDefaultLocale("en_US"); + size_t index = 0; + size_t length = 0; + EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_base, e_with_accute_accent)); + e_base, e_with_acute_accent, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_accute_accent, e_base)); + e_with_acute_accent, e_base, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_base.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_base, e_with_accute_combining_mark)); + e_base, e_with_acute_combining_mark, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_accute_combining_mark, e_base)); + e_with_acute_combining_mark, e_base, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_base.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_accute_combining_mark, e_with_accute_accent)); + e_with_acute_combining_mark, e_with_acute_accent, + &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_accute_accent, e_with_accute_combining_mark)); + e_with_acute_accent, e_with_acute_combining_mark, + &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_accute_combining_mark, e_with_grave_combining_mark)); + e_with_acute_combining_mark, e_with_grave_combining_mark, + &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_grave_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_grave_combining_mark, e_with_accute_combining_mark)); + e_with_grave_combining_mark, e_with_acute_combining_mark, + &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_accute_combining_mark, e_with_grave_accent)); + e_with_acute_combining_mark, e_with_grave_accent, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_grave_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - e_with_grave_accent, e_with_accute_combining_mark)); + e_with_grave_accent, e_with_acute_combining_mark, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - E_with_accute_accent, e_with_accute_accent)); + E_with_acute_accent, e_with_acute_accent, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - E_with_grave_accent, e_with_accute_accent)); + E_with_grave_accent, e_with_acute_accent, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - E_with_accute_combining_mark, e_with_grave_accent)); + E_with_acute_combining_mark, e_with_grave_accent, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_grave_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - E_with_grave_combining_mark, e_with_accute_accent)); + E_with_grave_combining_mark, e_with_acute_accent, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - E_base, e_with_grave_accent)); + E_base, e_with_grave_accent, &index, &length)); + EXPECT_EQ(0U, index); + EXPECT_EQ(e_with_grave_accent.size(), length); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( - a_with_accute_accent, e_with_accute_accent)); + a_with_acute_accent, e_with_acute_accent, &index, &length)); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( - a_with_accute_combining_mark, e_with_accute_combining_mark)); + a_with_acute_combining_mark, e_with_acute_combining_mark, + &index, &length)); if (locale_is_posix) SetICUDefaultLocale(default_locale.data()); @@ -136,17 +188,16 @@ TEST(StringSearchTest, UnicodeLocaleDependent) { const string16 a_with_ring = WideToUTF16(L"\u00e5"); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( - a_base, a_with_ring)); + a_base, a_with_ring, NULL, NULL)); const char* default_locale = uloc_getDefault(); SetICUDefaultLocale("da"); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( - a_base, a_with_ring)); + a_base, a_with_ring, NULL, NULL)); SetICUDefaultLocale(default_locale); } } // namespace i18n } // namespace base - diff --git a/chrome/browser/bookmarks/bookmark_utils.cc b/chrome/browser/bookmarks/bookmark_utils.cc index a97bba2..b972189 100644 --- a/chrome/browser/bookmarks/bookmark_utils.cc +++ b/chrome/browser/bookmarks/bookmark_utils.cc @@ -155,8 +155,10 @@ bool MoreRecentlyModified(const BookmarkNode* n1, const BookmarkNode* n2) { bool DoesBookmarkTextContainWords(const string16& text, const std::vector<string16>& words) { for (size_t i = 0; i < words.size(); ++i) { - if (!base::i18n::StringSearchIgnoringCaseAndAccents(words[i], text)) + if (!base::i18n::StringSearchIgnoringCaseAndAccents( + words[i], text, NULL, NULL)) { return false; + } } return true; } diff --git a/content/browser/download/download_item_impl.cc b/content/browser/download/download_item_impl.cc index a4de17e..0534069 100644 --- a/content/browser/download/download_item_impl.cc +++ b/content/browser/download/download_item_impl.cc @@ -706,8 +706,10 @@ bool DownloadItemImpl::MatchesQuery(const string16& query) const { DCHECK_EQ(query, base::i18n::ToLower(query)); string16 url_raw(UTF8ToUTF16(GetURL().spec())); - if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_raw)) + if (base::i18n::StringSearchIgnoringCaseAndAccents( + query, url_raw, NULL, NULL)) { return true; + } // TODO(phajdan.jr): write a test case for the following code. // A good test case would be: @@ -718,13 +720,16 @@ bool DownloadItemImpl::MatchesQuery(const string16& query) const { languages = content::GetContentClient()->browser()->GetAcceptLangs( GetBrowserContext()); string16 url_formatted(net::FormatUrl(GetURL(), languages)); - if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_formatted)) + if (base::i18n::StringSearchIgnoringCaseAndAccents( + query, url_formatted, NULL, NULL)) { return true; + } // TODO(asanka): Change this to GetTargetFilePath() once DownloadQuery has // been modified to work with target paths. string16 path(GetFullPath().LossyDisplayName()); - return base::i18n::StringSearchIgnoringCaseAndAccents(query, path); + return base::i18n::StringSearchIgnoringCaseAndAccents( + query, path, NULL, NULL); } DownloadPersistentStoreInfo DownloadItemImpl::GetPersistentStoreInfo() const { |