base::i18n::StringSearch...() returns match index and length

Update base::i18n::StringSearchIgnoringCaseAndAccents() to take additional out-params for the index and length of the matched text.

BUG=none
TEST=none

Review URL: https://chromiumcodereview.appspot.com/10910116

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@155629 0039d316-1c4b-4281-b951-d872f2087c98
author: derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-09-09 16:54:50 +0000
committer: derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-09-09 16:54:50 +0000
commit: 4de885681dfb6e305fdd58f2d9107d8df406d297 (patch)
tree: c37820bd04f1482ea84609523922e568ebc1402c
parent: 02e09be3d38307d02e63491521288a7448bb9d4a (diff)
download: chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.zip
chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.gz
chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.bz2
6 files changed, 147 insertions, 51 deletions
diff --git a/ash/shell/app_list.cc b/ash/shell/app_list.cc
index 0ad18aa..cd3a0fa 100644
--- a/ash/shell/app_list.cc
+++ b/ash/shell/app_list.cc
@@ -262,8 +262,10 @@ class ExampleAppListViewDelegate : public app_list::AppListViewDelegate {
           static_cast<WindowTypeLauncherItem::Type>(i);
 
       string16 title = UTF8ToUTF16(WindowTypeLauncherItem::GetTitle(type));
-      if (base::i18n::StringSearchIgnoringCaseAndAccents(query, title))
+      if (base::i18n::StringSearchIgnoringCaseAndAccents(
+              query, title, NULL, NULL)) {
         model_->results()->Add(new ExampleSearchResult(type, query));
+      }
     }
   }
 
diff --git a/base/i18n/string_search.cc b/base/i18n/string_search.cc
index f5fe95c..9dc84cad 100644
--- a/base/i18n/string_search.cc
+++ b/base/i18n/string_search.cc
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 
 #include "base/i18n/string_search.h"
+#include "base/logging.h"
 
 #include "unicode/usearch.h"
 
@@ -10,26 +11,51 @@ namespace {
 
 bool CollationSensitiveStringSearch(const string16& find_this,
                                     const string16& in_this,
-                                    UCollationStrength strength) {
+                                    UCollationStrength strength,
+                                    size_t* match_index,
+                                    size_t* match_length) {
   UErrorCode status = U_ZERO_ERROR;
 
-  UStringSearch* search = usearch_open(find_this.data(), -1, in_this.data(), -1,
-                                       uloc_getDefault(), NULL, &status);
+  UStringSearch* search = usearch_open(find_this.data(), -1,
+                                       in_this.data(), -1,
+                                       uloc_getDefault(),
+                                       NULL,  // breakiter
+                                       &status);
 
   // Default to basic substring search if usearch fails. According to
   // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
   // if either |find_this| or |in_this| are empty. In either case basic
   // substring search will give the correct return value.
-  if (!U_SUCCESS(status))
-    return in_this.find(find_this) != string16::npos;
+  if (!U_SUCCESS(status)) {
+    size_t index = in_this.find(find_this);
+    if (index == string16::npos) {
+      return false;
+    } else {
+      if (match_index)
+        *match_index = index;
+      if (match_length)
+        *match_length = find_this.size();
+      return true;
+    }
+  }
 
   UCollator* collator = usearch_getCollator(search);
   ucol_setStrength(collator, strength);
   usearch_reset(search);
 
-  bool result = usearch_first(search, &status) != USEARCH_DONE;
+  int32_t index = usearch_first(search, &status);
+  if (!U_SUCCESS(status) || index == USEARCH_DONE) {
+    usearch_close(search);
+    return false;
+  }
+
+  if (match_index)
+    *match_index = static_cast<size_t>(index);
+  if (match_length)
+    *match_length = static_cast<size_t>(usearch_getMatchedLength(search));
+
   usearch_close(search);
-  return result;
+  return true;
 }
 
 }  // namespace
@@ -38,8 +64,14 @@ namespace base {
 namespace i18n {
 
 bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
-                                        const string16& in_this) {
-  return CollationSensitiveStringSearch(find_this, in_this, UCOL_PRIMARY);
+                                        const string16& in_this,
+                                        size_t* match_index,
+                                        size_t* match_length) {
+  return CollationSensitiveStringSearch(find_this,
+                                        in_this,
+                                        UCOL_PRIMARY,
+                                        match_index,
+                                        match_length);
 }
 
 }  // namespace i18n
diff --git a/base/i18n/string_search.h b/base/i18n/string_search.h
index e198890..2069b0f 100644
--- a/base/i18n/string_search.h
+++ b/base/i18n/string_search.h
@@ -11,16 +11,20 @@
 namespace base {
 namespace i18n {
 
-// Returns true if |in_this| contains |find_this|. Only differences between base
-// letters are taken into consideration. Case and accent differences are
-// ignored. Please refer to 'primary level' in
+// Returns true if |in_this| contains |find_this|. If |match_index| or
+// |match_length| are non-NULL, they are assigned the start position and total
+// length of the match.
+//
+// Only differences between base letters are taken into consideration. Case and
+// accent differences are ignored. Please refer to 'primary level' in
 // http://userguide.icu-project.org/collation/concepts for additional details.
 BASE_I18N_EXPORT
     bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
-                                            const string16& in_this);
+                                            const string16& in_this,
+                                            size_t* match_index,
+                                            size_t* match_length);
 
 }  // namespace i18n
 }  // namespace base
 
 #endif  // BASE_I18N_STRING_SEARCH_H_
-
diff --git a/base/i18n/string_search_unittest.cc b/base/i18n/string_search_unittest.cc
index 9198ea4..e6ca1c5 100644
--- a/base/i18n/string_search_unittest.cc
+++ b/base/i18n/string_search_unittest.cc
@@ -26,23 +26,37 @@ TEST(StringSearchTest, ASCII) {
   if (locale_is_posix)
     SetICUDefaultLocale("en_US");
 
+  size_t index = 0;
+  size_t length = 0;
+
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world")));
+      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(5U, length);
 
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
-      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o")));
+      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
+      &index, &length));
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa")));
+      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
+  EXPECT_EQ(4U, index);
+  EXPECT_EQ(6U, length);
 
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
-      ASCIIToUTF16("searching within empty string"), string16()));
+      ASCIIToUTF16("searching within empty string"), string16(),
+      &index, &length));
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      string16(), ASCIIToUTF16("searching for empty string")));
+      string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(0U, length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY")));
+      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
+      &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(18U, length);
 
   if (locale_is_posix)
     SetICUDefaultLocale(default_locale.data());
@@ -55,74 +69,112 @@ TEST(StringSearchTest, UnicodeLocaleIndependent) {
   const string16 a_base = WideToUTF16(L"a");
 
   // Composed characters
-  const string16 e_with_accute_accent = WideToUTF16(L"\u00e9");
-  const string16 E_with_accute_accent = WideToUTF16(L"\u00c9");
+  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
+  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
-  const string16 a_with_accute_accent = WideToUTF16(L"\u00e1");
+  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
 
   // Decomposed characters
-  const string16 e_with_accute_combining_mark = WideToUTF16(L"e\u0301");
-  const string16 E_with_accute_combining_mark = WideToUTF16(L"E\u0301");
+  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
+  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
-  const string16 a_with_accute_combining_mark = WideToUTF16(L"a\u0301");
+  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
 
   std::string default_locale(uloc_getDefault());
   bool locale_is_posix = (default_locale == "en_US_POSIX");
   if (locale_is_posix)
     SetICUDefaultLocale("en_US");
 
+  size_t index = 0;
+  size_t length = 0;
+
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_base, e_with_accute_accent));
+      e_base, e_with_acute_accent, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_accent.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_accute_accent, e_base));
+      e_with_acute_accent, e_base, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_base.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_base, e_with_accute_combining_mark));
+      e_base, e_with_acute_combining_mark, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_accute_combining_mark, e_base));
+      e_with_acute_combining_mark, e_base, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_base.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_accute_combining_mark, e_with_accute_accent));
+      e_with_acute_combining_mark, e_with_acute_accent,
+      &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_accent.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_accute_accent, e_with_accute_combining_mark));
+      e_with_acute_accent, e_with_acute_combining_mark,
+      &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_accute_combining_mark, e_with_grave_combining_mark));
+      e_with_acute_combining_mark, e_with_grave_combining_mark,
+      &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_grave_combining_mark.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_grave_combining_mark, e_with_accute_combining_mark));
+      e_with_grave_combining_mark, e_with_acute_combining_mark,
+      &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_accute_combining_mark, e_with_grave_accent));
+      e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_grave_accent.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      e_with_grave_accent, e_with_accute_combining_mark));
+      e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      E_with_accute_accent, e_with_accute_accent));
+      E_with_acute_accent, e_with_acute_accent, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_accent.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      E_with_grave_accent, e_with_accute_accent));
+      E_with_grave_accent, e_with_acute_accent, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_accent.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      E_with_accute_combining_mark, e_with_grave_accent));
+      E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_grave_accent.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      E_with_grave_combining_mark, e_with_accute_accent));
+      E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_acute_accent.size(), length);
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      E_base, e_with_grave_accent));
+      E_base, e_with_grave_accent, &index, &length));
+  EXPECT_EQ(0U, index);
+  EXPECT_EQ(e_with_grave_accent.size(), length);
 
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
-      a_with_accute_accent, e_with_accute_accent));
+      a_with_acute_accent, e_with_acute_accent, &index, &length));
 
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
-      a_with_accute_combining_mark, e_with_accute_combining_mark));
+      a_with_acute_combining_mark, e_with_acute_combining_mark,
+      &index, &length));
 
   if (locale_is_posix)
     SetICUDefaultLocale(default_locale.data());
@@ -136,17 +188,16 @@ TEST(StringSearchTest, UnicodeLocaleDependent) {
   const string16 a_with_ring = WideToUTF16(L"\u00e5");
 
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
-      a_base, a_with_ring));
+      a_base, a_with_ring, NULL, NULL));
 
   const char* default_locale = uloc_getDefault();
   SetICUDefaultLocale("da");
 
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
-      a_base, a_with_ring));
+      a_base, a_with_ring, NULL, NULL));
 
   SetICUDefaultLocale(default_locale);
 }
 
 }  // namespace i18n
 }  // namespace base
-
diff --git a/chrome/browser/bookmarks/bookmark_utils.cc b/chrome/browser/bookmarks/bookmark_utils.cc
index a97bba2..b972189 100644
--- a/chrome/browser/bookmarks/bookmark_utils.cc
+++ b/chrome/browser/bookmarks/bookmark_utils.cc
@@ -155,8 +155,10 @@ bool MoreRecentlyModified(const BookmarkNode* n1, const BookmarkNode* n2) {
 bool DoesBookmarkTextContainWords(const string16& text,
                                   const std::vector<string16>& words) {
   for (size_t i = 0; i < words.size(); ++i) {
-    if (!base::i18n::StringSearchIgnoringCaseAndAccents(words[i], text))
+    if (!base::i18n::StringSearchIgnoringCaseAndAccents(
+            words[i], text, NULL, NULL)) {
       return false;
+    }
   }
   return true;
 }
diff --git a/content/browser/download/download_item_impl.cc b/content/browser/download/download_item_impl.cc
index a4de17e..0534069 100644
--- a/content/browser/download/download_item_impl.cc
+++ b/content/browser/download/download_item_impl.cc
@@ -706,8 +706,10 @@ bool DownloadItemImpl::MatchesQuery(const string16& query) const {
   DCHECK_EQ(query, base::i18n::ToLower(query));
 
   string16 url_raw(UTF8ToUTF16(GetURL().spec()));
-  if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_raw))
+  if (base::i18n::StringSearchIgnoringCaseAndAccents(
+          query, url_raw, NULL, NULL)) {
     return true;
+  }
 
   // TODO(phajdan.jr): write a test case for the following code.
   // A good test case would be:
@@ -718,13 +720,16 @@ bool DownloadItemImpl::MatchesQuery(const string16& query) const {
   languages = content::GetContentClient()->browser()->GetAcceptLangs(
       GetBrowserContext());
   string16 url_formatted(net::FormatUrl(GetURL(), languages));
-  if (base::i18n::StringSearchIgnoringCaseAndAccents(query, url_formatted))
+  if (base::i18n::StringSearchIgnoringCaseAndAccents(
+          query, url_formatted, NULL, NULL)) {
     return true;
+  }
 
   // TODO(asanka): Change this to GetTargetFilePath() once DownloadQuery has
   //               been modified to work with target paths.
   string16 path(GetFullPath().LossyDisplayName());
-  return base::i18n::StringSearchIgnoringCaseAndAccents(query, path);
+  return base::i18n::StringSearchIgnoringCaseAndAccents(
+      query, path, NULL, NULL);
 }
 
 DownloadPersistentStoreInfo DownloadItemImpl::GetPersistentStoreInfo() const {
author	derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-09-09 16:54:50 +0000
committer	derat@chromium.org <derat@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-09-09 16:54:50 +0000
commit	4de885681dfb6e305fdd58f2d9107d8df406d297 (patch)
tree	c37820bd04f1482ea84609523922e568ebc1402c
parent	02e09be3d38307d02e63491521288a7448bb9d4a (diff)
download	chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.zip chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.gz chromium_src-4de885681dfb6e305fdd58f2d9107d8df406d297.tar.bz2