A fix for Issue 46278.

When an ICU ruleset defines a dictionary value "$dictionary = [:LineBreak = Complex_Context:]", ICU expects us to use this value when breaking text. However, when we do not use this value, ubrk_next() gets stuck in an infinite loop, which causes Issue 46278. To fix this issue, we define |$dictionary| only for the languages that need it, e.g. Korean and Thai. BUG=46278 TEST=unit_test.exe --gtest_filter=SpellcheckWordIteratorTest.RuleSetConsistency Review URL: http://codereview.chromium.org/3108003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55856 0039d316-1c4b-4281-b951-d872f2087c98
author: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-08-12 08:31:36 +0000
committer: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-08-12 08:31:36 +0000
commit: 3849848ac63a0bfcc9f45acbe1738a099ba995ee (patch)
tree: 11d8b1d94c69acd8abb9b84e173fd0976e562e2a
parent: 33d761c17a24919bab654dd076ba7d5bad3c7f08 (diff)
download: chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.zip
chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.gz
chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.bz2
2 files changed, 31 insertions, 9 deletions
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator.cc b/chrome/renderer/spellchecker/spellcheck_worditerator.cc
index 8ed6ef7..1c6b056 100644
--- a/chrome/renderer/spellchecker/spellcheck_worditerator.cc
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator.cc
@@ -67,9 +67,8 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
       "$Numeric      = [\\p{Word_Break = Numeric}];"
       "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"
 
-      "$dictionary   = [:LineBreak = Complex_Context:];"
       "$Control        = [\\p{Grapheme_Cluster_Break = Control}]; "
-      "$ALetterPlus  = %s;"
+      "%s"
 
       "$KatakanaEx     = $Katakana     ($Extend |  $Format)*;"
       "$ALetterEx      = $ALetterPlus  ($Extend |  $Format)*;"
@@ -117,13 +116,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
       "($Extend | $Format)+ .?;"
       "($MidLetter | $MidNumLet) $BackALetterEx;"
       "($MidNum | $MidNumLet) $BackNumericEx;"
-      "$dictionary $dictionary;"
 
       "!!safe_forward;"
       "($Extend | $Format)+ .?;"
       "($MidLetterEx | $MidNumLetEx) $ALetterEx;"
-      "($MidNumEx | $MidNumLetEx) $NumericEx;"
-      "$dictionary $dictionary;";
+      "($MidNumEx | $MidNumLetEx) $NumericEx;";
 
   // Retrieve the script codes used by the given language from ICU. When the
   // given language consists of two or more scripts, we just use the first
@@ -145,8 +142,10 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
   if (!aletter)
     aletter = "Latin";
 
-  const char kWithDictionary[] = "[$ALetter [$dictionary-$Extend-$Control]]";
-  const char kWithoutDictionary[] = "$ALetter";
+  const char kWithDictionary[] =
+      "$dictionary   = [:LineBreak = Complex_Context:];"
+      "$ALetterPlus  = [$ALetter [$dictionary-$Extend-$Control]];";
+  const char kWithoutDictionary[] = "$ALetterPlus  = $ALetter;";
   const char* aletter_plus = kWithoutDictionary;
   if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)
     aletter_plus = kWithDictionary;
@@ -158,9 +157,9 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
       "$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200};";
   const char kDisallowContraction[] = "";
 
-  ruleset_allow_contraction_ = UTF8ToUTF16(StringPrintf(kRuleTemplate,
+  ruleset_allow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
       aletter, aletter_plus, kAllowContraction));
-  ruleset_disallow_contraction_ = UTF8ToUTF16(StringPrintf(kRuleTemplate,
+  ruleset_disallow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
       aletter, aletter_plus, kDisallowContraction));
 }
 
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
index 43af29f..da279fa 100644
--- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
@@ -129,3 +129,26 @@ TEST(SpellcheckWordIteratorTest, SplitWord) {
   }
 }
 
+// Tests whether our SpellcheckWordIterator extracts an empty word without
+// getting stuck in an infinite loop when inputting a Khmer text. (This is a
+// regression test for Issue 46278.)
+TEST(SpellcheckWordIteratorTest, RuleSetConsistency) {
+  SpellcheckCharAttribute attributes;
+  attributes.SetDefaultLanguage("en-US");
+
+  const wchar_t kTestText[] = L"\x1791\x17c1\x002e";
+  string16 input(WideToUTF16(kTestText));
+
+  SpellcheckWordIterator iterator;
+  EXPECT_TRUE(iterator.Initialize(&attributes, input.c_str(), input.length(),
+                                  true));
+
+  // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following
+  // iterator.GetNextWord() call gets stuck in an infinite loop. Therefore, this
+  // test succeeds if this call returns without timeouts.
+  string16 actual_word;
+  int actual_start, actual_end;
+  EXPECT_FALSE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end));
+  EXPECT_EQ(0, actual_start);
+  EXPECT_EQ(0, actual_end);
+}
author	hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-08-12 08:31:36 +0000
committer	hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-08-12 08:31:36 +0000
commit	3849848ac63a0bfcc9f45acbe1738a099ba995ee (patch)
tree	11d8b1d94c69acd8abb9b84e173fd0976e562e2a
parent	33d761c17a24919bab654dd076ba7d5bad3c7f08 (diff)
download	chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.zip chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.gz chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.bz2