summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-12 08:31:36 +0000
committerhbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-12 08:31:36 +0000
commit3849848ac63a0bfcc9f45acbe1738a099ba995ee (patch)
tree11d8b1d94c69acd8abb9b84e173fd0976e562e2a
parent33d761c17a24919bab654dd076ba7d5bad3c7f08 (diff)
downloadchromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.zip
chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.gz
chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.bz2
A fix for Issue 46278.
When an ICU ruleset defines a dictionary value "$dictionary = [:LineBreak = Complex_Context:]", ICU expects the ruleset to use this value when breaking text. If the value is defined but never used, ubrk_next() gets stuck in an infinite loop, which causes Issue 46278. To fix this issue, we define |$dictionary| only for the languages that need it, e.g. Korean and Thai. BUG=46278 TEST=unit_test.exe --gtest_filter=SpellcheckWordIteratorTest.RuleSetConsistency Review URL: http://codereview.chromium.org/3108003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55856 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/renderer/spellchecker/spellcheck_worditerator.cc17
-rw-r--r--chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc23
2 files changed, 31 insertions, 9 deletions
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator.cc b/chrome/renderer/spellchecker/spellcheck_worditerator.cc
index 8ed6ef7..1c6b056 100644
--- a/chrome/renderer/spellchecker/spellcheck_worditerator.cc
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator.cc
@@ -67,9 +67,8 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
"$Numeric = [\\p{Word_Break = Numeric}];"
"$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"
- "$dictionary = [:LineBreak = Complex_Context:];"
"$Control = [\\p{Grapheme_Cluster_Break = Control}]; "
- "$ALetterPlus = %s;"
+ "%s"
"$KatakanaEx = $Katakana ($Extend | $Format)*;"
"$ALetterEx = $ALetterPlus ($Extend | $Format)*;"
@@ -117,13 +116,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
"($Extend | $Format)+ .?;"
"($MidLetter | $MidNumLet) $BackALetterEx;"
"($MidNum | $MidNumLet) $BackNumericEx;"
- "$dictionary $dictionary;"
"!!safe_forward;"
"($Extend | $Format)+ .?;"
"($MidLetterEx | $MidNumLetEx) $ALetterEx;"
- "($MidNumEx | $MidNumLetEx) $NumericEx;"
- "$dictionary $dictionary;";
+ "($MidNumEx | $MidNumLetEx) $NumericEx;";
// Retrieve the script codes used by the given language from ICU. When the
// given language consists of two or more scripts, we just use the first
@@ -145,8 +142,10 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
if (!aletter)
aletter = "Latin";
- const char kWithDictionary[] = "[$ALetter [$dictionary-$Extend-$Control]]";
- const char kWithoutDictionary[] = "$ALetter";
+ const char kWithDictionary[] =
+ "$dictionary = [:LineBreak = Complex_Context:];"
+ "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";
+ const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";
const char* aletter_plus = kWithoutDictionary;
if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)
aletter_plus = kWithDictionary;
@@ -158,9 +157,9 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
"$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200};";
const char kDisallowContraction[] = "";
- ruleset_allow_contraction_ = UTF8ToUTF16(StringPrintf(kRuleTemplate,
+ ruleset_allow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
aletter, aletter_plus, kAllowContraction));
- ruleset_disallow_contraction_ = UTF8ToUTF16(StringPrintf(kRuleTemplate,
+ ruleset_disallow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
aletter, aletter_plus, kDisallowContraction));
}
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
index 43af29f..da279fa 100644
--- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
@@ -129,3 +129,26 @@ TEST(SpellcheckWordIteratorTest, SplitWord) {
}
}
+// Tests whether our SpellcheckWordIterator extracts an empty word without
+// getting stuck in an infinite loop when given Khmer text. (This is a
+// regression test for Issue 46278.)
+TEST(SpellcheckWordIteratorTest, RuleSetConsistency) {
+ SpellcheckCharAttribute attributes;
+ attributes.SetDefaultLanguage("en-US");
+
+ const wchar_t kTestText[] = L"\x1791\x17c1\x002e";
+ string16 input(WideToUTF16(kTestText));
+
+ SpellcheckWordIterator iterator;
+ EXPECT_TRUE(iterator.Initialize(&attributes, input.c_str(), input.length(),
+ true));
+
+ // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following
+ // iterator.GetNextWord() call gets stuck in an infinite loop. Therefore, this
+ // test succeeds if this call returns without timeouts.
+ string16 actual_word;
+ int actual_start, actual_end;
+ EXPECT_FALSE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end));
+ EXPECT_EQ(0, actual_start);
+ EXPECT_EQ(0, actual_end);
+}