diff options
5 files changed, 260 insertions, 48 deletions
diff --git a/chrome/browser/spellchecker/spelling_service_client.cc b/chrome/browser/spellchecker/spelling_service_client.cc index a0c1381..f05cbc1 100644 --- a/chrome/browser/spellchecker/spelling_service_client.cc +++ b/chrome/browser/spellchecker/spelling_service_client.cc @@ -4,6 +4,8 @@ #include "chrome/browser/spellchecker/spelling_service_client.h" +#include <algorithm> + #include "base/json/json_reader.h" #include "base/json/string_escape.h" #include "base/logging.h" @@ -66,8 +68,16 @@ bool SpellingServiceClient::RequestTextCheck( &language_code, &country_code); + // Replace typographical apostrophes with typewriter apostrophes, so that + // server word breaker behaves correctly. + const base::char16 kApostrophe = 0x27; + const base::char16 kRightSingleQuotationMark = 0x2019; + base::string16 text_copy = text; + std::replace(text_copy.begin(), text_copy.end(), kRightSingleQuotationMark, + kApostrophe); + // Format the JSON request to be sent to the Spelling service. - std::string encoded_text = base::GetQuotedJSONString(text); + std::string encoded_text = base::GetQuotedJSONString(text_copy); static const char kSpellingRequest[] = "{" diff --git a/chrome/browser/spellchecker/spelling_service_client_unittest.cc b/chrome/browser/spellchecker/spelling_service_client_unittest.cc index e1f805f..cdc16cc 100644 --- a/chrome/browser/spellchecker/spelling_service_client_unittest.cc +++ b/chrome/browser/spellchecker/spelling_service_client_unittest.cc @@ -34,14 +34,14 @@ class TestSpellingURLFetcher : public net::TestURLFetcher { const GURL& url, net::URLFetcherDelegate* d, int version, - const std::string& text, + const std::string& sanitized_text, const std::string& language, int status, const std::string& response) : net::TestURLFetcher(0, url, d), version_(base::StringPrintf("v%d", version)), language_(language.empty() ? std::string("en") : language), - text_(text) { + sanitized_text_(sanitized_text) { set_response_code(status); SetResponseString(response); } @@ -64,9 +64,9 @@ class TestSpellingURLFetcher : public net::TestURLFetcher { std::string version; EXPECT_TRUE(value->GetString("apiVersion", &version)); EXPECT_EQ(version_, version); - std::string text; - EXPECT_TRUE(value->GetString("params.text", &text)); - EXPECT_EQ(text_, text); + std::string sanitized_text; + EXPECT_TRUE(value->GetString("params.text", &sanitized_text)); + EXPECT_EQ(sanitized_text_, sanitized_text); std::string language; EXPECT_TRUE(value->GetString("params.language", &language)); EXPECT_EQ(language_, language); @@ -106,7 +106,7 @@ class TestSpellingURLFetcher : public net::TestURLFetcher { std::string version_; std::string language_; std::string country_; - std::string text_; + std::string sanitized_text_; }; // A class derived from the SpellingServiceClient class used by the @@ -124,10 +124,10 @@ class TestingSpellingServiceClient : public SpellingServiceClient { ~TestingSpellingServiceClient() override {} void SetHTTPRequest(int type, - const std::string& text, + const std::string& sanitized_text, const std::string& language) { request_type_ = type; - request_text_ = text; + sanitized_request_text_ = sanitized_text; request_language_ = language; } @@ -151,8 +151,7 @@ class TestingSpellingServiceClient : public SpellingServiceClient { const base::string16& request_text, const std::vector<SpellCheckResult>& results) { EXPECT_EQ(success_, success); - base::string16 text(base::UTF8ToUTF16(request_text_)); - EXPECT_EQ(text, request_text); + base::string16 text(base::UTF8ToUTF16(sanitized_request_text_)); for (std::vector<SpellCheckResult>::const_iterator it = results.begin(); it != results.end(); ++it) { text.replace(it->location, it->length, it->replacement); @@ -168,15 +167,14 @@ class TestingSpellingServiceClient : public SpellingServiceClient { private: scoped_ptr<net::URLFetcher> CreateURLFetcher(const GURL& url) override { EXPECT_EQ("https://www.googleapis.com/rpc", url.spec()); - fetcher_ = new TestSpellingURLFetcher(0, url, this, - request_type_, request_text_, - request_language_, - response_status_, response_data_); + fetcher_ = new TestSpellingURLFetcher( + 0, url, this, request_type_, sanitized_request_text_, request_language_, + response_status_, response_data_); return scoped_ptr<net::URLFetcher>(fetcher_); } int request_type_; - std::string request_text_; + std::string sanitized_request_text_; std::string request_language_; int response_status_; std::string response_data_; @@ -217,7 +215,8 @@ class SpellingServiceClientTest : public testing::Test { // misspelled words, |corrected_text| should be equal to |request_text|.) TEST_F(SpellingServiceClientTest, RequestTextCheck) { static const struct { - const char* request_text; + const wchar_t* request_text; + const char* sanitized_request_text; SpellingServiceClient::ServiceType request_type; int response_status; const char* response_data; @@ -226,6 +225,7 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { const char* language; } kTests[] = { { + L"", "", SpellingServiceClient::SUGGEST, 500, @@ -234,6 +234,7 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { "", "af", }, { + L"chromebook", "chromebook", SpellingServiceClient::SUGGEST, 200, @@ -242,6 +243,7 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { "chromebook", "af", }, { + L"chrombook", "chrombook", SpellingServiceClient::SUGGEST, 200, @@ -261,6 +263,7 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { "chromebook", "af", }, { + L"", "", SpellingServiceClient::SPELLCHECK, 500, @@ -269,6 +272,7 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { "", "en", }, { + L"I have been to USA.", "I have been to USA.", SpellingServiceClient::SPELLCHECK, 200, @@ -277,6 +281,7 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { "I have been to USA.", "en", }, { + L"I have bean to USA.", "I have bean to USA.", SpellingServiceClient::SPELLCHECK, 200, @@ -295,6 +300,27 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { true, "I have been to USA.", "en", + }, { + L"I\x2019mattheIn'n'Out.", + "I'mattheIn'n'Out.", + SpellingServiceClient::SPELLCHECK, + 200, + "{\n" + " \"result\": {\n" + " \"spellingCheckResponse\": {\n" + " \"misspellings\": [{\n" + " \"charStart\": 0,\n" + " \"charLength\": 16,\n" + " \"suggestions\":" + " [{ \"suggestion\": \"I'm at the In'N'Out\" }],\n" + " \"canAutoCorrect\": false\n" + " }]\n" + " }\n" + " }\n" + "}", + true, + "I'm at the In'N'Out.", + "en", }, }; @@ -303,7 +329,8 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { pref->SetBoolean(prefs::kSpellCheckUseSpellingService, true); for (size_t i = 0; i < arraysize(kTests); ++i) { - client_.SetHTTPRequest(kTests[i].request_type, kTests[i].request_text, + client_.SetHTTPRequest(kTests[i].request_type, + kTests[i].sanitized_request_text, kTests[i].language); client_.SetHTTPResponse(kTests[i].response_status, kTests[i].response_data); client_.SetExpectedTextCheckResult(kTests[i].success, @@ -312,7 +339,7 @@ TEST_F(SpellingServiceClientTest, RequestTextCheck) { client_.RequestTextCheck( &profile_, kTests[i].request_type, - base::ASCIIToUTF16(kTests[i].request_text), + base::WideToUTF16(kTests[i].request_text), base::Bind(&SpellingServiceClientTest::OnTextCheckComplete, base::Unretained(this), 0)); client_.CallOnURLFetchComplete(); diff --git a/chrome/renderer/spellchecker/spellcheck.cc b/chrome/renderer/spellchecker/spellcheck.cc index a43c727..fecb307 100644 --- a/chrome/renderer/spellchecker/spellcheck.cc +++ b/chrome/renderer/spellchecker/spellcheck.cc @@ -97,6 +97,29 @@ bool DocumentMarkersRemover::Visit(content::RenderView* render_view) { return true; } +bool IsApostrophe(base::char16 c) { + const base::char16 kApostrophe = 0x27; + const base::char16 kRightSingleQuotationMark = 0x2019; + return c == kApostrophe || c == kRightSingleQuotationMark; +} + +// Makes sure that the apostrophes in the |spelling_suggestion| are the same +// type as in the |misspelled_word| and in the same order. Ignore differences in +// the number of apostrophes. +void PreserveOriginalApostropheTypes(const base::string16& misspelled_word, + base::string16* spelling_suggestion) { + auto it = spelling_suggestion->begin(); + for (const base::char16& c : misspelled_word) { + if (IsApostrophe(c)) { + it = std::find_if(it, spelling_suggestion->end(), IsApostrophe); + if (it == spelling_suggestion->end()) + return; + + *it++ = c; + } + } +} + } // namespace class SpellCheck::SpellcheckRequest { @@ -383,33 +406,44 @@ void SpellCheck::CreateTextCheckingResults( const base::string16& line_text, const std::vector<SpellCheckResult>& spellcheck_results, WebVector<WebTextCheckingResult>* textcheck_results) { - // Double-check misspelled words with our spellchecker and attach grammar - // markers to them if our spellchecker tells they are correct words, i.e. they - // are probably contextually-misspelled words. - const base::char16* text = line_text.c_str(); - std::vector<WebTextCheckingResult> list; - for (size_t i = 0; i < spellcheck_results.size(); ++i) { - SpellCheckResult::Decoration decoration = spellcheck_results[i].decoration; - int word_location = spellcheck_results[i].location; - int word_length = spellcheck_results[i].length; - int misspelling_start = 0; - int misspelling_length = 0; - if (decoration == SpellCheckResult::SPELLING && - filter == USE_NATIVE_CHECKER) { - if (SpellCheckWord(text + word_location, word_length, 0, - &misspelling_start, &misspelling_length, NULL)) { - decoration = SpellCheckResult::GRAMMAR; - } + std::vector<WebTextCheckingResult> results; + for (const SpellCheckResult& spellcheck_result : spellcheck_results) { + const base::string16& misspelled_word = + line_text.substr(spellcheck_result.location, spellcheck_result.length); + + // Ignore words in custom dictionary. + if (custom_dictionary_.SpellCheckWord(misspelled_word, 0, + misspelled_word.length())) { + continue; } - if (!custom_dictionary_.SpellCheckWord( - line_text, word_location, word_length)) { - list.push_back(WebTextCheckingResult( - static_cast<WebTextDecorationType>(decoration), - word_location + line_offset, - word_length, - spellcheck_results[i].replacement, - spellcheck_results[i].hash)); + + // Use the same types of appostrophes as in the mispelled word. + base::string16 replacement = spellcheck_result.replacement; + PreserveOriginalApostropheTypes(misspelled_word, &replacement); + + // Ignore misspellings due the typographical apostrophe. + if (misspelled_word == replacement) + continue; + + // Double-check misspelled words with out spellchecker and attach grammar + // markers to them if our spellchecker tells us they are correct words, + // i.e. they are probably contextually-misspelled words. + SpellCheckResult::Decoration decoration = spellcheck_result.decoration; + int unused_misspelling_start = 0; + int unused_misspelling_length = 0; + if (decoration == SpellCheckResult::SPELLING && + filter == USE_NATIVE_CHECKER && + SpellCheckWord(misspelled_word.c_str(), misspelled_word.length(), 0, + &unused_misspelling_start, &unused_misspelling_length, + nullptr)) { + decoration = SpellCheckResult::GRAMMAR; } + + results.push_back(WebTextCheckingResult( + static_cast<WebTextDecorationType>(decoration), + line_offset + spellcheck_result.location, spellcheck_result.length, + replacement, spellcheck_result.hash)); } - textcheck_results->assign(list); + + textcheck_results->assign(results); } diff --git a/chrome/renderer/spellchecker/spellcheck_unittest.cc b/chrome/renderer/spellchecker/spellcheck_unittest.cc index 34886aa..0e36c24 100644 --- a/chrome/renderer/spellchecker/spellcheck_unittest.cc +++ b/chrome/renderer/spellchecker/spellcheck_unittest.cc @@ -17,6 +17,8 @@ #include "third_party/WebKit/public/web/WebTextCheckingCompletion.h" #include "third_party/WebKit/public/web/WebTextCheckingResult.h" +#define TYPOGRAPHICAL_APOSTROPHE L"\x2019" + namespace { base::FilePath GetHunspellDirectory() { @@ -80,6 +82,10 @@ class SpellCheckTest : public testing::Test { base::ASCIIToUTF16(word), tag); } + bool IsValidContraction(const base::string16& word, int tag) { + return spell_check_->spellcheck_.IsValidContraction(word, tag); + } + #if !defined(OS_MACOSX) protected: void TestSpellCheckParagraph( @@ -201,7 +207,7 @@ TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) { // A valid English contraction {L"isn't", true}, // A valid English contraction with a typographical apostrophe. - {L"isn\x2019t", true}, + {L"isn" TYPOGRAPHICAL_APOSTROPHE L"t", true}, // A valid English word enclosed with underscores. {L"_hello_", true}, @@ -1140,7 +1146,7 @@ TEST_F(SpellCheckTest, CreateTextCheckingResults) { text, spellcheck_results, &textcheck_results); - EXPECT_EQ(spellcheck_results.size(), textcheck_results.size()); + ASSERT_EQ(spellcheck_results.size(), textcheck_results.size()); EXPECT_EQ(blink::WebTextDecorationTypeSpelling, textcheck_results[0].decoration); EXPECT_EQ(spellcheck_results[0].location, textcheck_results[0].location); @@ -1160,12 +1166,78 @@ TEST_F(SpellCheckTest, CreateTextCheckingResults) { text, spellcheck_results, &textcheck_results); - EXPECT_EQ(spellcheck_results.size(), textcheck_results.size()); + ASSERT_EQ(spellcheck_results.size(), textcheck_results.size()); EXPECT_EQ(blink::WebTextDecorationTypeGrammar, textcheck_results[0].decoration); EXPECT_EQ(spellcheck_results[0].location, textcheck_results[0].location); EXPECT_EQ(spellcheck_results[0].length, textcheck_results[0].length); } + + // Verify that the SpellCheck preserves the original apostrophe type in the + // checked text, regardless of the type of apostrophe the browser returns. + { + base::string16 text = base::WideToUTF16( + L"Ik've havn" TYPOGRAPHICAL_APOSTROPHE L"t ni'n" + TYPOGRAPHICAL_APOSTROPHE L"out-s I've I" TYPOGRAPHICAL_APOSTROPHE + L"ve"); + std::vector<SpellCheckResult> spellcheck_results; + + // All typewriter apostrophe results. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 0, 5, base::UTF8ToUTF16("I've"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 6, 6, base::UTF8ToUTF16("haven't"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 13, 10, base::UTF8ToUTF16("in'n'out's"))); + + // Replacements that differ only by apostrophe type should be ignored. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 24, 4, base::UTF8ToUTF16("I've"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 29, 4, base::UTF8ToUTF16("I've"))); + + // All typographical apostrophe results. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 0, 5, + base::WideToUTF16(L"I" TYPOGRAPHICAL_APOSTROPHE L"ve"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 6, 6, + base::WideToUTF16(L"haven" TYPOGRAPHICAL_APOSTROPHE L"t"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 13, 10, base::WideToUTF16( + L"in" TYPOGRAPHICAL_APOSTROPHE L"n" TYPOGRAPHICAL_APOSTROPHE L"out" + TYPOGRAPHICAL_APOSTROPHE L"s"))); + + // Replacements that differ only by apostrophe type should be ignored. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 24, 4, + base::WideToUTF16(L"I" TYPOGRAPHICAL_APOSTROPHE L"ve"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 29, 4, + base::WideToUTF16(L"I" TYPOGRAPHICAL_APOSTROPHE L"ve"))); + + blink::WebVector<blink::WebTextCheckingResult> textcheck_results; + spell_check()->CreateTextCheckingResults(SpellCheck::USE_NATIVE_CHECKER, 0, + text, spellcheck_results, + &textcheck_results); + + static const wchar_t* kExpectedReplacements[] = { + L"I've", + L"haven" TYPOGRAPHICAL_APOSTROPHE L"t", + L"in'n" TYPOGRAPHICAL_APOSTROPHE L"out's", + L"I've", + L"haven" TYPOGRAPHICAL_APOSTROPHE L"t", + L"in'n" TYPOGRAPHICAL_APOSTROPHE L"out" TYPOGRAPHICAL_APOSTROPHE L"s", + }; + + ASSERT_EQ(arraysize(kExpectedReplacements), textcheck_results.size()); + for (size_t i = 0; i < arraysize(kExpectedReplacements); ++i) { + EXPECT_EQ(base::WideToUTF16(kExpectedReplacements[i]), + textcheck_results[i].replacement) + << "i=" << i << "\nactual: \"" + << base::string16(textcheck_results[i].replacement) << "\""; + } + } } #endif @@ -1373,3 +1445,24 @@ TEST_F(SpellCheckTest, LogicalSuggestions) { EXPECT_EQ(suggestions[0], base::ASCIIToUTF16(kTestCases[i].suggestion)); } } + +// Words with apostrophes should be valid contractions. +TEST_F(SpellCheckTest, IsValidContraction) { + static const char* kLanguages[] = { + "en-AU", + "en-CA", + "en-GB", + "en-US", + }; + + static const wchar_t* kWords[] = { + L"in'n'out", + L"in" TYPOGRAPHICAL_APOSTROPHE L"n" TYPOGRAPHICAL_APOSTROPHE L"out", + }; + + for (size_t i = 0; i < arraysize(kLanguages); ++i) { + ReinitializeSpellCheck(kLanguages[i]); + for (size_t j = 0; j < arraysize(kWords); ++j) + EXPECT_TRUE(IsValidContraction(base::WideToUTF16(kWords[j]), 0)); + } +} diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc index d45470c..491a8f2 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc +++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc @@ -229,6 +229,54 @@ TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { } } +// Vertify SpellcheckWordIterator treats typographical apostrophe as a part of +// the word. +TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) { + static const struct { + const char* language; + const wchar_t* word; + } kTestCases[] = { + // Typewriter apostrophe: + { + "en-AU", L"you're" + }, { + "en-CA", L"you're" + }, { + "en-GB", L"you're" + }, { + "en-US", L"you're" + }, + // Typographical apostrophe: + { + "en-AU", L"you\x2019re" + }, { + "en-CA", L"you\x2019re" + }, { + "en-GB", L"you\x2019re" + }, { + "en-US", L"you\x2019re" + }, + }; + + for (size_t i = 0; i < arraysize(kTestCases); ++i) { + SpellcheckCharAttribute attributes; + attributes.SetDefaultLanguage(kTestCases[i].language); + + base::string16 input_word(base::WideToUTF16(kTestCases[i].word)); + SpellcheckWordIterator iterator; + EXPECT_TRUE(iterator.Initialize(&attributes, true)); + EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); + + base::string16 actual_word; + int actual_start, actual_end; + EXPECT_TRUE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end)); + EXPECT_EQ(input_word, actual_word); + EXPECT_EQ(0, actual_start); + EXPECT_EQ(input_word.length(), + static_cast<base::string16::size_type>(actual_end)); + } +} + TEST(SpellcheckWordIteratorTest, Initialization) { // Test initialization works when a default language is set. { |