diff options
author | sidchat@google.com <sidchat@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-09-17 20:02:04 +0000 |
---|---|---|
committer | sidchat@google.com <sidchat@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-09-17 20:02:04 +0000 |
commit | 63271da3b3740aadd3bd5d186bf7bd2b33ee6347 (patch) | |
tree | 43ab35878220b56a78a8dda05b775df87ddacc4e /chrome | |
parent | 64b421f984b2dfe04546729f8b35648cac3d0f20 (diff) | |
download | chromium_src-63271da3b3740aadd3bd5d186bf7bd2b33ee6347.zip chromium_src-63271da3b3740aadd3bd5d186bf7bd2b33ee6347.tar.gz chromium_src-63271da3b3740aadd3bd5d186bf7bd2b33ee6347.tar.bz2 |
Add support for "Add to dictionary" in the context menu.
Review URL: http://codereview.chromium.org/2446
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@2322 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r-- | chrome/browser/profile.cc | 2 | ||||
-rw-r--r-- | chrome/browser/render_process_host.cc | 7 | ||||
-rw-r--r-- | chrome/browser/render_process_host.h | 3 | ||||
-rw-r--r-- | chrome/browser/render_view_context_menu.cc | 8 | ||||
-rw-r--r-- | chrome/browser/render_view_context_menu.h | 2 | ||||
-rw-r--r-- | chrome/browser/render_view_context_menu_controller.cc | 7 | ||||
-rw-r--r-- | chrome/browser/render_view_host.cc | 4 | ||||
-rw-r--r-- | chrome/browser/render_view_host.h | 1 | ||||
-rw-r--r-- | chrome/browser/resource_message_filter.cc | 13 | ||||
-rw-r--r-- | chrome/browser/spellchecker.cc | 84 | ||||
-rw-r--r-- | chrome/browser/spellchecker.h | 18 | ||||
-rw-r--r-- | chrome/browser/web_contents.cc | 12 | ||||
-rw-r--r-- | chrome/browser/web_contents.h | 1 | ||||
-rw-r--r-- | chrome/common/chrome_constants.cc | 1 | ||||
-rw-r--r-- | chrome/common/chrome_constants.h | 1 | ||||
-rw-r--r-- | chrome/renderer/spellcheck_unittest.cc | 152 | ||||
-rw-r--r-- | chrome/third_party/hunspell/src/hunspell/hashmgr.cxx | 1845 | ||||
-rw-r--r-- | chrome/third_party/hunspell/src/hunspell/hashmgr.hxx | 3 |
18 files changed, 1239 insertions, 925 deletions
diff --git a/chrome/browser/profile.cc b/chrome/browser/profile.cc index dacbe95..745b1a8 100644 --- a/chrome/browser/profile.cc +++ b/chrome/browser/profile.cc @@ -853,7 +853,7 @@ SpellChecker* ProfileImpl::GetSpellChecker() { prefs::kSpellCheckDictionary); spellchecker_ = new SpellChecker(dict_dir, dictionary_name, - GetRequestContext()); + GetRequestContext(), L""); spellchecker_->AddRef(); // Manual refcounting. } return spellchecker_; diff --git a/chrome/browser/render_process_host.cc b/chrome/browser/render_process_host.cc index f101fa8..90ff2aa 100644 --- a/chrome/browser/render_process_host.cc +++ b/chrome/browser/render_process_host.cc @@ -35,6 +35,7 @@ #include "chrome/browser/renderer_security_policy.h" #include "chrome/browser/resource_message_filter.h" #include "chrome/browser/sandbox_policy.h" +#include "chrome/browser/spellchecker.h" #include "chrome/browser/visitedlink_master.h" #include "chrome/browser/web_contents.h" #include "chrome/common/chrome_constants.h" @@ -722,6 +723,12 @@ void RenderProcessHost::WidgetHidden() { } } +void RenderProcessHost::AddWord(const std::wstring& word) { + base::Thread* io_thread = g_browser_process->io_thread(); + io_thread->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + profile_->GetSpellChecker(), &SpellChecker::AddWord, word)); +} + // NotificationObserver implementation. void RenderProcessHost::Observe(NotificationType type, const NotificationSource& source, diff --git a/chrome/browser/render_process_host.h b/chrome/browser/render_process_host.h index f725544..fd76ad7 100644 --- a/chrome/browser/render_process_host.h +++ b/chrome/browser/render_process_host.h @@ -181,6 +181,9 @@ class RenderProcessHost : public IPC::Channel::Listener, // to register/unregister visibility. void WidgetRestored(); void WidgetHidden(); + + // Add a word in the spellchecker. + void AddWord(const std::wstring& word); // NotificationObserver implementation. 
virtual void Observe(NotificationType type, diff --git a/chrome/browser/render_view_context_menu.cc b/chrome/browser/render_view_context_menu.cc index e8205af..96a584c 100644 --- a/chrome/browser/render_view_context_menu.cc +++ b/chrome/browser/render_view_context_menu.cc @@ -16,9 +16,11 @@ RenderViewContextMenu::RenderViewContextMenu( Menu::Delegate* delegate, HWND owner, ContextNode::Type type, + const std::wstring& misspelled_word, const std::vector<std::wstring>& misspelled_word_suggestions, Profile* profile) : Menu(delegate, Menu::TOPLEFT, owner), + misspelled_word_(misspelled_word), misspelled_word_suggestions_(misspelled_word_suggestions), profile_(profile) { InitMenu(type); @@ -120,6 +122,12 @@ void RenderViewContextMenu::AppendEditableItems() { } if (misspelled_word_suggestions_.size() > 0) AppendSeparator(); + + // If word is misspelled, give option for "Add to dictionary" + if (!misspelled_word_.empty()) { + AppendDelegateMenuItem(IDS_CONTENT_CONTEXT_ADD_TO_DICTIONARY); + AppendSeparator(); + } AppendDelegateMenuItem(IDS_CONTENT_CONTEXT_UNDO); AppendDelegateMenuItem(IDS_CONTENT_CONTEXT_REDO); diff --git a/chrome/browser/render_view_context_menu.h b/chrome/browser/render_view_context_menu.h index 57e5f63..a306d7f 100644 --- a/chrome/browser/render_view_context_menu.h +++ b/chrome/browser/render_view_context_menu.h @@ -16,6 +16,7 @@ class RenderViewContextMenu : public Menu { Menu::Delegate* delegate, HWND owner, ContextNode::Type type, + const std::wstring& misspelled_word, const std::vector<std::wstring>& misspelled_word_suggestions, Profile* profile); @@ -31,6 +32,7 @@ class RenderViewContextMenu : public Menu { void AppendSelectionItems(); void AppendEditableItems(); + std::wstring misspelled_word_; std::vector<std::wstring> misspelled_word_suggestions_; Profile* profile_; diff --git a/chrome/browser/render_view_context_menu_controller.cc b/chrome/browser/render_view_context_menu_controller.cc index 8ea251e..39f5b9e 100644 --- 
a/chrome/browser/render_view_context_menu_controller.cc +++ b/chrome/browser/render_view_context_menu_controller.cc @@ -169,7 +169,8 @@ bool RenderViewContextMenuController::IsCommandEnabled(int id) const { case IDC_USESPELLCHECKSUGGESTION_3: case IDC_USESPELLCHECKSUGGESTION_4: return true; - + case IDS_CONTENT_CONTEXT_ADD_TO_DICTIONARY: + return !params_.misspelled_word.empty(); case IDS_CONTENT_CONTEXT_VIEWPAGEINFO: case IDS_CONTENT_CONTEXT_VIEWFRAMEINFO: case IDS_CONTENT_CONTEXT_SAVEFRAMEAS: @@ -368,6 +369,10 @@ void RenderViewContextMenuController::ExecuteCommand(int id) { id - IDC_USESPELLCHECKSUGGESTION_0]); break; + case IDS_CONTENT_CONTEXT_ADD_TO_DICTIONARY: + source_web_contents_->AddToDictionary(params_.misspelled_word); + break; + case IDS_CONTENT_CONTEXT_ADDSEARCHENGINE: // Not implemented. default: break; diff --git a/chrome/browser/render_view_host.cc b/chrome/browser/render_view_host.cc index 4f02aaca..8c95e07 100644 --- a/chrome/browser/render_view_host.cc +++ b/chrome/browser/render_view_host.cc @@ -452,6 +452,10 @@ void RenderViewHost::Replace(const std::wstring& text_to_replace) { Send(new ViewMsg_Replace(routing_id_, text_to_replace)); } +void RenderViewHost::AddToDictionary(const std::wstring& word) { + process_->AddWord(word); +} + void RenderViewHost::Delete() { Send(new ViewMsg_Delete(routing_id_)); } diff --git a/chrome/browser/render_view_host.h b/chrome/browser/render_view_host.h index 5e0543d..b6d968d 100644 --- a/chrome/browser/render_view_host.h +++ b/chrome/browser/render_view_host.h @@ -256,6 +256,7 @@ class RenderViewHost : public RenderWidgetHost { void Copy(); void Paste(); void Replace(const std::wstring& text); + void AddToDictionary(const std::wstring& word); void Delete(); void SelectAll(); diff --git a/chrome/browser/resource_message_filter.cc b/chrome/browser/resource_message_filter.cc index 3c833d8..4821cfd 100644 --- a/chrome/browser/resource_message_filter.cc +++ b/chrome/browser/resource_message_filter.cc @@ -245,10 
+245,15 @@ void ResourceMessageFilter::OnReceiveContextMenuMsg(const IPC::Message& msg) { if (!params.misspelled_word.empty() && spellchecker_ != NULL) { int misspell_location, misspell_length; - spellchecker_->SpellCheckWord(params.misspelled_word.c_str(), - static_cast<int>(params.misspelled_word.length()), - &misspell_location, &misspell_length, - &params.dictionary_suggestions); + bool is_misspelled = !spellchecker_->SpellCheckWord( + params.misspelled_word.c_str(), + static_cast<int>(params.misspelled_word.length()), + &misspell_location, &misspell_length, + &params.dictionary_suggestions); + + // If not misspelled, make the misspelled_word param empty. + if (!is_misspelled) + params.misspelled_word.clear(); } // Create a new ViewHostMsg_ContextMenu message. diff --git a/chrome/browser/spellchecker.cc b/chrome/browser/spellchecker.cc index ecb3874..43df962 100644 --- a/chrome/browser/spellchecker.cc +++ b/chrome/browser/spellchecker.cc @@ -5,11 +5,11 @@ #include <io.h> #include "chrome/browser/spellchecker.h" - #include "base/basictypes.h" #include "base/file_util.h" #include "base/histogram.h" #include "base/logging.h" +#include "base/path_service.h" #include "base/string_util.h" #include "base/thread.h" #include "base/win_util.h" @@ -17,7 +17,9 @@ #include "chrome/browser/browser_process.h" #include "chrome/browser/profile.h" #include "chrome/browser/url_fetcher.h" +#include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_counters.h" +#include "chrome/common/chrome_paths.h" #include "chrome/common/l10n_util.h" #include "chrome/common/pref_names.h" #include "chrome/common/pref_service.h" @@ -194,8 +196,10 @@ void SpellChecker::RegisterUserPrefs(PrefService* prefs) { SpellChecker::SpellChecker(const std::wstring& dict_dir, const std::wstring& language, - URLRequestContext* request_context) + URLRequestContext* request_context, + const std::wstring& custom_dictionary_file_name) : bdict_file_name_(dict_dir), + 
custom_dictionary_file_name_(custom_dictionary_file_name), bdict_file_(NULL), bdict_mapping_(NULL), bdict_mapped_data_(NULL), @@ -221,6 +225,15 @@ SpellChecker::SpellChecker(const std::wstring& dict_dir, // Get the path to the spellcheck file. file_util::AppendToPath(&bdict_file_name_, language + L".bdic"); + // Get the path to the custom dictionary file. + if (custom_dictionary_file_name_.empty()) { + std::wstring personal_file_directory; + PathService::Get(chrome::DIR_USER_DATA, &personal_file_directory); + custom_dictionary_file_name_ = personal_file_directory; + file_util::AppendToPath(&custom_dictionary_file_name_, + chrome::kCustomDictionaryFileName); + } + // Use this dictionary language as the default one of the // SpecllcheckCharAttribute object. character_attributes_.SetDefaultLanguage(language); @@ -288,8 +301,10 @@ bool SpellChecker::Initialize() { const unsigned char* bdict_data; size_t bdict_length; - if (MapBdictFile(&bdict_data, &bdict_length)) + if (MapBdictFile(&bdict_data, &bdict_length)) { hunspell_ = new Hunspell(bdict_data, bdict_length); + AddCustomWordsToHunspell(); + } TimeTicks end_time = TimeTicks::Now(); DHISTOGRAM_TIMES(L"Spellcheck.InitTime", end_time - begin_time); @@ -298,6 +313,23 @@ bool SpellChecker::Initialize() { return false; } +void SpellChecker::AddCustomWordsToHunspell() { + // Add custom words to Hunspell. + // This should be done in File Loop, but since Hunspell is in this IO Loop, + // this too has to be initialized here. + // TODO (sidchat): Work out a way to initialize Hunspell in the File Loop. 
+ std::string contents; + file_util::ReadFileToString(custom_dictionary_file_name_, &contents); + std::vector<std::string> list_of_words; + SplitString(contents, '\n', &list_of_words); + if (hunspell_) { + for (std::vector<std::string>::iterator it = list_of_words.begin(); + it < list_of_words.end(); ++it) { + hunspell_->put_word((*it).c_str()); + } + } +} + bool SpellChecker::MapBdictFile(const unsigned char** data, size_t* length) { bdict_file_ = CreateFile(bdict_file_name_.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); @@ -422,3 +454,49 @@ bool SpellChecker::SpellCheckWord( return true; } +// This task is called in the file loop to write the new word to the custom +// dictionary in disc. +class AddWordToCustomDictionaryTask : public Task { + public: + AddWordToCustomDictionaryTask(const std::wstring& file_name, + const std::wstring& word) + : file_name_(WideToUTF8(file_name)), + word_(WideToUTF8(word)) { + } + + private: + void Run() { + // Add the word with a new line. Note that, although this would mean an + // extra line after the list of words, this is potentially harmless and + // faster, compared to verifying everytime whether to append a new line + // or not. + word_ += "\n"; + const char* file_name_char = file_name_.c_str(); + FILE* f = fopen(file_name_char, "a+"); + fputs(word_.c_str(), f); + fclose(f); + } + + std::string file_name_; + std::string word_; +}; + +void SpellChecker::AddWord(const std::wstring& word) { + // Check if the |hunspell_| has been initialized at all. + Initialize(); + + // Add the word to hunspell. + std::string word_to_add = WideToUTF8(word); + if (!word_to_add.empty()) + hunspell_->put_word(word_to_add.c_str()); + + // Now add the word to the custom dictionary file in the file loop. + if (file_loop_) { + file_loop_->PostTask(FROM_HERE, new AddWordToCustomDictionaryTask( + custom_dictionary_file_name_, word)); + } else { // just run it in this thread. 
+ Task* write_word_task = new AddWordToCustomDictionaryTask( + custom_dictionary_file_name_, word); + write_word_task->Run(); + } +} diff --git a/chrome/browser/spellchecker.h b/chrome/browser/spellchecker.h index da17ee5..932d3c9 100644 --- a/chrome/browser/spellchecker.h +++ b/chrome/browser/spellchecker.h @@ -35,9 +35,13 @@ class SpellChecker : public base::RefCountedThreadSafe<SpellChecker> { // // The request context is used to download dictionaries if they do not exist. // This can be NULL if you don't want this (like in tests). + // The |custom_dictionary_file_name| should be left blank so that Spellchecker + // can figure out the custom dictionary file. It is non empty only for unit + // testing. SpellChecker(const std::wstring& dict_dir, const std::wstring& language, - URLRequestContext* request_context); + URLRequestContext* request_context, + const std::wstring& custom_dictionary_file_name); static void RegisterUserPrefs(PrefService* prefs); @@ -58,6 +62,11 @@ class SpellChecker : public base::RefCountedThreadSafe<SpellChecker> { int* misspelling_len, std::vector<std::wstring>* optional_suggestions); + // Add custom word to the dictionary, which means: + // a) Add it to the current hunspell object for immediate use, + // b) Add the word to a file in disk for custom dictionary. + void AddWord(const std::wstring& word); + private: // Download dictionary files when required. class DictionaryDownloadController; @@ -67,6 +76,10 @@ class SpellChecker : public base::RefCountedThreadSafe<SpellChecker> { // Initializes the Hunspell Dictionary. bool Initialize(); + // After |hunspell_| is initialized, this function is called to add custom + // words from the custom dictionary to the |hunspell_| + void AddCustomWordsToHunspell(); + void set_file_is_downloading(bool value); // Memory maps the given .bdic file. 
On success, it will return true and will @@ -80,6 +93,9 @@ class SpellChecker : public base::RefCountedThreadSafe<SpellChecker> { // Path to the spellchecker file. std::wstring bdict_file_name_; + // Path to the custom dictionary file. + std::wstring custom_dictionary_file_name_; + // We memory-map the BDict file for spellchecking. These are the handles // necessary for that. HANDLE bdict_file_; diff --git a/chrome/browser/web_contents.cc b/chrome/browser/web_contents.cc index 3a9fd3a..a796dc6 100644 --- a/chrome/browser/web_contents.cc +++ b/chrome/browser/web_contents.cc @@ -954,6 +954,10 @@ void WebContents::Replace(const std::wstring& text) { render_view_host()->Replace(text); } +void WebContents::AddToDictionary(const std::wstring& word) { + render_view_host()->AddToDictionary(word); +} + void WebContents::Delete() { render_view_host()->Delete(); } @@ -1698,8 +1702,12 @@ void WebContents::DidDownloadImage( void WebContents::ShowContextMenu( const ViewHostMsg_ContextMenu_Params& params) { RenderViewContextMenuController menu_controller(this, params); - RenderViewContextMenu menu(&menu_controller, GetHWND(), params.type, - params.dictionary_suggestions, profile()); + RenderViewContextMenu menu(&menu_controller, + GetHWND(), + params.type, + params.misspelled_word, + params.dictionary_suggestions, + profile()); POINT screen_pt = { params.x, params.y }; MapWindowPoints(GetHWND(), HWND_DESKTOP, &screen_pt, 1); diff --git a/chrome/browser/web_contents.h b/chrome/browser/web_contents.h index 30b4c2f..e337068 100644 --- a/chrome/browser/web_contents.h +++ b/chrome/browser/web_contents.h @@ -169,6 +169,7 @@ class WebContents : public TabContents, void Undo(); void Redo(); void Replace(const std::wstring& text); + void AddToDictionary(const std::wstring& word); void Delete(); void SelectAll(); diff --git a/chrome/common/chrome_constants.cc b/chrome/common/chrome_constants.cc index 2c0dcc8..57749a1 100644 --- a/chrome/common/chrome_constants.cc +++ 
b/chrome/common/chrome_constants.cc @@ -37,6 +37,7 @@ const wchar_t kUserDataDirname[] = L"User Data"; const wchar_t kWebDataFilename[] = L"Web Data"; const wchar_t kBookmarksFileName[] = L"Bookmarks"; const wchar_t kHistoryBookmarksFileName[] = L"Bookmarks From History"; +const wchar_t kCustomDictionaryFileName[] = L"Custom Dictionary.txt"; // Note, this shouldn't go above 64. See bug 535234. const unsigned int kMaxRendererProcessCount = 20; diff --git a/chrome/common/chrome_constants.h b/chrome/common/chrome_constants.h index 60beac5..46fa329 100644 --- a/chrome/common/chrome_constants.h +++ b/chrome/common/chrome_constants.h @@ -34,6 +34,7 @@ extern const wchar_t kUserDataDirname[]; extern const wchar_t kWebDataFilename[]; extern const wchar_t kBookmarksFileName[]; extern const wchar_t kHistoryBookmarksFileName[]; +extern const wchar_t kCustomDictionaryFileName[]; extern const unsigned int kMaxRendererProcessCount; extern const int kStatsMaxThreads; diff --git a/chrome/renderer/spellcheck_unittest.cc b/chrome/renderer/spellcheck_unittest.cc index 61940cb..4055d7c 100644 --- a/chrome/renderer/spellcheck_unittest.cc +++ b/chrome/renderer/spellcheck_unittest.cc @@ -16,6 +16,9 @@ class SpellCheckTest : public testing::Test { private: MessageLoop message_loop_; }; + +const std::wstring kTempCustomDictionaryFile(L"temp_custom_dictionary.txt"); + } // namespace // Represents a special initialization function used only for the unit tests @@ -251,7 +254,7 @@ TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) { &hunspell_directory)); scoped_refptr<SpellChecker> spell_checker(new SpellChecker( - hunspell_directory, L"en-US", NULL)); + hunspell_directory, L"en-US", NULL, L"")); for (int i = 0; i < arraysize(kTestCases); i++) { size_t input_length = 0; @@ -305,7 +308,7 @@ TEST_F(SpellCheckTest, SpellCheckSuggestions_EN_US) { &hunspell_directory)); scoped_refptr<SpellChecker> spell_checker(new SpellChecker( - hunspell_directory, L"en-US", NULL)); + hunspell_directory, L"en-US", 
NULL, L"")); for (int i = 0; i < arraysize(kTestCases); i++) { std::vector<std::wstring> suggestions; @@ -337,3 +340,148 @@ TEST_F(SpellCheckTest, SpellCheckSuggestions_EN_US) { } } +// This test Adds words to the SpellChecker and veifies that it remembers them. +TEST_F(SpellCheckTest, SpellCheckAddToDictionary_EN_US) { + static const struct { + // A string to be added to SpellChecker. + const wchar_t* word_to_add; + } kTestCases[] = { // word to be added to SpellChecker + {L"Googley"}, + {L"Googleplex"}, + {L"Googler"}, + }; + + std::wstring hunspell_directory; + ASSERT_TRUE(PathService::Get(chrome::DIR_APP_DICTIONARIES, + &hunspell_directory)); + + scoped_refptr<SpellChecker> spell_checker(new SpellChecker( + hunspell_directory, L"en-US", NULL, kTempCustomDictionaryFile)); + + for (int i = 0; i < arraysize(kTestCases); i++) { + // Add the word to spellchecker. + spell_checker->AddWord(std::wstring(kTestCases[i].word_to_add)); + + // Now check whether it is added to Spellchecker. + std::vector<std::wstring> suggestions; + size_t input_length = 0; + if (kTestCases[i].word_to_add != NULL) { + input_length = wcslen(kTestCases[i].word_to_add); + } + int misspelling_start; + int misspelling_length; + bool result = spell_checker->SpellCheckWord(kTestCases[i].word_to_add, + static_cast<int>(input_length), + &misspelling_start, + &misspelling_length, + &suggestions); + + // Check for spelling. + EXPECT_TRUE(result); + } + + // Now initialize another spellchecker to see that AddToWord is permanent. + scoped_refptr<SpellChecker> spell_checker_new(new SpellChecker( + hunspell_directory, L"en-US", NULL, kTempCustomDictionaryFile)); + + for (int i = 0; i < arraysize(kTestCases); i++) { + // Now check whether it is added to Spellchecker. 
+ std::vector<std::wstring> suggestions; + size_t input_length = 0; + if (kTestCases[i].word_to_add != NULL) { + input_length = wcslen(kTestCases[i].word_to_add); + } + int misspelling_start; + int misspelling_length; + bool result = spell_checker_new->SpellCheckWord( + kTestCases[i].word_to_add, + static_cast<int>(input_length), + &misspelling_start, + &misspelling_length, + &suggestions); + + // Check for spelling. + EXPECT_TRUE(result); + } + + // Remove the temp custom dictionary file. + file_util::Delete(kTempCustomDictionaryFile, false); +} + +// SpellChecker should suggest custome words for misspelled words. +TEST_F(SpellCheckTest, SpellCheckSuggestionsAddToDictionary_EN_US) { + static const struct { + // A string to be added to SpellChecker. + const wchar_t* word_to_add; + } kTestCases[] = { // word to be added to SpellChecker + {L"Googley"}, + {L"Googleplex"}, + {L"Googler"}, + }; + + std::wstring hunspell_directory; + ASSERT_TRUE(PathService::Get(chrome::DIR_APP_DICTIONARIES, + &hunspell_directory)); + + scoped_refptr<SpellChecker> spell_checker(new SpellChecker( + hunspell_directory, L"en-US", NULL, kTempCustomDictionaryFile)); + + for (int i = 0; i < arraysize(kTestCases); i++) { + // Add the word to spellchecker. + spell_checker->AddWord(std::wstring(kTestCases[i].word_to_add)); + } + + // Now check to see whether the custom words are suggested for + // misspelled but similar words. + static const struct { + // A string to be tested. + const wchar_t* input; + // An expected result for this test case. + // * true: the input string does not have any invalid words. + // * false: the input string has one or more invalid words. + bool expected_result; + // The position and the length of the first invalid word. + int misspelling_start; + int misspelling_length; + + // A suggested word that should occur. 
+ const wchar_t* suggested_word; + } kTestCasesToBeTested[] = { + {L"oogley", false, 0, 0, L"Googley"}, + {L"oogler", false, 0, 0, L"Googler"}, + {L"oogleplex", false, 0, 0, L"Googleplex"}, + }; + + for (int i = 0; i < arraysize(kTestCasesToBeTested); i++) { + std::vector<std::wstring> suggestions; + size_t input_length = 0; + if (kTestCasesToBeTested[i].input != NULL) { + input_length = wcslen(kTestCasesToBeTested[i].input); + } + int misspelling_start; + int misspelling_length; + bool result = spell_checker->SpellCheckWord(kTestCasesToBeTested[i].input, + static_cast<int>(input_length), + &misspelling_start, + &misspelling_length, + &suggestions); + + // Check for spelling. + EXPECT_EQ(result, kTestCasesToBeTested[i].expected_result); + + // Check if the suggested words occur. + bool suggested_word_is_present = false; + for (int j=0; j < static_cast<int>(suggestions.size()); j++) { + if (suggestions.at(j).compare(kTestCasesToBeTested[i].suggested_word) == + 0) { + suggested_word_is_present = true; + break; + } + } + + EXPECT_TRUE(suggested_word_is_present); + } + + // Remove the temp custom dictionary file. 
+ file_util::Delete(kTempCustomDictionaryFile, false); +} diff --git a/chrome/third_party/hunspell/src/hunspell/hashmgr.cxx b/chrome/third_party/hunspell/src/hunspell/hashmgr.cxx index afa00aa..f1c1355 100644 --- a/chrome/third_party/hunspell/src/hunspell/hashmgr.cxx +++ b/chrome/third_party/hunspell/src/hunspell/hashmgr.cxx @@ -1,911 +1,934 @@ -#include "license.hunspell" -#include "license.myspell" - -#ifndef MOZILLA_CLIENT -#include <cstdlib> -#include <cstring> -#include <cstdio> -#include <cctype> -#else -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <ctype.h> -#endif - -#include <io.h> - -#include "hashmgr.hxx" -#include "csutil.hxx" -#include "atypes.hxx" - -#ifdef MOZILLA_CLIENT -#ifdef __SUNPRO_CC // for SunONE Studio compiler -using namespace std; -#endif -#else -#ifndef W32 -using namespace std; -#endif -#endif - -// build a hash table from a munched word list -#ifdef HUNSPELL_CHROME_CLIENT -HashMgr::HashMgr(hunspell::BDictReader* reader) -{ - bdict_reader = reader; -#else -HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle) -{ -#endif - tablesize = 0; - tableptr = NULL; - flag_mode = FLAG_CHAR; - complexprefixes = 0; - utf8 = 0; - ignorechars = NULL; - ignorechars_utf16 = NULL; - ignorechars_utf16_len = 0; - numaliasf = 0; - aliasf = NULL; - numaliasm = 0; - aliasm = NULL; -#ifdef HUNSPELL_CHROME_CLIENT - // No tables to load, just the AF config. 
- int ec = load_config(); -#else - load_config(aff_handle); - int ec = load_tables(dic_handle); -#endif - if (ec) { - /* error condition - what should we do here */ - HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); - if (tableptr) { - free(tableptr); - tableptr = NULL; - } - tablesize = 0; - } -} - - -HashMgr::~HashMgr() -{ - if (tableptr) { - // now pass through hash table freeing up everything - // go through column by column of the table - for (int i=0; i < tablesize; i++) { - struct hentry * pt = &tableptr[i]; - struct hentry * nt = NULL; - if (pt) { - if (pt->astr && !aliasf) free(pt->astr); - if (pt->word) free(pt->word); -#ifdef HUNSPELL_EXPERIMENTAL - if (pt->description && !aliasm) free(pt->description); -#endif - pt = pt->next; - } - while(pt) { - nt = pt->next; - if (pt->astr && !aliasf) free(pt->astr); - if (pt->word) free(pt->word); -#ifdef HUNSPELL_EXPERIMENTAL - if (pt->description && !aliasm) free(pt->description); -#endif - free(pt); - pt = nt; - } - } - free(tableptr); - tableptr = NULL; - } - tablesize = 0; - - if (aliasf) { - for (int j = 0; j < (numaliasf); j++) free(aliasf[j]); - free(aliasf); - aliasf = NULL; - if (aliasflen) { - free(aliasflen); - aliasflen = NULL; - } - } - if (aliasm) { - for (int j = 0; j < (numaliasm); j++) free(aliasm[j]); - free(aliasm); - aliasm = NULL; - } - - if (ignorechars) free(ignorechars); - if (ignorechars_utf16) free(ignorechars_utf16); - -#ifdef HUNSPELL_CHROME_CLIENT - EmptyHentryCache(); -#endif -} - -#ifdef HUNSPELL_CHROME_CLIENT -void HashMgr::EmptyHentryCache() { - // We need to delete each cache entry, and each additional one in the linked - // list of homonyms. 
- for (HEntryCache::iterator i = hentry_cache.begin(); - i != hentry_cache.end(); ++i) { - hentry* cur = i->second; - while (cur) { - hentry* next = cur->next_homonym; - delete cur; - cur = next; - } - } - hentry_cache.clear(); -} -#endif - -// lookup a root word in the hashtable - -struct hentry * HashMgr::lookup(const char *word) const -{ -#ifdef HUNSPELL_CHROME_CLIENT - int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; - int affix_count = bdict_reader->FindWord(word, affix_ids); - if (affix_count == 0) - return NULL; - - static const int kMaxWordLen = 128; - static char word_buf[kMaxWordLen]; - strncpy(word_buf, word, kMaxWordLen); - - return AffixIDsToHentry(word_buf, affix_ids, affix_count); -#else - struct hentry * dp; - if (tableptr) { - dp = &tableptr[hash(word)]; - if (dp->word == NULL) return NULL; - for ( ; dp != NULL; dp = dp->next) { - if (strcmp(word,dp->word) == 0) return dp; - } - } - return NULL; -#endif -} - -// add a word to the hash table (private) - -int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc) -{ -#ifndef HUNSPELL_CHROME_CLIENT // Don't support adding words yet. - char * st = mystrdup(word); - if (wl && !st) return 1; - if (ignorechars != NULL) { - if (utf8) { - remove_ignored_chars_utf(st, ignorechars_utf16, ignorechars_utf16_len); - } else { - remove_ignored_chars(st, ignorechars); - } - } - if (complexprefixes) { - if (utf8) reverseword_utf(st); else reverseword(st); - } - int i = hash(st); - struct hentry * dp = &tableptr[i]; - if (dp->word == NULL) { - dp->wlen = (short) wl; - dp->alen = (short) al; - dp->word = st; - dp->astr = aff; - dp->next = NULL; - dp->next_homonym = NULL; -#ifdef HUNSPELL_EXPERIMENTAL - if (aliasm) { - dp->description = (desc) ? 
get_aliasm(atoi(desc)) : mystrdup(desc); - } else { - dp->description = mystrdup(desc); - if (desc && !dp->description) return 1; - if (dp->description && complexprefixes) { - if (utf8) reverseword_utf(dp->description); else reverseword(dp->description); - } - } -#endif - } else { - struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry)); - if (!hp) return 1; - hp->wlen = (short) wl; - hp->alen = (short) al; - hp->word = st; - hp->astr = aff; - hp->next = NULL; - hp->next_homonym = NULL; -#ifdef HUNSPELL_EXPERIMENTAL - if (aliasm) { - hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc); - } else { - hp->description = mystrdup(desc); - if (desc && !hp->description) return 1; - if (dp->description && complexprefixes) { - if (utf8) reverseword_utf(hp->description); else reverseword(hp->description); - } - } -#endif - while (dp->next != NULL) { - if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp; - dp=dp->next; - } - if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp; - dp->next = hp; - } -#endif // HUNSPELL_CHROME_CLIENT - return 0; -} - -// add a custom dic. 
word to the hash table (public) -int HashMgr::put_word(const char * word, int wl, char * aff) -{ - unsigned short * flags; - int al = 0; - if (aff) { - al = decode_flags(&flags, aff); - flag_qsort(flags, 0, al); - } else { - flags = NULL; - } - add_word(word, wl, flags, al, NULL); - return 0; -} - -int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern) -{ - unsigned short * flags; - struct hentry * dp = lookup(pattern); - if (!dp || !dp->astr) return 1; - flags = (unsigned short *) malloc (dp->alen * sizeof(short)); - memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short)); - add_word(word, wl, flags, dp->alen, NULL); - return 0; -} - -// walk the hash table entry by entry - null at end -struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const -{ -#ifdef HUNSPELL_CHROME_CLIENT - // DANGER! This is kind of impossible to make work correctly, since Hunspell - // will keep arbitrary hentry pointers into our table. Therefore, the caller - // (SuggestMgr::ngsuggest) will need to be modified to not do this for us - // to be able to uncomment this function. -/* - // This function is only ever called by one place and not nested. We can - // therefore keep static state between calls and use |col| as a "reset" flag - // to avoid changing the API. It is set to -1 for the first call. - static hunspell::WordIterator word_iterator = - bdict_reader->GetAllWordIterator(); - if (col < 0) { - col = 1; - word_iterator = bdict_reader->GetAllWordIterator(); - } - - int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; - static const int kMaxWordLen = 128; - static char word_buf[kMaxWordLen]; - int affix_count = word_iterator.Advance(word_buf, kMaxWordLen, affix_ids); - return AffixIDsToHentry(word_buf, affix_ids, affix_count); -*/ - return NULL; -#else - //reset to start - if ((col < 0) || (hp == NULL)) { - col = -1; - hp = NULL; - } - - if (hp && hp->next != NULL) { - hp = hp->next; - } else { - col++; - hp = (col < tablesize) ? 
&tableptr[col] : NULL; - // search for next non-blank column entry - while (hp && (hp->word == NULL)) { - col ++; - hp = (col < tablesize) ? &tableptr[col] : NULL; - } - if (col < tablesize) return hp; - hp = NULL; - col = -1; - } - return hp; -#endif -} - -// load a munched word list and build a hash table on the fly -int HashMgr::load_tables(FILE* t_handle) -{ -#ifndef HUNSPELL_CHROME_CLIENT - int wl, al; - char * ap; - char * dp; - unsigned short * flags; - - // raw dictionary - munched file - FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r"); - if (rawdict == NULL) return 1; - fseek(rawdict, 0, SEEK_SET); - - // first read the first line of file to get hash table size */ - char ts[MAXDELEN]; - if (! fgets(ts, MAXDELEN-1,rawdict)) return 2; - mychomp(ts); - - /* remove byte order mark */ - if (strncmp(ts,"\xef\xbb\xbf",3) == 0) { - memmove(ts, ts+3, strlen(ts+3)+1); - HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n"); - } - - if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n"); - tablesize = atoi(ts); - if (!tablesize) return 4; - tablesize = tablesize + 5 + USERWORD; - if ((tablesize %2) == 0) tablesize++; - - // allocate the hash table - tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry)); - if (! 
tableptr) return 3; - for (int i=0; i<tablesize; i++) tableptr[i].word = NULL; - - // loop through all words on much list and add to hash - // table and create word and affix strings - - while (fgets(ts,MAXDELEN-1,rawdict)) { - mychomp(ts); - // split each line into word and morphological description - dp = strchr(ts,'\t'); - - if (dp) { - *dp = '\0'; - dp++; - } else { - dp = NULL; - } - - // split each line into word and affix char strings - // "\/" signs slash in words (not affix separator) - // "/" at beginning of the line is word character (not affix separator) - ap = strchr(ts,'/'); - while (ap) { - if (ap == ts) { - ap++; - continue; - } else if (*(ap - 1) != '\\') break; - // replace "\/" with "/" - for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); - ap = strchr(ap,'/'); - } - - if (ap) { - *ap = '\0'; - if (aliasf) { - int index = atoi(ap + 1); - al = get_aliasf(index, &flags); - if (!al) { - HUNSPELL_WARNING(stderr, "error - bad flag vector alias: %s\n", ts); - *ap = '\0'; - } - } else { - al = decode_flags(&flags, ap + 1); - flag_qsort(flags, 0, al); - } - } else { - al = 0; - ap = NULL; - flags = NULL; - } - - wl = strlen(ts); - - // add the word and its index - if (add_word(ts,wl,flags,al,dp)) return 5; - - } - - fclose(rawdict); -#endif - return 0; -} - - -// the hash function is a simple load and rotate -// algorithm borrowed - -int HashMgr::hash(const char * word) const -{ -#ifdef HUNSPELL_CHROME_CLIENT - return 0; -#else - long hv = 0; - for (int i=0; i < 4 && *word != 0; i++) - hv = (hv << 8) | (*word++); - while (*word != 0) { - ROTATE(hv,ROTATE_LEN); - hv ^= (*word++); - } - return (unsigned long) hv % tablesize; -#endif -} - -int HashMgr::decode_flags(unsigned short ** result, char * flags) { - int len; - switch (flag_mode) { - case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) - len = strlen(flags); - if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG flagvector is odd: %s\n", flags); - len = len/2; - *result = 
(unsigned short *) malloc(len * sizeof(short)); - for (int i = 0; i < len; i++) { - (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1]; - } - break; - } - case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233) - len = 1; - char * src = flags; - unsigned short * dest; - char * p; - for (p = flags; *p; p++) { - if (*p == ',') len++; - } - *result = (unsigned short *) malloc(len * sizeof(short)); - dest = *result; - for (p = flags; *p; p++) { - if (*p == ',') { - *dest = (unsigned short) atoi(src); - if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); - src = p + 1; - dest++; - } - } - *dest = (unsigned short) atoi(src); - if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); - break; - } - case FLAG_UNI: { // UTF-8 characters - w_char w[MAXDELEN/2]; - len = u8_u16(w, MAXDELEN/2, flags); - *result = (unsigned short *) malloc(len * sizeof(short)); - memcpy(*result, w, len * sizeof(short)); - break; - } - default: { // Ispell's one-character flags (erfg -> e r f g) - unsigned short * dest; - len = strlen(flags); - *result = (unsigned short *) malloc(len * sizeof(short)); - dest = *result; - for (unsigned char * p = (unsigned char *) flags; *p; p++) { - *dest = (unsigned short) *p; - dest++; - } - } - } - return len; -} - -unsigned short HashMgr::decode_flag(const char * f) { - unsigned short s = 0; - switch (flag_mode) { - case FLAG_LONG: - s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; - break; - case FLAG_NUM: - s = (unsigned short) atoi(f); - break; - case FLAG_UNI: - u8_u16((w_char *) &s, 1, f); - break; - default: - s = (unsigned short) *((unsigned char *)f); - } - if (!s) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); - return s; -} - -char * HashMgr::encode_flag(unsigned short f) { - unsigned char ch[10]; - if (f==0) return mystrdup("(NULL)"); - if (flag_mode == FLAG_LONG) { - ch[0] = (unsigned char) (f >> 8); - ch[1] = (unsigned char) 
(f - ((f >> 8) << 8)); - ch[2] = '\0'; - } else if (flag_mode == FLAG_NUM) { - sprintf((char *) ch, "%d", f); - } else if (flag_mode == FLAG_UNI) { - u16_u8((char *) &ch, 10, (w_char *) &f, 1); - } else { - ch[0] = (unsigned char) (f); - ch[1] = '\0'; - } - return mystrdup((char *) ch); -} - -#ifdef HUNSPELL_CHROME_CLIENT -int HashMgr::load_config() -{ - utf8 = 1; // We always use UTF-8. - - // Read in all the AF lines which tell us the rules for each affix group ID. - char line[MAXDELEN+1]; - hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator(); - while (iterator.AdvanceAndCopy(line, MAXDELEN)) { - int rv = parse_aliasf(line, &iterator); - if (rv) - return rv; - } - - // Read in the regular commands from the affix file. We only care about the - // IGNORE line here. The rest of the commands will be read by the affix - // manager. - iterator = bdict_reader->GetOtherLineIterator(); - while (iterator.AdvanceAndCopy(line, MAXDELEN)) { - // Parse in the ignored characters (for example, Arabic optional - // diacritics characters. - if (strncmp(line,"IGNORE",6) == 0) { - parse_array(line, &ignorechars, &ignorechars_utf16, - &ignorechars_utf16_len, "IGNORE", utf8); - break; // All done. 
- } - } - - return 0; -} -#else -// read in aff file and set flag mode -int HashMgr::load_config(FILE* aff_handle) -{ - int firstline = 1; - - // io buffers - char line[MAXDELEN+1]; - - // open the affix file - FILE * afflst; - afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); - if (!afflst) { - HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n"); - return 1; - } - fseek(afflst, 0, SEEK_SET); - - // read in each line ignoring any that do not - // start with a known line type indicator - - while (fgets(line,MAXDELEN,afflst)) { - mychomp(line); - - /* remove byte order mark */ - if (firstline) { - firstline = 0; - if (strncmp(line,"\xef\xbb\xbf",3) == 0) memmove(line, line+3, strlen(line+3)+1); - } - - /* parse in the try string */ - if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { - if (flag_mode != FLAG_CHAR) { - HUNSPELL_WARNING(stderr, "error: duplicate FLAG parameter\n"); - } - if (strstr(line, "long")) flag_mode = FLAG_LONG; - if (strstr(line, "num")) flag_mode = FLAG_NUM; - if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; - if (flag_mode == FLAG_CHAR) { - HUNSPELL_WARNING(stderr, "error: FLAG need `num', `long' or `UTF-8' parameter: %s\n", line); - } - } - if ((strncmp(line,"SET",3) == 0) && isspace(line[3]) && strstr(line, "UTF-8")) utf8 = 1; - - /* parse in the ignored characters (for example, Arabic optional diacritics characters */ - if (strncmp(line,"IGNORE",6) == 0) { - if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) { - fclose(afflst); - return 1; - } - } - - if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { - if (parse_aliasf(line, afflst)) { - fclose(afflst); - return 1; - } - } - -#ifdef HUNSPELL_EXPERIMENTAL - if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { - if (parse_aliasm(line, afflst)) { - fclose(afflst); - return 1; - } - } -#endif - if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; - if (((strncmp(line,"SFX",3) == 0) || 
(strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break; - } - fclose(afflst); - return 0; -} -#endif // HUNSPELL_CHROME_CLIENT - -/* parse in the ALIAS table */ -#ifdef HUNSPELL_CHROME_CLIENT -int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator) -{ -#else -int HashMgr::parse_aliasf(char * line, FILE * af) -{ -#endif - if (numaliasf != 0) { - HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n"); - return 1; - } - char * tp = line; - char * piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch(i) { - case 0: { np++; break; } - case 1: { - numaliasf = atoi(piece); - if (numaliasf < 1) { - numaliasf = 0; - aliasf = NULL; - aliasflen = NULL; - HUNSPELL_WARNING(stderr, "incorrect number of entries in AF table\n"); - free(piece); - return 1; - } - aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *)); - aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short)); - if (!aliasf || !aliasflen) { - numaliasf = 0; - if (aliasf) free(aliasf); - if (aliasflen) free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - return 1; - } - np++; - break; - } - default: break; - } - i++; - } - free(piece); - piece = mystrsep(&tp, 0); - } - if (np != 2) { - numaliasf = 0; - free(aliasf); - free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - HUNSPELL_WARNING(stderr, "error: missing AF table information\n"); - return 1; - } - - /* now parse the numaliasf lines to read in the remainder of the table */ - char * nl = line; - for (int j=0; j < numaliasf; j++) { -#ifdef HUNSPELL_CHROME_CLIENT - if (!iterator->AdvanceAndCopy(nl, MAXDELEN)) - return 1; -#else - if (!fgets(nl,MAXDELEN,af)) return 1; -#endif - mychomp(nl); - tp = nl; - i = 0; - aliasf[j] = NULL; - aliasflen[j] = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch(i) { - case 0: { - if (strncmp(piece,"AF",2) != 0) { - numaliasf = 0; - free(aliasf); - 
free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n"); - free(piece); - return 1; - } - break; - } - case 1: { - aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece); - flag_qsort(aliasf[j], 0, aliasflen[j]); - break; - } - default: break; - } - i++; - } - free(piece); - piece = mystrsep(&tp, 0); - } - if (!aliasf[j]) { - free(aliasf); - free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - numaliasf = 0; - HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n"); - return 1; - } - } - return 0; -} - -#ifdef HUNSPELL_CHROME_CLIENT -hentry* HashMgr::AffixIDsToHentry(char* word, - int* affix_ids, int affix_count) const -{ - if (affix_count == 0) - return NULL; - - HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache; - std::string std_word(word); - HEntryCache::iterator found = cache.find(std_word); - if (found != cache.end()) { - // We must return an existing hentry for the same word if we've previously - // handed one out. Hunspell will compare pointers in some cases to see if - // two words it has found are the same. - return found->second; - } - - short word_len = static_cast<short>(strlen(word)); - - // We can get a number of prefixes per word. There will normally be only one, - // but if not, there will be a linked list of "hentry"s for the "homonym"s - // for the word. - struct hentry* first_he = NULL; - struct hentry* prev_he = NULL; // For making linked list. - for (int i = 0; i < affix_count; i++) { - struct hentry* he = new hentry; - if (i == 0) - first_he = he; - he->word = word; - he->wlen = word_len; - he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i], - &he->astr); - he->next = NULL; - he->next_homonym = NULL; - if (prev_he) - prev_he->next_homonym = he; - prev_he = he; - } - - cache[std_word] = first_he; // Save this word in the cache for later. 
- return first_he; -} - -#endif - -int HashMgr::is_aliasf() { - return (aliasf != NULL); -} - -int HashMgr::get_aliasf(int index, unsigned short ** fvec) { - if ((index > 0) && (index <= numaliasf)) { - *fvec = aliasf[index - 1]; - return aliasflen[index - 1]; - } - HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index); - *fvec = NULL; - return 0; -} - -#ifdef HUNSPELL_EXPERIMENTAL -/* parse morph alias definitions */ -int HashMgr::parse_aliasm(char * line, FILE * af) -{ - if (numaliasm != 0) { - HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n"); - return 1; - } - char * tp = line; - char * piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch(i) { - case 0: { np++; break; } - case 1: { - numaliasm = atoi(piece); - if (numaliasm < 1) { - HUNSPELL_WARNING(stderr, "incorrect number of entries in AM table\n"); - free(piece); - return 1; - } - aliasm = (char **) malloc(numaliasm * sizeof(char *)); - if (!aliasm) { - numaliasm = 0; - return 1; - } - np++; - break; - } - default: break; - } - i++; - } - free(piece); - piece = mystrsep(&tp, 0); - } - if (np != 2) { - numaliasm = 0; - free(aliasm); - aliasm = NULL; - HUNSPELL_WARNING(stderr, "error: missing AM alias information\n"); - return 1; - } - - /* now parse the numaliasm lines to read in the remainder of the table */ - char * nl = line; - for (int j=0; j < numaliasm; j++) { - if (!fgets(nl,MAXDELEN,af)) return 1; - mychomp(nl); - tp = nl; - i = 0; - aliasm[j] = NULL; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch(i) { - case 0: { - if (strncmp(piece,"AM",2) != 0) { - HUNSPELL_WARNING(stderr, "error: AM table is corrupt\n"); - free(piece); - numaliasm = 0; - free(aliasm); - aliasm = NULL; - return 1; - } - break; - } - case 1: { - if (complexprefixes) { - if (utf8) reverseword_utf(piece); - else reverseword(piece); - } - aliasm[j] = mystrdup(piece); - break; } 
- default: break; - } - i++; - } - free(piece); - piece = mystrsep(&tp, 0); - } - if (!aliasm[j]) { - numaliasm = 0; - free(aliasm); - aliasm = NULL; - HUNSPELL_WARNING(stderr, "error: map table is corrupt\n"); - return 1; - } - } - return 0; -} - -int HashMgr::is_aliasm() { - return (aliasm != NULL); -} - -char * HashMgr::get_aliasm(int index) { - if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1]; - HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); - return NULL; -} -#endif +#include "license.hunspell"
+#include "license.myspell"
+
+#ifndef MOZILLA_CLIENT
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <cctype>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#endif
+
+#include <io.h>
+
+#include "hashmgr.hxx"
+#include "csutil.hxx"
+#include "atypes.hxx"
+
+#ifdef MOZILLA_CLIENT
+#ifdef __SUNPRO_CC // for SunONE Studio compiler
+using namespace std;
+#endif
+#else
+#ifndef W32
+using namespace std;
+#endif
+#endif
+
+// build a hash table from a munched word list
+//
+// With HUNSPELL_CHROME_CLIENT only the AF/IGNORE configuration is read (via
+// load_config()); otherwise the affix file is scanned for configuration and
+// the word list is loaded with load_tables(). A non-zero status from the
+// last load step is reported and leaves the manager with an empty table.
+#ifdef HUNSPELL_CHROME_CLIENT
+HashMgr::HashMgr(hunspell::BDictReader* reader)
+{
+  bdict_reader = reader;
+#else
+HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle)
+{
+#endif
+  tablesize = 0;
+  tableptr = NULL;
+  flag_mode = FLAG_CHAR;
+  complexprefixes = 0;
+  utf8 = 0;
+  ignorechars = NULL;
+  ignorechars_utf16 = NULL;
+  ignorechars_utf16_len = 0;
+  numaliasf = 0;
+  aliasf = NULL;
+  // parse_aliasf() and the destructor may free aliasflen, so it must never
+  // be left indeterminate.
+  aliasflen = NULL;
+  numaliasm = 0;
+  aliasm = NULL;
+#ifdef HUNSPELL_CHROME_CLIENT
+  // No tables to load, just the AF config.
+  int ec = load_config();
+#else
+  load_config(aff_handle);
+  int ec = load_tables(dic_handle);
+#endif
+  if (ec) {
+    /* error condition - what should we do here */
+    HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
+    if (tableptr) {
+      free(tableptr);
+      tableptr = NULL;
+    }
+    tablesize = 0;
+  }
+}
+
+
+// Releases everything the manager owns: the hash table and its collision
+// chains, the AF/AM alias tables, the IGNORE character buffers and, in
+// HUNSPELL_CHROME_CLIENT builds, the hentry cache and the custom-word entries.
+HashMgr::~HashMgr()
+{
+  if (tableptr) {
+    // now pass through hash table freeing up everything
+    // go through column by column of the table
+    for (int i=0; i < tablesize; i++) {
+      // The head of each column lives inside the table array itself, so only
+      // its members are freed; chained entries are freed as whole objects.
+      struct hentry * pt = &tableptr[i];
+      struct hentry * nt = NULL;
+      if (pt->astr && !aliasf) free(pt->astr);
+      if (pt->word) free(pt->word);
+#ifdef HUNSPELL_EXPERIMENTAL
+      if (pt->description && !aliasm) free(pt->description);
+#endif
+      pt = pt->next;
+      while(pt) {
+        nt = pt->next;
+        // With an alias table the flag vectors belong to aliasf, not to the
+        // entry.
+        if (pt->astr && !aliasf) free(pt->astr);
+        if (pt->word) free(pt->word);
+#ifdef HUNSPELL_EXPERIMENTAL
+        if (pt->description && !aliasm) free(pt->description);
+#endif
+        free(pt);
+        pt = nt;
+      }
+    }
+    free(tableptr);
+    tableptr = NULL;
+  }
+  tablesize = 0;
+
+  if (aliasf) {
+    for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
+    free(aliasf);
+    aliasf = NULL;
+    if (aliasflen) {
+      free(aliasflen);
+      aliasflen = NULL;
+    }
+  }
+  if (aliasm) {
+    for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
+    free(aliasm);
+    aliasm = NULL;
+  }
+
+  if (ignorechars) free(ignorechars);
+  if (ignorechars_utf16) free(ignorechars_utf16);
+
+#ifdef HUNSPELL_CHROME_CLIENT
+  EmptyHentryCache();
+
+  // Custom words: add_word() allocates one hentry (new) per map value and one
+  // std::string (new) per key. Destroy the entries and empty the map first,
+  // because the map keys are StringPieces viewing those strings.
+  for (std::map<StringPiece, struct hentry *>::iterator it =
+           custom_word_to_hentry_map_.begin();
+       it != custom_word_to_hentry_map_.end(); ++it) {
+    delete it->second;
+  }
+  custom_word_to_hentry_map_.clear();
+  // NOTE(review): pointer_to_strings_ is only seen here through push_back();
+  // this assumes a standard sequence container (empty/back/pop_back) - confirm
+  // against the declaration in hashmgr.hxx.
+  while (!pointer_to_strings_.empty()) {
+    delete pointer_to_strings_.back();
+    pointer_to_strings_.pop_back();
+  }
+#endif
+}
+
+#ifdef HUNSPELL_CHROME_CLIENT
+// Destroys every hentry reachable from hentry_cache - each cached head plus
+// the entries linked to it through next_homonym - and then empties the cache.
+void HashMgr::EmptyHentryCache() {
+  HEntryCache::iterator slot = hentry_cache.begin();
+  for (; slot != hentry_cache.end(); ++slot) {
+    for (hentry* node = slot->second; node != NULL; ) {
+      // Read the link before the node is destroyed.
+      hentry* const following = node->next_homonym;
+      delete node;
+      node = following;
+    }
+  }
+  hentry_cache.clear();
+}
+#endif
+
+// lookup a root word in the hashtable
+//
+// Returns the entry for |word|, or NULL when it is unknown. In
+// HUNSPELL_CHROME_CLIENT builds the BDict reader is asked first and words
+// it does not know are looked up among the custom words stored by add_word().
+struct hentry * HashMgr::lookup(const char *word) const
+{
+#ifdef HUNSPELL_CHROME_CLIENT
+  int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
+  int affix_count = bdict_reader->FindWord(word, affix_ids);
+  if (affix_count == 0) { // look for custom added word
+    std::map<StringPiece, struct hentry *>::const_iterator iter =
+        custom_word_to_hentry_map_.find(word);
+    if (iter != custom_word_to_hentry_map_.end())
+      return iter->second;
+    else
+      return NULL;
+  }
+
+  static const int kMaxWordLen = 128;
+  static char word_buf[kMaxWordLen];
+  // strncpy() does not write a terminator when |word| has kMaxWordLen or more
+  // bytes, which would make the strlen()/std::string uses downstream read past
+  // the buffer. Copy at most kMaxWordLen - 1 bytes and terminate explicitly.
+  strncpy(word_buf, word, kMaxWordLen - 1);
+  word_buf[kMaxWordLen - 1] = '\0';
+
+  return AffixIDsToHentry(word_buf, affix_ids, affix_count);
+#else
+  struct hentry * dp;
+  if (tableptr) {
+    dp = &tableptr[hash(word)];
+    // An unused column has a NULL word in its head entry.
+    if (dp->word == NULL) return NULL;
+    for ( ; dp != NULL; dp = dp->next) {
+      if (strcmp(word,dp->word) == 0) return dp;
+    }
+  }
+  return NULL;
+#endif
+}
+
+// add a word to the hash table (private)
+//
+// Without HUNSPELL_CHROME_CLIENT the word is copied, normalised (IGNORE
+// characters removed, reversed for COMPLEXPREFIXES) and inserted into the hash
+// table, taking |aff| as the entry's flag vector. Returns 0 on success and 1
+// on allocation failure, which is what load_tables() expects.
+//
+// With HUNSPELL_CHROME_CLIENT the word is recorded as a custom word in
+// custom_word_to_hentry_map_. Returns 1 when a new entry was stored and 0
+// when the word was already present.
+int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc)
+{
+#ifndef HUNSPELL_CHROME_CLIENT
+  char * st = mystrdup(word);
+  if (wl && !st) return 1;
+  if (ignorechars != NULL) {
+    if (utf8) {
+      remove_ignored_chars_utf(st, ignorechars_utf16, ignorechars_utf16_len);
+    } else {
+      remove_ignored_chars(st, ignorechars);
+    }
+  }
+  if (complexprefixes) {
+    if (utf8) reverseword_utf(st); else reverseword(st);
+  }
+  int i = hash(st);
+  struct hentry * dp = &tableptr[i];
+  if (dp->word == NULL) {
+    // Empty column: fill in the head entry stored in the table itself.
+    dp->wlen = (short) wl;
+    dp->alen = (short) al;
+    dp->word = st;
+    dp->astr = aff;
+    dp->next = NULL;
+    dp->next_homonym = NULL;
+#ifdef HUNSPELL_EXPERIMENTAL
+    if (aliasm) {
+      dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
+    } else {
+      dp->description = mystrdup(desc);
+      if (desc && !dp->description) return 1;
+      if (dp->description && complexprefixes) {
+        if (utf8) reverseword_utf(dp->description); else reverseword(dp->description);
+      }
+    }
+#endif
+  } else {
+    // Occupied column: append a new entry to the collision chain.
+    struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));
+    if (!hp) {
+      free(st);  // not linked anywhere yet
+      return 1;
+    }
+    hp->wlen = (short) wl;
+    hp->alen = (short) al;
+    hp->word = st;
+    hp->astr = aff;
+    hp->next = NULL;
+    hp->next_homonym = NULL;
+#ifdef HUNSPELL_EXPERIMENTAL
+    if (aliasm) {
+      hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
+    } else {
+      hp->description = mystrdup(desc);
+      if (desc && !hp->description) {
+        free(hp);
+        free(st);
+        return 1;
+      }
+      // Reverse the new entry's own description (the column head's
+      // description is unrelated to it).
+      if (hp->description && complexprefixes) {
+        if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);
+      }
+    }
+#endif
+    // Walk to the chain's tail; the first same-spelled entry that has no
+    // homonym yet gets linked to the new one.
+    while (dp->next != NULL) {
+      if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
+      dp=dp->next;
+    }
+    if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
+    dp->next = hp;
+  }
+  return 0;
+#else
+  std::map<StringPiece, struct hentry *>::iterator iter =
+      custom_word_to_hentry_map_.find(word);
+  if (iter != custom_word_to_hentry_map_.end())
+    return 0;  // already known
+
+  // Keep a private copy of the spelling: it backs both the map key and the
+  // entry's word pointer, so neither depends on the caller's buffer.
+  std::string* new_string_word = new std::string(word);
+  pointer_to_strings_.push_back(new_string_word);
+
+  // Make a custom hentry. Every field is set because lookup() hands this
+  // entry to callers that read astr/alen (e.g. put_word_pattern()).
+  // NOTE(review): |aff| is not attached to the entry; the callers visible in
+  // this file pass freshly malloc'ed vectors - confirm who should own them.
+  struct hentry* he = new hentry;
+  he->word = const_cast<char *>(new_string_word->c_str());
+  he->wlen = (short) wl;
+  he->alen = 0;
+  he->astr = NULL;
+  he->next = NULL;
+  he->next_homonym = NULL;
+#ifdef HUNSPELL_EXPERIMENTAL
+  he->description = NULL;
+#endif
+
+  StringPiece sp(*(new_string_word));
+  custom_word_to_hentry_map_[sp] = he;
+  return 1;
+#endif // HUNSPELL_CHROME_CLIENT
+}
+
+// add a custom dic. word to the hash table (public)
+//
+// |aff| is an optional flag string; when given it is decoded with
+// decode_flags() and sorted before the word is added. Always returns 0.
+int HashMgr::put_word(const char * word, int wl, char * aff)
+{
+  unsigned short * flag_vec = NULL;
+  int flag_count = 0;
+  if (aff != NULL) {
+    flag_count = decode_flags(&flag_vec, aff);
+    flag_qsort(flag_vec, 0, flag_count);
+  }
+  add_word(word, wl, flag_vec, flag_count, NULL);
+  return 0;
+}
+
+// Adds |word| with a copy of the flag vector of the existing entry |pattern|.
+// Returns 1 when |pattern| is unknown, has no flags, or the copy cannot be
+// allocated; returns 0 otherwise.
+int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern)
+{
+  struct hentry * dp = lookup(pattern);
+  if (!dp || !dp->astr) return 1;
+  const int count = dp->alen;
+  unsigned short * flags = (unsigned short *) malloc (count * sizeof(short));
+  // Never copy into a failed allocation; a zero-length request may
+  // legitimately yield NULL and is not an error.
+  if (!flags && count > 0) return 1;
+  if (count > 0) memcpy((void *) flags, (void *) dp->astr, count * sizeof(short));
+  add_word(word, wl, flags, count, NULL);
+  return 0;
+}
+
+// walk the hash table entry by entry - null at end
+//
+// |col| is the current column and |hp| the entry returned by the previous
+// call; a negative |col| or a NULL |hp| restarts the walk. When the table is
+// exhausted NULL is returned and |col| is reset to -1.
+struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
+{
+#ifdef HUNSPELL_CHROME_CLIENT
+  // DANGER! This is kind of impossible to make work correctly, since Hunspell
+  // will keep arbitrary hentry pointers into our table. Therefore, the caller
+  // (SuggestMgr::ngsuggest) will need to be modified to not do this for us
+  // to be able to uncomment this function.
+/*
+  // This function is only ever called by one place and not nested. We can
+  // therefore keep static state between calls and use |col| as a "reset" flag
+  // to avoid changing the API. It is set to -1 for the first call.
+  static hunspell::WordIterator word_iterator =
+      bdict_reader->GetAllWordIterator();
+  if (col < 0) {
+    col = 1;
+    word_iterator = bdict_reader->GetAllWordIterator();
+  }
+
+  int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
+  static const int kMaxWordLen = 128;
+  static char word_buf[kMaxWordLen];
+  int affix_count = word_iterator.Advance(word_buf, kMaxWordLen, affix_ids);
+  return AffixIDsToHentry(word_buf, affix_ids, affix_count);
+*/
+  return NULL;
+#else
+  const bool restart = (col < 0) || (hp == NULL);
+  if (restart) {
+    col = -1;
+  } else if (hp->next != NULL) {
+    // Still inside the current column's chain.
+    return hp->next;
+  }
+
+  // Move on to the head of the next column that is in use.
+  for (++col; col < tablesize; ++col) {
+    struct hentry * head = &tableptr[col];
+    if (head->word != NULL) return head;
+  }
+
+  // Ran off the end of the table.
+  col = -1;
+  return NULL;
+#endif
+}
+
+// load a munched word list and build a hash table on the fly
+//
+// Does nothing and returns 0 in HUNSPELL_CHROME_CLIENT builds. Otherwise
+// returns 0 on success, 1 when the stream cannot be re-opened, 2 when the
+// first line cannot be read, 4 when the word count is zero, 3 when the table
+// cannot be allocated and 5 when add_word() fails. The private stream is
+// closed on every path.
+int HashMgr::load_tables(FILE* t_handle)
+{
+#ifndef HUNSPELL_CHROME_CLIENT
+  int wl, al;
+  char * ap;
+  char * dp;
+  unsigned short * flags;
+
+  // raw dictionary - munched file
+  // Work on a duplicate descriptor so closing our stream leaves the caller's
+  // handle open.
+  int fd = _dup(_fileno(t_handle));
+  if (fd < 0) return 1;
+  FILE * rawdict = _fdopen(fd, "r");
+  if (rawdict == NULL) {
+    _close(fd);
+    return 1;
+  }
+  fseek(rawdict, 0, SEEK_SET);
+
+  // first read the first line of file to get hash table size
+  char ts[MAXDELEN];
+  if (! fgets(ts, MAXDELEN-1,rawdict)) {
+    fclose(rawdict);
+    return 2;
+  }
+  mychomp(ts);
+
+  /* remove byte order mark */
+  if (strncmp(ts,"\xef\xbb\xbf",3) == 0) {
+    memmove(ts, ts+3, strlen(ts+3)+1);
+    HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n");
+  }
+
+  if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n");
+  tablesize = atoi(ts);
+  if (!tablesize) {
+    fclose(rawdict);
+    return 4;
+  }
+  // Leave room for user words and keep the size odd.
+  tablesize = tablesize + 5 + USERWORD;
+  if ((tablesize %2) == 0) tablesize++;
+
+  // allocate the hash table
+  tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
+  if (! tableptr) {
+    fclose(rawdict);
+    return 3;
+  }
+  for (int i=0; i<tablesize; i++) tableptr[i].word = NULL;
+
+  // loop through all words on much list and add to hash
+  // table and create word and affix strings
+
+  while (fgets(ts,MAXDELEN-1,rawdict)) {
+    mychomp(ts);
+    // split each line into word and morphological description
+    dp = strchr(ts,'\t');
+
+    if (dp) {
+      *dp = '\0';
+      dp++;
+    } else {
+      dp = NULL;
+    }
+
+    // split each line into word and affix char strings
+    // "\/" signs slash in words (not affix separator)
+    // "/" at beginning of the line is word character (not affix separator)
+    ap = strchr(ts,'/');
+    while (ap) {
+      if (ap == ts) {
+        ap++;
+        continue;
+      } else if (*(ap - 1) != '\\') break;
+      // replace "\/" with "/"
+      for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
+      ap = strchr(ap,'/');
+    }
+
+    if (ap) {
+      *ap = '\0';
+      if (aliasf) {
+        // With an AF table the text after '/' is an index into it.
+        int index = atoi(ap + 1);
+        al = get_aliasf(index, &flags);
+        if (!al) {
+          HUNSPELL_WARNING(stderr, "error - bad flag vector alias: %s\n", ts);
+          *ap = '\0';
+        }
+      } else {
+        al = decode_flags(&flags, ap + 1);
+        flag_qsort(flags, 0, al);
+      }
+    } else {
+      al = 0;
+      ap = NULL;
+      flags = NULL;
+    }
+
+    wl = (int) strlen(ts);
+
+    // add the word and its index
+    if (add_word(ts,wl,flags,al,dp)) {
+      fclose(rawdict);
+      return 5;
+    }
+
+  }
+
+  fclose(rawdict);
+#endif
+  return 0;
+}
+
+
+// the hash function is a simple load and rotate
+// algorithm borrowed
+//
+// The first (up to) four bytes are packed into the accumulator; every later
+// byte is mixed in with a rotate followed by an xor. The result is reduced
+// modulo tablesize. HUNSPELL_CHROME_CLIENT builds always return 0.
+int HashMgr::hash(const char * word) const
+{
+#ifdef HUNSPELL_CHROME_CLIENT
+  return 0;
+#else
+  long hv = 0;
+  int packed = 0;
+  while (packed < 4 && *word != 0) {
+    hv = (hv << 8) | (*word++);
+    ++packed;
+  }
+  for ( ; *word != 0; ++word) {
+    ROTATE(hv,ROTATE_LEN);
+    hv ^= (*word);
+  }
+  return (unsigned long) hv % tablesize;
+#endif
+}
+
+// Decodes the flag string |flags| according to flag_mode into a newly
+// malloc'ed vector stored in |*result| and returns the number of flags.
+// When the vector cannot be allocated |*result| is NULL and 0 is returned, so
+// callers see an empty flag vector.
+int HashMgr::decode_flags(unsigned short ** result, char * flags) {
+  int len;
+  switch (flag_mode) {
+    case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
+      len = strlen(flags);
+      if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG flagvector is odd: %s\n", flags);
+      len = len/2;
+      *result = (unsigned short *) malloc(len * sizeof(short));
+      if (!*result) return 0;
+      for (int i = 0; i < len; i++) {
+        // Go through unsigned char: where plain char is signed, bytes >= 0x80
+        // would sign-extend and corrupt the combined value (encode_flag()
+        // treats both halves as unsigned bytes).
+        (*result)[i] = (((unsigned short) (unsigned char) flags[i * 2]) << 8) + (unsigned short) (unsigned char) flags[i * 2 + 1];
+      }
+      break;
+    }
+    case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
+      len = 1;
+      char * src = flags;
+      unsigned short * dest;
+      char * p;
+      for (p = flags; *p; p++) {
+        if (*p == ',') len++;
+      }
+      *result = (unsigned short *) malloc(len * sizeof(short));
+      if (!*result) return 0;
+      dest = *result;
+      for (p = flags; *p; p++) {
+        if (*p == ',') {
+          *dest = (unsigned short) atoi(src);
+          if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
+          src = p + 1;
+          dest++;
+        }
+      }
+      // The last number has no trailing comma.
+      *dest = (unsigned short) atoi(src);
+      if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
+      break;
+    }
+    case FLAG_UNI: { // UTF-8 characters
+      w_char w[MAXDELEN/2];
+      len = u8_u16(w, MAXDELEN/2, flags);
+      // NOTE(review): guards against a negative count; confirm whether
+      // u8_u16() can actually report one.
+      if (len < 0) len = 0;
+      *result = (unsigned short *) malloc(len * sizeof(short));
+      if (!*result) return 0;
+      memcpy(*result, w, len * sizeof(short));
+      break;
+    }
+    default: { // Ispell's one-character flags (erfg -> e r f g)
+      unsigned short * dest;
+      len = strlen(flags);
+      *result = (unsigned short *) malloc(len * sizeof(short));
+      if (!*result) return 0;
+      dest = *result;
+      for (unsigned char * p = (unsigned char *) flags; *p; p++) {
+        *dest = (unsigned short) *p;
+        dest++;
+      }
+    }
+  }
+  return len;
+}
+
+// Decodes the single flag at the start of |f| according to flag_mode and
+// returns its numeric value; a warning is emitted when the value is 0.
+unsigned short HashMgr::decode_flag(const char * f) {
+  unsigned short s = 0;
+  switch (flag_mode) {
+    case FLAG_LONG:
+      // Go through unsigned char so bytes >= 0x80 are not sign-extended where
+      // plain char is signed.
+      s = ((unsigned short) (unsigned char) f[0] << 8) + (unsigned short) (unsigned char) f[1];
+      break;
+    case FLAG_NUM:
+      s = (unsigned short) atoi(f);
+      break;
+    case FLAG_UNI:
+      u8_u16((w_char *) &s, 1, f);
+      break;
+    default:
+      s = (unsigned short) *((unsigned char *)f);
+  }
+  if (!s) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
+  return s;
+}
+
+// Renders flag |f| as text according to flag_mode and returns a copy made
+// with mystrdup(). A zero flag is rendered as the literal "(NULL)".
+char * HashMgr::encode_flag(unsigned short f) {
+  if (f == 0) return mystrdup("(NULL)");
+
+  unsigned char text[10];
+  switch (flag_mode) {
+    case FLAG_LONG:
+      // High byte first, then low byte.
+      text[0] = (unsigned char) (f >> 8);
+      text[1] = (unsigned char) (f & 0xFF);
+      text[2] = '\0';
+      break;
+    case FLAG_NUM:
+      sprintf((char *) text, "%d", f);
+      break;
+    case FLAG_UNI:
+      u16_u8((char *) text, 10, (w_char *) &f, 1);
+      break;
+    default:
+      text[0] = (unsigned char) (f);
+      text[1] = '\0';
+      break;
+  }
+  return mystrdup((char *) text);
+}
+
+#ifdef HUNSPELL_CHROME_CLIENT
+// Reads the hash manager's configuration from the BDict reader: the AF
+// (flag alias) table and the IGNORE line. Returns the first non-zero status
+// from parse_aliasf(), otherwise 0.
+int HashMgr::load_config()
+{
+  utf8 = 1;  // We always use UTF-8.
+
+  char line[MAXDELEN+1];
+
+  // The AF lines tell us the rules for each affix group ID.
+  hunspell::LineIterator af_lines = bdict_reader->GetAfLineIterator();
+  while (af_lines.AdvanceAndCopy(line, MAXDELEN)) {
+    const int status = parse_aliasf(line, &af_lines);
+    if (status != 0)
+      return status;
+  }
+
+  // Of the regular affix-file commands only IGNORE matters here; the affix
+  // manager reads the rest.
+  hunspell::LineIterator other_lines = bdict_reader->GetOtherLineIterator();
+  while (other_lines.AdvanceAndCopy(line, MAXDELEN)) {
+    if (strncmp(line,"IGNORE",6) != 0)
+      continue;
+    // Ignored characters (for example, Arabic optional diacritics).
+    parse_array(line, &ignorechars, &ignorechars_utf16,
+                &ignorechars_utf16_len, "IGNORE", utf8);
+    break;  // All done.
+  }
+
+  return 0;
+}
+#else
+// read in aff file and set flag mode
+//
+// Scans the affix file up to the first SFX/PFX line for the FLAG, SET,
+// IGNORE, AF, (experimental) AM and COMPLEXPREFIXES commands. Returns 0 on
+// success and 1 when the file cannot be re-opened or a table fails to parse.
+int HashMgr::load_config(FILE* aff_handle)
+{
+  int firstline = 1;
+
+  // io buffers
+  char line[MAXDELEN+1];
+
+  // open the affix file
+  // Work on a duplicate descriptor so closing our stream leaves the caller's
+  // handle open; release the duplicate if no stream can be attached to it.
+  int fd = _dup(_fileno(aff_handle));
+  FILE * afflst = (fd < 0) ? NULL : _fdopen(fd, "r");
+  if (!afflst) {
+    if (fd >= 0) _close(fd);
+    HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");
+    return 1;
+  }
+  fseek(afflst, 0, SEEK_SET);
+
+  // read in each line ignoring any that do not
+  // start with a known line type indicator
+  //
+  // isspace() is only defined for values representable as unsigned char (or
+  // EOF), so every byte is converted before the call.
+
+  while (fgets(line,MAXDELEN,afflst)) {
+    mychomp(line);
+
+    /* remove byte order mark */
+    if (firstline) {
+      firstline = 0;
+      if (strncmp(line,"\xef\xbb\xbf",3) == 0) memmove(line, line+3, strlen(line+3)+1);
+    }
+
+    /* parse in the flag encoding */
+    if ((strncmp(line,"FLAG",4) == 0) && isspace((unsigned char) line[4])) {
+      if (flag_mode != FLAG_CHAR) {
+        HUNSPELL_WARNING(stderr, "error: duplicate FLAG parameter\n");
+      }
+      if (strstr(line, "long")) flag_mode = FLAG_LONG;
+      if (strstr(line, "num")) flag_mode = FLAG_NUM;
+      if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
+      if (flag_mode == FLAG_CHAR) {
+        HUNSPELL_WARNING(stderr, "error: FLAG need `num', `long' or `UTF-8' parameter: %s\n", line);
+      }
+    }
+    if ((strncmp(line,"SET",3) == 0) && isspace((unsigned char) line[3]) && strstr(line, "UTF-8")) utf8 = 1;
+
+    /* parse in the ignored characters (for example, Arabic optional diacritics characters */
+    if (strncmp(line,"IGNORE",6) == 0) {
+      if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {
+        fclose(afflst);
+        return 1;
+      }
+    }
+
+    if ((strncmp(line,"AF",2) == 0) && isspace((unsigned char) line[2])) {
+      if (parse_aliasf(line, afflst)) {
+        fclose(afflst);
+        return 1;
+      }
+    }
+
+#ifdef HUNSPELL_EXPERIMENTAL
+    if ((strncmp(line,"AM",2) == 0) && isspace((unsigned char) line[2])) {
+      if (parse_aliasm(line, afflst)) {
+        fclose(afflst);
+        return 1;
+      }
+    }
+#endif
+    if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
+    // The first affix rule ends the configuration section.
+    if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace((unsigned char) line[3])) break;
+  }
+  fclose(afflst);
+  return 0;
+}
+#endif // HUNSPELL_CHROME_CLIENT
+
+// Frees an AF table that is being abandoned: the first |count| flag vectors
+// (each either malloc'ed or NULL) and then both arrays.
+static void discard_aliasf_table(unsigned short ** table, unsigned short * lengths, int count)
+{
+  if (table) {
+    for (int k = 0; k < count; k++) free(table[k]);
+    free(table);
+  }
+  free(lengths);
+}
+
+/* parse in the ALIAS table */
+//
+// |line| holds the "AF <count>" header; the following <count> lines are read
+// from the iterator/file into the same buffer. Returns 0 on success and 1 on
+// any error; on error no table is left behind (aliasf and aliasflen are NULL
+// and numaliasf is 0).
+#ifdef HUNSPELL_CHROME_CLIENT
+int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator)
+{
+#else
+int HashMgr::parse_aliasf(char * line, FILE * af)
+{
+#endif
+  if (numaliasf != 0) {
+    HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n");
+    return 1;
+  }
+  char * tp = line;
+  char * piece;
+  int i = 0;
+  int np = 0;
+  piece = mystrsep(&tp, 0);
+  while (piece) {
+    if (*piece != '\0') {
+      switch(i) {
+        case 0: { np++; break; }
+        case 1: {
+          numaliasf = atoi(piece);
+          if (numaliasf < 1) {
+            numaliasf = 0;
+            aliasf = NULL;
+            aliasflen = NULL;
+            HUNSPELL_WARNING(stderr, "incorrect number of entries in AF table\n");
+            free(piece);
+            return 1;
+          }
+          aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));
+          aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));
+          if (!aliasf || !aliasflen) {
+            discard_aliasf_table(aliasf, aliasflen, 0);
+            numaliasf = 0;
+            aliasf = NULL;
+            aliasflen = NULL;
+            free(piece);
+            return 1;
+          }
+          // Start from an all-empty table so it can be released safely no
+          // matter where parsing stops.
+          for (int k = 0; k < numaliasf; k++) {
+            aliasf[k] = NULL;
+            aliasflen[k] = 0;
+          }
+          np++;
+          break;
+        }
+        default: break;
+      }
+      i++;
+    }
+    free(piece);
+    piece = mystrsep(&tp, 0);
+  }
+  if (np != 2) {
+    // The count was never parsed, so nothing was allocated by this call;
+    // just make sure no stale state remains.
+    numaliasf = 0;
+    aliasf = NULL;
+    aliasflen = NULL;
+    HUNSPELL_WARNING(stderr, "error: missing AF table information\n");
+    return 1;
+  }
+
+  /* now parse the numaliasf lines to read in the remainder of the table */
+  char * nl = line;
+  for (int j=0; j < numaliasf; j++) {
+    bool have_line;
+#ifdef HUNSPELL_CHROME_CLIENT
+    have_line = iterator->AdvanceAndCopy(nl, MAXDELEN) ? true : false;
+#else
+    have_line = (fgets(nl,MAXDELEN,af) != NULL);
+#endif
+    if (!have_line) {
+      // Truncated table: drop it instead of leaving half-filled arrays that
+      // the destructor would walk.
+      discard_aliasf_table(aliasf, aliasflen, numaliasf);
+      aliasf = NULL;
+      aliasflen = NULL;
+      numaliasf = 0;
+      return 1;
+    }
+    mychomp(nl);
+    tp = nl;
+    i = 0;
+    piece = mystrsep(&tp, 0);
+    while (piece) {
+      if (*piece != '\0') {
+        switch(i) {
+          case 0: {
+            if (strncmp(piece,"AF",2) != 0) {
+              discard_aliasf_table(aliasf, aliasflen, numaliasf);
+              aliasf = NULL;
+              aliasflen = NULL;
+              numaliasf = 0;
+              HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n");
+              free(piece);
+              return 1;
+            }
+            break;
+          }
+          case 1: {
+            aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece);
+            flag_qsort(aliasf[j], 0, aliasflen[j]);
+            break;
+          }
+          default: break;
+        }
+        i++;
+      }
+      free(piece);
+      piece = mystrsep(&tp, 0);
+    }
+    if (!aliasf[j]) {
+      // The line carried no flag vector.
+      discard_aliasf_table(aliasf, aliasflen, numaliasf);
+      aliasf = NULL;
+      aliasflen = NULL;
+      numaliasf = 0;
+      HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n");
+      return 1;
+    }
+  }
+  return 0;
+}
+
+#ifdef HUNSPELL_CHROME_CLIENT
+// Returns the hentry chain for |word|, creating and caching it on first use.
+//
+// One hentry is allocated per element of |affix_ids|; its flag vector and
+// length come from get_aliasf(). The entries are linked through next_homonym
+// in the order of |affix_ids| and the head of the chain is returned. Returns
+// NULL when |affix_count| is 0.
+//
+// Each entry stores the |word| pointer itself, not a copy of the string.
+// NOTE(review): that implies the caller's buffer must outlive the cached
+// entry - confirm against the callers.
+hentry* HashMgr::AffixIDsToHentry(char* word,
+                                  int* affix_ids, int affix_count) const
+{
+  if (affix_count == 0)
+    return NULL;
+
+  // This method is const but has to update the cache and call the non-const
+  // get_aliasf(), hence the cast.
+  HashMgr* const self = const_cast<HashMgr*>(this);
+  HEntryCache& cache = self->hentry_cache;
+
+  const std::string key(word);
+  HEntryCache::iterator hit = cache.find(key);
+  if (hit != cache.end()) {
+    // Hand back the entry created earlier: Hunspell sometimes compares hentry
+    // pointers to decide whether two results are the same word, so a given
+    // word must always map to the same object.
+    return hit->second;
+  }
+
+  const short length = static_cast<short>(strlen(word));
+
+  // Build the homonym chain. |link| always points at the slot that should
+  // receive the next entry: first |head|, afterwards the previous entry's
+  // next_homonym field.
+  struct hentry* head = NULL;
+  struct hentry** link = &head;
+  for (int n = 0; n < affix_count; ++n) {
+    struct hentry* entry = new hentry;
+    entry->word = word;
+    entry->wlen = length;
+    entry->alen = static_cast<short>(self->get_aliasf(affix_ids[n],
+                                                      &entry->astr));
+    entry->next = NULL;
+    entry->next_homonym = NULL;
+    *link = entry;
+    link = &entry->next_homonym;
+  }
+
+  cache[key] = head;  // Remember the chain for later lookups of this word.
+  return head;
+}
+
+#endif
+
+/* Returns 1 when a flag alias table is present (aliasf is non-NULL),
+ * otherwise 0. */
+int HashMgr::is_aliasf() {
+  return aliasf ? 1 : 0;
+}
+
+/* Look up flag alias number `index` (1-based).
+ *
+ * On success *fvec is set to the stored flag vector and its length is
+ * returned.  For an index outside 1..numaliasf a warning is emitted, *fvec
+ * is set to NULL and 0 is returned. */
+int HashMgr::get_aliasf(int index, unsigned short ** fvec) {
+  if ((index <= 0) || (index > numaliasf)) {
+    HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index);
+    *fvec = NULL;
+    return 0;
+  }
+  const int slot = index - 1;  /* the table itself is 0-based */
+  *fvec = aliasf[slot];
+  return aliasflen[slot];
+}
+
+#ifdef HUNSPELL_EXPERIMENTAL
+/* Release an AM table that is being abandoned.  Only the first `filled`
+ * slots of `table` hold initialized pointers (NULL or a mystrdup() copy);
+ * the remaining slots are raw malloc() memory and are left alone.
+ * NOTE(review): assumes mystrdup() results are released with free() -
+ * confirm against its definition. */
+static void release_aliasm_table(char ** table, int filled)
+{
+  if (!table) return;
+  for (int k = 0; k < filled; k++) free(table[k]);
+  free(table);
+}
+
+/* parse morph alias definitions
+ *
+ * `line` holds the header line "AM <count>".  The <count> table lines that
+ * follow are read from `af` into that same buffer (up to MAXDELEN bytes
+ * each), so the contents of `line` are overwritten.  Every table line must
+ * look like "AM <description>"; the description is stored as a mystrdup()
+ * copy, reversed first when complexprefixes is set.
+ *
+ * Returns 0 on success and 1 on error.  Apart from the duplicate-table
+ * error (which leaves the existing table untouched), every error path
+ * leaves numaliasm == 0 and no partially filled table behind.
+ */
+int HashMgr::parse_aliasm(char * line, FILE * af)
+{
+  if (numaliasm != 0) {
+    HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n");
+    return 1;
+  }
+  char * tp = line;
+  char * piece;
+  int i = 0;
+  int np = 0;
+  piece = mystrsep(&tp, 0);
+  while (piece) {
+    if (*piece != '\0') {
+      switch(i) {
+        case 0: { np++; break; }
+        case 1: {
+          numaliasm = atoi(piece);
+          if (numaliasm < 1) {
+            /* do not keep a rejected (zero or negative) count around */
+            numaliasm = 0;
+            HUNSPELL_WARNING(stderr, "incorrect number of entries in AM table\n");
+            free(piece);
+            return 1;
+          }
+          aliasm = (char **) malloc(numaliasm * sizeof(char *));
+          if (!aliasm) {
+            numaliasm = 0;
+            free(piece);  /* the current token must not leak on this path */
+            return 1;
+          }
+          np++;
+          break;
+        }
+        default: break;
+      }
+      i++;
+    }
+    free(piece);
+    piece = mystrsep(&tp, 0);
+  }
+  if (np != 2) {
+    numaliasm = 0;
+    free(aliasm);
+    aliasm = NULL;
+    HUNSPELL_WARNING(stderr, "error: missing AM alias information\n");
+    return 1;
+  }
+
+  /* now parse the numaliasm lines to read in the remainder of the table */
+  char * nl = line;
+  for (int j=0; j < numaliasm; j++) {
+    if (!fgets(nl,MAXDELEN,af)) {
+      /* A short read must tear the table down: slots j.. are uninitialized
+       * and must never become reachable through numaliasm. */
+      release_aliasm_table(aliasm, j);
+      aliasm = NULL;
+      numaliasm = 0;
+      return 1;
+    }
+    mychomp(nl);
+    tp = nl;
+    i = 0;
+    aliasm[j] = NULL;
+    piece = mystrsep(&tp, 0);
+    while (piece) {
+      if (*piece != '\0') {
+        switch(i) {
+          case 0: {
+            if (strncmp(piece,"AM",2) != 0) {
+              HUNSPELL_WARNING(stderr, "error: AM table is corrupt\n");
+              free(piece);
+              /* slots 0..j-1 own duplicated strings; free them as well */
+              release_aliasm_table(aliasm, j);
+              aliasm = NULL;
+              numaliasm = 0;
+              return 1;
+            }
+            break;
+          }
+          case 1: {
+            if (complexprefixes) {
+              if (utf8) reverseword_utf(piece);
+              else reverseword(piece);
+            }
+            aliasm[j] = mystrdup(piece);
+            break;
+          }
+          default: break;
+        }
+        i++;
+      }
+      free(piece);
+      piece = mystrsep(&tp, 0);
+    }
+    if (!aliasm[j]) {
+      /* the line carried no description; drop everything stored so far */
+      release_aliasm_table(aliasm, j);
+      aliasm = NULL;
+      numaliasm = 0;
+      HUNSPELL_WARNING(stderr, "error: AM table is corrupt\n");
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* Returns 1 when a morphological alias table is present (aliasm is
+ * non-NULL), otherwise 0. */
+int HashMgr::is_aliasm() {
+  return aliasm ? 1 : 0;
+}
+
+/* Look up morphological alias number `index` (1-based).
+ *
+ * Returns the stored description, or NULL (after emitting a warning) when
+ * the index lies outside 1..numaliasm. */
+char * HashMgr::get_aliasm(int index) {
+  if ((index <= 0) || (index > numaliasm)) {
+    HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
+    return NULL;
+  }
+  return aliasm[index - 1];  /* the table itself is 0-based */
+}
+#endif
diff --git a/chrome/third_party/hunspell/src/hunspell/hashmgr.hxx b/chrome/third_party/hunspell/src/hunspell/hashmgr.hxx index 9290e90..f247f80 100644 --- a/chrome/third_party/hunspell/src/hunspell/hashmgr.hxx +++ b/chrome/third_party/hunspell/src/hunspell/hashmgr.hxx @@ -6,6 +6,7 @@ #ifdef HUNSPELL_CHROME_CLIENT #include "chrome/third_party/hunspell/google/bdict_reader.h" +#include "base/string_piece.h" #include <string> #include <map> #endif @@ -17,6 +18,8 @@ class HashMgr #ifdef HUNSPELL_CHROME_CLIENT // Not owned by this class, owned by the Hunspell object. hunspell::BDictReader* bdict_reader; + std::map<StringPiece, struct hentry *> custom_word_to_hentry_map_; + std::vector<std::string*> pointer_to_strings_; #endif int tablesize; struct hentry * tableptr; |