summaryrefslogtreecommitdiffstats
path: root/chrome/tools/convert_dict/hunspell_reader.cc
diff options
context:
space:
mode:
author: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-02-02 10:02:26 +0000
committer: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-02-02 10:02:26 +0000
commit: bbffa669691c4d2f9f1ab8f95226171be7b2dd04 (patch)
tree: 0336c36be3524514fe5ab0f2d7341a6ae270877d /chrome/tools/convert_dict/hunspell_reader.cc
parent: 4f4c43ca4eed4bff261f6e4ff760a02455ef50aa (diff)
downloadchromium_src-bbffa669691c4d2f9f1ab8f95226171be7b2dd04.zip
chromium_src-bbffa669691c4d2f9f1ab8f95226171be7b2dd04.tar.gz
chromium_src-bbffa669691c4d2f9f1ab8f95226171be7b2dd04.tar.bz2
The first step towards supporting the Hungarian spell-checking dictionary.
This change fixes a couple of problems that must be resolved before a Hungarian dictionary can be used in Chrome. 1. Use TrimWhitespace() in TrimLine(). Sorry, this was caused by my mistake of using TrimWhiteSpaceUTF8() without checking it carefully. 2. Replace morphing rules with compound rules. It seems existing Hungarian dictionaries use (language-specific) morphing rules to handle words that have both prefixes and suffixes, e.g. "legjobb" (best). It is better to replace such (language-dependent) morphing rules with (language-independent) compound rules to avoid language-specific issues. (As far as I tested, this change fixes many quality problems caused by Hungarian compounds.) This change also adds simple tests for our dictionary converter. BUG=15558 TEST=unit_test --gtest_filter=ConvertDictTest* Review URL: http://codereview.chromium.org/553087 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@37816 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/tools/convert_dict/hunspell_reader.cc')
-rw-r--r-- chrome/tools/convert_dict/hunspell_reader.cc | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/chrome/tools/convert_dict/hunspell_reader.cc b/chrome/tools/convert_dict/hunspell_reader.cc
index b573b1c..d197c4d 100644
--- a/chrome/tools/convert_dict/hunspell_reader.cc
+++ b/chrome/tools/convert_dict/hunspell_reader.cc
@@ -20,10 +20,10 @@ void TrimLine(std::string* line) {
static_cast<unsigned char>((*line)[2]) == 0xbf)
*line = line->substr(3);
- std::wstring line_input_wide = UTF8ToWide(*line);
- std::wstring line_output_wide;
- TrimWhitespace(line_input_wide, TRIM_ALL, &line_output_wide);
- *line = WideToUTF8(line_output_wide);
+ // Treat this text as an ASCII text and trim whitespace characters as
+ // hunspell does. The returned text is to be converted into UTF-8 text with
+ // the encoding defined in an affix file.
+ TrimWhitespace(*line, TRIM_ALL, line);
}
std::string ReadLine(FILE* file) {