1 files changed, 38 insertions, 17 deletions
diff --git a/chrome/tools/convert_dict/aff_reader.cc b/chrome/tools/convert_dict/aff_reader.cc
index b3b0381..b24a0d8 100644
--- a/chrome/tools/convert_dict/aff_reader.cc
+++ b/chrome/tools/convert_dict/aff_reader.cc
@@ -122,7 +122,7 @@ bool AffReader::Read() {
       exit(1);
     } else if (StringBeginsWith(line, "COMPLEXPREFIXES ")) {
       printf("We don't support the COMPLEXPREFIXES command yet. This would "
-        "mean we have to insert words backwords as well (I think)\n");
+        "mean we have to insert words backwards as well (I think)\n");
       exit(1);
     } else {
       // All other commands get stored in the other commands list.
@@ -241,7 +241,7 @@ void AffReader::AddAffix(std::string* rule) {
           // so that means that this prefix would be a compound one.
           //
           // It expects these rules to use the same alias rules as the .dic
-          // file. We've forced it to use aliases, which is a numberical index
+          // file. We've forced it to use aliases, which is a numerical index
           // instead of these character flags, and this needs to be consistent.
 
           std::string before_flags = part.substr(0, slash_index + 1);
@@ -250,13 +250,21 @@ void AffReader::AddAffix(std::string* rule) {
           // that tells us what to strip.
           std::vector<std::string> after_slash;
           base::SplitString(part.substr(slash_index + 1), ' ', &after_slash);
-          if (after_slash.size() < 2) {
-            // Note that we may get a third term here which is the
-            // morphological description of this rule. This happens in the tests
-            // only, so we can just ignore it.
-            printf("ERROR: Didn't get enough after the slash\n");
+          if (after_slash.size() == 0) {
+            printf("ERROR: Found 0 terms after slash in affix rule '%s', "
+                      "but need at least 2.\n",
+                   part.c_str());
             return;
           }
+          if (after_slash.size() == 1) {
+            printf("WARNING: Found 1 term after slash in affix rule '%s', "
+                      "but expected at least 2. Adding '.'.\n",
+                   part.c_str());
+            after_slash.push_back(".");
+          }
+          // Note that we may get a third term here which is the morphological
+          // description of this rule. This happens in the tests only, so we can
+          // just ignore it.
 
           part = base::StringPrintf("%s%d %s",
                                     before_flags.c_str(),
@@ -266,8 +274,11 @@ void AffReader::AddAffix(std::string* rule) {
 
         // Reencode from here
         std::string reencoded;
-        if (!EncodingToUTF8(part, &reencoded))
+        if (!EncodingToUTF8(part, &reencoded)) {
+          printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n",
+                 part.c_str());
           break;
+        }
 
         *rule = rule->substr(0, part_start) + reencoded;
         break;
@@ -283,19 +294,26 @@ void AffReader::AddAffix(std::string* rule) {
 
 void AffReader::AddReplacement(std::string* rule) {
   TrimLine(rule);
+  CollapseDuplicateSpaces(rule);
 
   std::string utf8rule;
-  if (!EncodingToUTF8(*rule, &utf8rule))
+  if (!EncodingToUTF8(*rule, &utf8rule)) {
+    printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n",
+           rule->c_str());
     return;
+  }
 
-  std::vector<std::string> split;
-  base::SplitString(utf8rule, ' ', &split);
-
-  // There should be two parts.
-  if (split.size() != 2)
+  // The first space separates key and value.
+  size_t space_index = utf8rule.find(' ');
+  if (space_index == std::string::npos) {
+    printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str());
     return;
+  }
+  std::vector<std::string> split;
+  split.push_back(utf8rule.substr(0, space_index));
+  split.push_back(utf8rule.substr(space_index + 1));
 
-  // Underscores are used to represent spaces
+  // Underscores are used to represent spaces in most aff files
   // (since the line is parsed on spaces).
   std::replace(split[0].begin(), split[0].end(), '_', ' ');
   std::replace(split[1].begin(), split[1].end(), '_', ' ');
@@ -309,8 +327,11 @@ void AffReader::HandleRawCommand(const std::string& line) {
 
 void AffReader::HandleEncodedCommand(const std::string& line) {
   std::string utf8;
-  if (EncodingToUTF8(line, &utf8))
-    other_commands_.push_back(utf8);
+  if (!EncodingToUTF8(line, &utf8)) {
+    printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str());
+    return;
+  }
+  other_commands_.push_back(utf8);
 }
 
 }  // namespace convert_dict