3 files changed, 826 insertions, 0 deletions
diff --git a/chrome/browser/chromeos/input_method/input_method_util.cc b/chrome/browser/chromeos/input_method/input_method_util.cc
new file mode 100644
index 0000000..9d7cf1e
--- /dev/null
+++ b/chrome/browser/chromeos/input_method/input_method_util.cc
@@ -0,0 +1,503 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/chromeos/input_method/input_method_util.h"
+
+#include <map>
+#include <utility>
+
+#include "app/l10n_util.h"
+#include "app/l10n_util_collator.h"
+#include "base/basictypes.h"
+#include "base/hash_tables.h"
+#include "base/scoped_ptr.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/chromeos/cros/cros_library.h"
+#include "chrome/browser/chromeos/cros/keyboard_library.h"
+#include "grit/generated_resources.h"
+#include "third_party/icu/public/common/unicode/uloc.h"
+
+namespace {
+
+// Multimap from language code to the input method IDs associated with it.
+typedef std::multimap<std::string, std::string> LanguageCodeToIdsMap;
+LanguageCodeToIdsMap* g_language_code_to_ids_map = NULL;  // Built lazily.
+
+const struct EnglishToResourceId {  // English IBus string -> resource ID.
+  const char* english_string_from_ibus;  // Lookup key, as sent by the engine.
+  int resource_id;  // IDS_* resource used to fetch the localized text.
+} kEnglishToResourceIdArray[] = {
+  // For ibus-mozc: third_party/ibus-mozc/files/src/unix/ibus/.
+  { "Direct input", IDS_STATUSBAR_IME_JAPANESE_IME_STATUS_DIRECT_INPUT },
+  { "Hiragana", IDS_STATUSBAR_IME_JAPANESE_IME_STATUS_HIRAGANA },
+  { "Katakana", IDS_STATUSBAR_IME_JAPANESE_IME_STATUS_KATAKANA },
+  { "Half width katakana",  // small k is not a typo.
+    IDS_STATUSBAR_IME_JAPANESE_IME_STATUS_HALF_WIDTH_KATAKANA },
+  { "Latin", IDS_STATUSBAR_IME_JAPANESE_IME_STATUS_LATIN },
+  { "Wide Latin", IDS_STATUSBAR_IME_JAPANESE_IME_STATUS_WIDE_LATIN },
+
+  // For ibus-hangul: third_party/ibus-hangul/files/po/.
+  { "Enable/Disable Hanja mode", IDS_STATUSBAR_IME_KOREAN_HANJA_MODE },
+
+  // For ibus-pinyin: third_party/ibus-pinyin/files/po/.
+  { "Chinese", IDS_STATUSBAR_IME_CHINESE_PINYIN_TOGGLE_CHINESE_ENGLISH },
+  { "Full/Half width",
+    IDS_STATUSBAR_IME_CHINESE_PINYIN_TOGGLE_FULL_HALF },
+  { "Full/Half width punctuation",
+    IDS_STATUSBAR_IME_CHINESE_PINYIN_TOGGLE_FULL_HALF_PUNCTUATION },
+  { "Simplfied/Traditional Chinese",
+    IDS_STATUSBAR_IME_CHINESE_PINYIN_TOGGLE_S_T_CHINESE },
+
+  // TODO(yusukes): Support ibus-chewing and ibus-table-* if needed.
+
+  // For the "Languages and Input" dialog.
+  { "kbd (m17n)", IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_STANDARD_INPUT_METHOD },
+  { "itrans (m17n)",  // also uses the "STANDARD_INPUT_METHOD" id.
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_STANDARD_INPUT_METHOD },
+  { "cangjie (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_CHINESE_CANGJIE_INPUT_METHOD },
+  { "quick (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_CHINESE_QUICK_INPUT_METHOD },
+  { "isiri (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_PERSIAN_ISIRI_2901_INPUT_METHOD },
+  { "kesmanee (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_THAI_KESMANEE_INPUT_METHOD },
+  { "tis820 (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_THAI_TIS820_INPUT_METHOD },
+  { "pattachote (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_THAI_PATTACHOTE_INPUT_METHOD },
+  { "tcvn (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_VIETNAMESE_TCVN_INPUT_METHOD },
+  { "telex (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_VIETNAMESE_TELEX_INPUT_METHOD },
+  { "viqr (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_VIETNAMESE_VIQR_INPUT_METHOD },
+  { "vni (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_VIETNAMESE_VNI_INPUT_METHOD },
+  { "latn-post (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_LATIN_POST_INPUT_METHOD },
+  { "latn-pre (m17n)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_M17N_LATIN_PRE_INPUT_METHOD },
+  { "Bopomofo", IDS_OPTIONS_SETTINGS_LANGUAGES_BOPOMOFO_INPUT_METHOD },
+  { "Chewing", IDS_OPTIONS_SETTINGS_LANGUAGES_CHEWING_INPUT_METHOD },
+  { "Pinyin", IDS_OPTIONS_SETTINGS_LANGUAGES_PINYIN_INPUT_METHOD },
+  { "Mozc (US keyboard layout)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_JAPANESE_MOZC_US_INPUT_METHOD },
+  { "Mozc (Japanese keyboard layout)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_JAPANESE_MOZC_JP_INPUT_METHOD },
+  { "Google Japanese Input (US keyboard layout)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_JAPANESE_GOOGLE_US_INPUT_METHOD },
+  { "Google Japanese Input (Japanese keyboard layout)",
+    IDS_OPTIONS_SETTINGS_LANGUAGES_JAPANESE_GOOGLE_JP_INPUT_METHOD },
+  { "Korean", IDS_OPTIONS_SETTINGS_LANGUAGES_KOREAN_INPUT_METHOD },
+
+  // For ibus-xkb-layouts engine: third_party/ibus-xkb-layouts/files
+  { "Japan", IDS_STATUSBAR_LAYOUT_JAPAN },
+  { "Slovenia", IDS_STATUSBAR_LAYOUT_SLOVENIA },
+  { "Germany", IDS_STATUSBAR_LAYOUT_GERMANY },
+  { "Italy", IDS_STATUSBAR_LAYOUT_ITALY },
+  { "Estonia", IDS_STATUSBAR_LAYOUT_ESTONIA },
+  { "Hungary", IDS_STATUSBAR_LAYOUT_HUNGARY },
+  { "Poland", IDS_STATUSBAR_LAYOUT_POLAND },
+  { "Denmark", IDS_STATUSBAR_LAYOUT_DENMARK },
+  { "Croatia", IDS_STATUSBAR_LAYOUT_CROATIA },
+  { "Brazil", IDS_STATUSBAR_LAYOUT_BRAZIL },
+  { "Serbia", IDS_STATUSBAR_LAYOUT_SERBIA },
+  { "Czechia", IDS_STATUSBAR_LAYOUT_CZECHIA },
+  { "USA - Dvorak", IDS_STATUSBAR_LAYOUT_USA_DVORAK },
+  { "Romania", IDS_STATUSBAR_LAYOUT_ROMANIA },
+  { "USA", IDS_STATUSBAR_LAYOUT_USA },
+  { "Lithuania", IDS_STATUSBAR_LAYOUT_LITHUANIA },
+  { "United Kingdom", IDS_STATUSBAR_LAYOUT_UNITED_KINGDOM },
+  { "Slovakia", IDS_STATUSBAR_LAYOUT_SLOVAKIA },
+  { "Russia", IDS_STATUSBAR_LAYOUT_RUSSIA },
+  { "Greece", IDS_STATUSBAR_LAYOUT_GREECE },
+  { "Belgium", IDS_STATUSBAR_LAYOUT_BELGIUM },
+  { "Bulgaria", IDS_STATUSBAR_LAYOUT_BULGARIA },
+  { "Switzerland", IDS_STATUSBAR_LAYOUT_SWITZERLAND },
+  { "Turkey", IDS_STATUSBAR_LAYOUT_TURKEY },
+  { "Portugal", IDS_STATUSBAR_LAYOUT_PORTUGAL },
+  { "Spain", IDS_STATUSBAR_LAYOUT_SPAIN },
+  { "Finland", IDS_STATUSBAR_LAYOUT_FINLAND },
+  { "Ukraine", IDS_STATUSBAR_LAYOUT_UKRAINE },
+  { "Spain - Catalan variant with middle-dot L",
+    IDS_STATUSBAR_LAYOUT_SPAIN_CATALAN },
+  { "France", IDS_STATUSBAR_LAYOUT_FRANCE },
+  { "Norway", IDS_STATUSBAR_LAYOUT_NORWAY },
+  { "Sweden", IDS_STATUSBAR_LAYOUT_SWEDEN },
+  { "Netherlands", IDS_STATUSBAR_LAYOUT_NETHERLANDS },
+  { "Latvia", IDS_STATUSBAR_LAYOUT_LATVIA },
+};
+const size_t kNumEntries = arraysize(kEnglishToResourceIdArray);
+
+// There are some differences between ISO 639-2 (T) and ISO 639-2 B, and
+// some language codes are not recognized by ICU (i.e. ICU cannot convert
+// these codes to two-letter language codes and display names). Hence we
+// convert these codes to ones that ICU recognizes. Each row is {from, to}.
+//
+// See http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes for details.
+const char* const kIso639VariantMapping[][2] = {
+  {"cze", "ces"},
+  {"ger", "deu"},
+  {"gre", "ell"},
+  // "scr" is not an ISO 639 code. For some reason, evdev.xml uses "scr" as
+  // the language code for Croatian.
+  {"scr", "hrv"},
+  {"rum", "ron"},
+  {"slo", "slk"},
+};
+
+// Pairs of a language code and the default input method id for it.
+// The list is used for reordering input method ids.
+//
+// TODO(satorux): We may need to handle secondary and tertiary input
+// methods, rather than handling the default input method only.
+const struct LanguageDefaultInputMethodId {
+  const char* language_code;
+  const char* input_method_id;
+} kLanguageDefaultInputMethodIds[] = {
+  { "en-US", "xkb:us::eng", },  // US - English
+  { "fr",    "xkb:fr::fra", },  // France - French
+  { "de",    "xkb:de::ger", },  // Germany - German
+};
+
+// Comparator for sorting language codes by the display names of the
+// languages they denote, using the ICU collator.
+struct CompareLanguageCodesByLanguageName
+    : std::binary_function<const std::string&, const std::string&, bool> {
+  explicit CompareLanguageCodesByLanguageName(icu::Collator* collator)
+      : collator_(collator) {
+  }
+
+  // Looking up the display names on every comparison is not efficient, but
+  // acceptable as GetLanguageDisplayNameFromCode() is cheap, and the
+  // language list is short (about 40 at most).
+  bool operator()(const std::string& s1, const std::string& s2) const {
+    using chromeos::input_method::GetLanguageDisplayNameFromCode;
+    const std::wstring name1 = GetLanguageDisplayNameFromCode(s1);
+    const std::wstring name2 = GetLanguageDisplayNameFromCode(s2);
+    l10n_util::StringComparator<std::wstring> compare_names(collator_);
+    return compare_names(name1, name2);
+  }
+
+  icu::Collator* collator_;
+};
+
+// Comparator that orders input method ids by the display names of the
+// languages they belong to, using the ICU collator.
+struct CompareInputMethodIdsByLanguageName
+    : std::binary_function<const std::string&, const std::string&, bool> {
+  typedef std::map<std::string, std::string> IdToLanguageCodeMap;
+
+  CompareInputMethodIdsByLanguageName(
+      icu::Collator* collator,
+      const IdToLanguageCodeMap& id_to_language_code_map)
+      : comparator_(collator),
+        id_to_language_code_map_(id_to_language_code_map) {
+  }
+
+  bool operator()(const std::string& s1, const std::string& s2) const {
+    return comparator_(LanguageCodeOf(s1), LanguageCodeOf(s2));
+  }
+
+  // Returns the language code registered for |input_method_id|, or an
+  // empty string when the id is not in the map.
+  std::string LanguageCodeOf(const std::string& input_method_id) const {
+    const IdToLanguageCodeMap::const_iterator found =
+        id_to_language_code_map_.find(input_method_id);
+    return found == id_to_language_code_map_.end() ? std::string()
+                                                   : found->second;
+  }
+
+  const CompareLanguageCodesByLanguageName comparator_;
+  const IdToLanguageCodeMap& id_to_language_code_map_;
+};
+
+// Stores the localized text for |english_string| (sent by an IBus engine) in
+// |out_string|. Returns false, without touching it, if the string is unknown.
+bool GetLocalizedString(
+    const std::string& english_string, string16 *out_string) {
+  DCHECK(out_string);
+  typedef base::hash_map<std::string, int> HashType;
+  static HashType* english_to_resource_id = NULL;
+
+  // Build the lookup table on first use. We don't free this map.
+  if (!english_to_resource_id) {
+    english_to_resource_id = new HashType(kNumEntries);
+    for (size_t i = 0; i < kNumEntries; ++i) {
+      const char* const key =
+          kEnglishToResourceIdArray[i].english_string_from_ibus;
+      const bool result = english_to_resource_id->insert(
+          std::make_pair(key, kEnglishToResourceIdArray[i].resource_id)).second;
+      DCHECK(result) << "Duplicated string is found: " << key;
+    }
+  }
+
+  HashType::const_iterator iter = english_to_resource_id->find(english_string);
+  if (iter == english_to_resource_id->end()) {
+    LOG(ERROR) << "Resource ID is not found for: " << english_string;
+    return false;
+  }
+  *out_string = l10n_util::GetStringUTF16(iter->second);
+  return true;
+}
+
+// Builds |g_language_code_to_ids_map| on first use.
+// Returns true when the map is ready, in which case
+// |g_language_code_to_ids_map| is guaranteed to be non-NULL. Returns false
+// if the map cannot be built yet, e.g. because ibus-daemon is not ready.
+bool InitializeLanguageCodeToIdsMap() {
+  if (g_language_code_to_ids_map) {
+    return true;
+  }
+
+  chromeos::InputMethodLibrary* input_method_library =
+      chromeos::CrosLibrary::Get()->GetInputMethodLibrary();
+  scoped_ptr<chromeos::InputMethodDescriptors> supported_input_methods(
+      input_method_library->GetSupportedInputMethods());
+  const size_t num_supported = supported_input_methods->size();
+  if (num_supported <= 1) {
+    // TODO(yusukes): Handle this error in nicer way.
+    LOG(ERROR) << "GetSupportedInputMethods returned a fallback ID";
+    return false;
+  }
+
+  g_language_code_to_ids_map = new LanguageCodeToIdsMap;
+  // Register every input method reported by the library.
+  for (size_t i = 0; i < num_supported; ++i) {
+    const chromeos::InputMethodDescriptor& descriptor =
+        supported_input_methods->at(i);
+    g_language_code_to_ids_map->insert(std::make_pair(
+        chromeos::input_method::GetLanguageCodeFromDescriptor(descriptor),
+        descriptor.id));
+  }
+  // Then add the languages listed in kExtraLanguages.
+  using chromeos::input_method::kExtraLanguages;
+  for (size_t i = 0; i < arraysize(kExtraLanguages); ++i) {
+    g_language_code_to_ids_map->insert(std::make_pair(
+        kExtraLanguages[i].language_code,
+        kExtraLanguages[i].input_method_id));
+  }
+  return true;
+}
+
+}  // namespace
+
+namespace chromeos {
+namespace input_method {
+
+std::wstring GetString(const std::string& english_string) {
+  string16 localized_string;
+  // Fall back to the English text when no localized resource exists.
+  return GetLocalizedString(english_string, &localized_string)
+      ? UTF16ToWide(localized_string)
+      : UTF8ToWide(english_string);
+}
+
+std::string GetStringUTF8(const std::string& english_string) {
+  string16 localized_string;
+  // Fall back to the English text when no localized resource exists.
+  return GetLocalizedString(english_string, &localized_string)
+      ? UTF16ToUTF8(localized_string)
+      : english_string;
+}
+
+string16 GetStringUTF16(const std::string& english_string) {
+  string16 localized_string;
+  // Fall back to the English text when no localized resource exists.
+  return GetLocalizedString(english_string, &localized_string)
+      ? localized_string
+      : UTF8ToUTF16(english_string);
+}
+
+bool StringIsSupported(const std::string& english_string) {
+  string16 ignored_text;  // Only the success flag matters here.
+  return GetLocalizedString(english_string, &ignored_text);
+}
+
+std::string NormalizeLanguageCode(
+    const std::string& language_code) {
+  // Some ibus engines return locale codes like "zh_CN" as language codes.
+  // Rewrite these into the "zh-CN" form.
+  const bool looks_like_locale =
+      language_code.size() >= 5 && language_code[2] == '_';
+  if (looks_like_locale) {
+    std::string normalized = language_code;
+    normalized[2] = '-';
+    for (size_t i = 0; i < normalized.size(); ++i) {
+      if (i < 2) {
+        // The language code part is lower-cased.
+        normalized[i] = ToLowerASCII(normalized[i]);
+      } else if (i > 2) {
+        // The country code part is upper-cased.
+        normalized[i] = ToUpperASCII(normalized[i]);
+      }
+    }
+    return normalized;
+  }
+  // Only three-letter codes need further processing.
+  if (language_code.size() != 3) {
+    return language_code;
+  }
+
+  // Convert special language codes. See comments at kIso639VariantMapping.
+  std::string icu_code = language_code;
+  for (size_t i = 0; i < arraysize(kIso639VariantMapping); ++i) {
+    if (language_code == kIso639VariantMapping[i][0]) {
+      icu_code = kIso639VariantMapping[i][1];
+    }
+  }
+  // Ask ICU to convert the three-letter code to a two-letter code.
+  char two_letter_code[ULOC_LANG_CAPACITY];
+  UErrorCode error = U_ZERO_ERROR;
+  uloc_getLanguage(icu_code.c_str(),
+                   two_letter_code, sizeof(two_letter_code), &error);
+  return U_FAILURE(error) ? language_code : std::string(two_letter_code);
+}
+
+bool IsKeyboardLayout(const std::string& input_method_id) {
+  static const char kXkbPrefix[] = "xkb:";  // Prefix of keyboard layout ids.
+  return StartsWithASCII(input_method_id, kXkbPrefix, false);
+}
+
+std::string GetLanguageCodeFromDescriptor(
+    const InputMethodDescriptor& descriptor) {
+  // Handle some Chinese input methods as zh-CN/zh-TW, rather than zh.
+  // TODO: we should fix this issue in engines rather than here.
+  if (descriptor.language_code == "zh") {
+    const std::string& id = descriptor.id;
+    if (id == "pinyin") {
+      return "zh-CN";
+    }
+    if (id == "bopomofo" || id == "chewing" ||
+        id == "m17n:zh:cangjie" || id == "m17n:zh:quick") {
+      return "zh-TW";
+    }
+  }
+
+  std::string language_code = NormalizeLanguageCode(descriptor.language_code);
+  // Only XKB layouts for these languages get a country code appended, which
+  // makes them compatible with Chrome's application locale codes like "en-US".
+  // TODO(satorux): Maybe we need to handle "es" for "es-419".
+  // TODO: We should not rely on the format of the engine name. Should we add
+  //       |country_code| in InputMethodDescriptor?
+  const bool wants_country_code = language_code == "en" ||
+                                  language_code == "zh" ||
+                                  language_code == "pt";
+  if (!IsKeyboardLayout(descriptor.id) || !wants_country_code) {
+    return language_code;
+  }
+  std::vector<std::string> portions;
+  SplitString(descriptor.id, ':', &portions);
+  if (portions.size() >= 2 && !portions[1].empty()) {
+    language_code.append("-");
+    language_code.append(StringToUpperASCII(portions[1]));
+  }
+  return language_code;
+}
+
+std::wstring MaybeRewriteLanguageName(const std::wstring& language_name) {
+  // "t" is used as the language code for input methods that don't fall
+  // under any other languages. Every other name is returned unchanged.
+  if (language_name != L"t") {
+    return language_name;
+  }
+  return l10n_util::GetString(
+      IDS_OPTIONS_SETTINGS_LANGUAGES_OTHERS);
+}
+
+std::wstring GetLanguageDisplayNameFromCode(const std::string& language_code) {
+  if (!g_browser_process) {
+    return L"";
+  }
+  // Look the name up for the current application locale, then rewrite it.
+  const string16 display_name = l10n_util::GetDisplayNameForLocale(
+      language_code, g_browser_process->GetApplicationLocale(), true);
+  return MaybeRewriteLanguageName(UTF16ToWide(display_name));
+}
+
+void SortLanguageCodesByNames(std::vector<std::string>* language_codes) {
+  if (!g_browser_process) {
+    return;
+  }
+  // We should build collator outside of the comparator. We cannot have
+  // scoped_ptr<> in the comparator for a subtle STL reason.
+  const std::string app_locale = g_browser_process->GetApplicationLocale();
+  UErrorCode status = U_ZERO_ERROR;
+  scoped_ptr<icu::Collator> collator(
+      icu::Collator::createInstance(icu::Locale(app_locale.c_str()), status));
+  if (U_FAILURE(status)) {
+    collator.reset();  // The comparator then receives a NULL collator.
+  }
+  const CompareLanguageCodesByLanguageName by_language_name(collator.get());
+  std::sort(language_codes->begin(), language_codes->end(), by_language_name);
+}
+
+void SortInputMethodIdsByNames(
+    const std::map<std::string, std::string>& id_to_language_code_map,
+    std::vector<std::string>* input_method_ids) {
+  if (!g_browser_process) {
+    return;
+  }
+  const std::string app_locale = g_browser_process->GetApplicationLocale();
+  UErrorCode status = U_ZERO_ERROR;
+  scoped_ptr<icu::Collator> collator(
+      icu::Collator::createInstance(icu::Locale(app_locale.c_str()), status));
+  if (U_FAILURE(status)) {
+    collator.reset();  // The comparator then receives a NULL collator.
+  }
+  const CompareInputMethodIdsByLanguageName by_name(
+      collator.get(), id_to_language_code_map);
+  std::stable_sort(input_method_ids->begin(), input_method_ids->end(), by_name);
+}
+
+void ReorderInputMethodIdsForLanguageCode(
+    const std::string& language_code,
+    std::vector<std::string>* input_method_ids) {
+  for (size_t i = 0; i < arraysize(kLanguageDefaultInputMethodIds); ++i) {
+    if (language_code != kLanguageDefaultInputMethodIds[i].language_code) {
+      continue;
+    }
+    const std::vector<std::string>::iterator pos = std::find(
+        input_method_ids->begin(), input_method_ids->end(),
+        kLanguageDefaultInputMethodIds[i].input_method_id);
+    // Swap the default id with the top one unless it is absent or on top.
+    if (pos != input_method_ids->end() && pos != input_method_ids->begin()) {
+      std::swap(*input_method_ids->begin(), *pos);
+    }
+    return;  // Don't have to check other language codes.
+  }
+}
+
+bool GetInputMethodIdsByLanguageCode(
+    const std::string& normalized_language_code,
+    bool keyboard_layout_only,
+    std::vector<std::string>* out_input_method_ids) {
+  DCHECK(out_input_method_ids);
+  out_input_method_ids->clear();
+
+  // Nothing can be looked up until the language-to-ids map is available.
+  if (!InitializeLanguageCodeToIdsMap()) {
+    return false;
+  }
+  typedef LanguageCodeToIdsMap::const_iterator MapIterator;
+  const std::pair<MapIterator, MapIterator> range =
+      g_language_code_to_ids_map->equal_range(normalized_language_code);
+  for (MapIterator iter = range.first; iter != range.second; ++iter) {
+    if (keyboard_layout_only && !IsKeyboardLayout(iter->second)) {
+      continue;  // Skip input methods that are not keyboard layouts.
+    }
+    out_input_method_ids->push_back(iter->second);
+  }
+  const bool found_any = !out_input_method_ids->empty();
+  if (!found_any) {
+    LOG(ERROR) << "Unknown language code: " << normalized_language_code;
+  }
+  return found_any;
+}
+
+}  // namespace input_method
+}  // namespace chromeos
diff --git a/chrome/browser/chromeos/input_method/input_method_util.h b/chrome/browser/chromeos/input_method/input_method_util.h
new file mode 100644
index 0000000..77ebbd3
--- /dev/null
+++ b/chrome/browser/chromeos/input_method/input_method_util.h
@@ -0,0 +1,119 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_CHROMEOS_INPUT_METHOD_INPUT_METHOD_UTIL_H_
+#define CHROME_BROWSER_CHROMEOS_INPUT_METHOD_INPUT_METHOD_UTIL_H_
+
+#include <string>
+#include <vector>
+
+#include "base/string16.h"
+#include "chrome/browser/chromeos/cros/language_library.h"
+
+namespace chromeos {
+namespace input_method {
+
+// The list of languages that do not have associated input methods. For
+// these languages, we associate input methods here.
+const struct ExtraLanguage {
+  const char* language_code;
+  const char* input_method_id;
+} kExtraLanguages[] = {
+  { "id", "xkb:us::eng" }, // For Indonesian, use US keyboard layout.
+  // The code "fil" comes from app/l10n_util.cc.
+  { "fil", "xkb:us::eng" },  // For Filipino, use US keyboard layout.
+  // The code "es-419" comes from app/l10n_util.cc.
+  // For Spanish in Latin America, use Spanish keyboard layout.
+  { "es-419", "xkb:es::spa" },
+};
+// TODO(yusukes): Move |kExtraLanguages| to input_method_util.cc.
+
+// Converts a string sent from IBus IME engines, which is written in English,
+// into Chrome's string ID, then pulls internationalized resource string from
+// the resource bundle and returns it. These functions are not thread-safe.
+// Non-UI threads are not allowed to call them.
+std::wstring GetString(const std::string& english_string);
+std::string GetStringUTF8(const std::string& english_string);
+string16 GetStringUTF16(const std::string& english_string);
+
+// This method is ONLY for unit testing. Returns true if the given string is
+// supported (i.e. the string is associated with a resource ID).
+bool StringIsSupported(const std::string& english_string);
+
+// Normalizes the language code and returns the normalized version.  The
+// function normalizes the given language code to be compatible with the
+// one used in Chrome's application locales. Otherwise, returns the
+// given language code as-is.
+//
+// Examples:
+//
+// - "zh_CN" => "zh-CN" (Use - instead of _)
+// - "jpn"   => "ja"    (Use two-letter code)
+// - "t"     => "t"     (Return as-is if unknown)
+std::string NormalizeLanguageCode(const std::string& language_code);
+
+// Returns true if the given input method id is for a keyboard layout.
+bool IsKeyboardLayout(const std::string& input_method_id);
+
+// Gets the language code from the given input method descriptor.  This
+// encapsulates differences between the language codes used in
+// InputMethodDescriptor and Chrome's application locale codes.
+std::string GetLanguageCodeFromDescriptor(
+    const InputMethodDescriptor& descriptor);
+
+// Gets the keyboard layout name from the given input method ID.
+// If the ID is invalid, the default layout name will be returned.
+//
+// Examples:
+//
+// "xkb:us::eng"       => "us"
+// "xkb:us:dvorak:eng" => "us(dvorak)"
+std::string GetKeyboardLayoutName(const std::string& input_method_id);
+
+// Rewrites the language name and returns the modified version if
+// necessary. Otherwise, returns the given language name as is.
+// In particular, this rewrites the special language name used for input
+// methods that don't fall under any other languages.
+std::wstring MaybeRewriteLanguageName(const std::wstring& language_name);
+
+// Converts a language code to a language display name, using the
+// current application locale. MaybeRewriteLanguageName() is called
+// internally.
+// Examples: "fr"    => "French"
+//           "en-US" => "English (United States)"
+std::wstring GetLanguageDisplayNameFromCode(const std::string& language_code);
+
+// Sorts the given language codes by their corresponding language names,
+// using the unicode string comparator. Uses unstable sorting.
+void SortLanguageCodesByNames(std::vector<std::string>* language_codes);
+
+// Sorts the given input method ids by their corresponding language names,
+// using the unicode string comparator. Uses stable sorting.
+void SortInputMethodIdsByNames(
+    const std::map<std::string, std::string>& id_to_language_code_map,
+    std::vector<std::string>* input_method_ids);
+
+// Reorders the given input method ids for the language code. For
+// example, if |language_code| is "fr" and |input_method_ids| contains
+// ["xkb:be::fra", "xkb:fr::fra"], the list is reordered to
+// ["xkb:fr::fra", "xkb:be::fra"], so that the French keyboard layout
+// comes before the Belgian keyboard layout.
+void ReorderInputMethodIdsForLanguageCode(
+    const std::string& language_code,
+    std::vector<std::string>* input_method_ids);
+
+// Gets input method ids that belong to |language_code|.
+// If |keyboard_layout_only| is true, the function does not return input methods
+// that are not for keyboard layout switching. Returns true on success. Note that
+// the function might return false if ibus-daemon is not running, or
+// |language_code| is unknown.
+bool GetInputMethodIdsByLanguageCode(
+    const std::string& language_code,
+    bool keyboard_layout_only,
+    std::vector<std::string>* out_input_method_ids);
+
+}  // namespace input_method
+}  // namespace chromeos
+
+#endif  // CHROME_BROWSER_CHROMEOS_INPUT_METHOD_INPUT_METHOD_UTIL_H_
diff --git a/chrome/browser/chromeos/input_method/input_method_util_unittest.cc b/chrome/browser/chromeos/input_method/input_method_util_unittest.cc
new file mode 100644
index 0000000..3c59b6a
--- /dev/null
+++ b/chrome/browser/chromeos/input_method/input_method_util_unittest.cc
@@ -0,0 +1,204 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/chromeos/input_method/input_method_util.h"
+
+#include <string>
+
+#include "app/l10n_util.h"
+#include "base/utf_string_conversions.h"
+#include "grit/generated_resources.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace chromeos {
+namespace input_method {
+
+TEST(InputMethodUtilTest, FindLocalizedStringTest) {
+  EXPECT_TRUE(StringIsSupported("Hiragana"));  // Strings sent by ibus-mozc.
+  EXPECT_TRUE(StringIsSupported("Latin"));
+  EXPECT_TRUE(StringIsSupported("Direct input"));
+  EXPECT_FALSE(StringIsSupported("####THIS_STRING_IS_NOT_SUPPORTED####"));
+}
+
+TEST(InputMethodUtilTest, NormalizeLanguageCode) {
+  // TODO(yusukes): test all language codes that IBus provides.
+  EXPECT_EQ("ja", NormalizeLanguageCode("ja"));
+  EXPECT_EQ("ja", NormalizeLanguageCode("jpn"));  // Three-letter code.
+  EXPECT_EQ("t", NormalizeLanguageCode("t"));  // Returned as-is.
+  EXPECT_EQ("zh-CN", NormalizeLanguageCode("zh-CN"));
+  EXPECT_EQ("zh-CN", NormalizeLanguageCode("zh_CN"));  // '_' becomes '-'.
+  EXPECT_EQ("en-US", NormalizeLanguageCode("EN_us"));  // Letter case is fixed.
+  // See app/l10n_util.cc for es-419.
+  EXPECT_EQ("es-419", NormalizeLanguageCode("es_419"));
+
+  // Special three-letter language codes (see kIso639VariantMapping).
+  EXPECT_EQ("cs", NormalizeLanguageCode("cze"));
+  EXPECT_EQ("de", NormalizeLanguageCode("ger"));
+  EXPECT_EQ("el", NormalizeLanguageCode("gre"));
+  EXPECT_EQ("hr", NormalizeLanguageCode("scr"));
+  EXPECT_EQ("ro", NormalizeLanguageCode("rum"));
+  EXPECT_EQ("sk", NormalizeLanguageCode("slo"));
+}
+
+TEST(InputMethodUtilTest, IsKeyboardLayout) {
+  EXPECT_TRUE(IsKeyboardLayout("xkb:us::eng"));  // Has the "xkb:" prefix.
+  EXPECT_FALSE(IsKeyboardLayout("anthy"));  // No "xkb:" prefix.
+}
+
+TEST(InputMethodUtilTest, GetLanguageCodeFromDescriptor) {
+  EXPECT_EQ("ja", GetLanguageCodeFromDescriptor(
+      InputMethodDescriptor("anthy", "Anthy", "us", "ja")));
+  EXPECT_EQ("zh-TW", GetLanguageCodeFromDescriptor(  // "zh" ids are special.
+      InputMethodDescriptor("chewing", "Chewing", "us", "zh")));
+  EXPECT_EQ("zh-TW", GetLanguageCodeFromDescriptor(
+      InputMethodDescriptor("bopomofo", "Bopomofo(Zhuyin)", "us", "zh")));
+  EXPECT_EQ("zh-TW", GetLanguageCodeFromDescriptor(
+      InputMethodDescriptor("m17n:zh:cangjie", "Cangjie", "us", "zh")));
+  EXPECT_EQ("zh-TW", GetLanguageCodeFromDescriptor(
+      InputMethodDescriptor("m17n:zh:quick", "Quick", "us", "zh")));
+  EXPECT_EQ("zh-CN", GetLanguageCodeFromDescriptor(
+      InputMethodDescriptor("pinyin", "Pinyin", "us", "zh")));
+  EXPECT_EQ("en-US", GetLanguageCodeFromDescriptor(  // XKB: country is added.
+      InputMethodDescriptor("xkb:us::eng", "USA", "us", "eng")));
+  EXPECT_EQ("en-UK", GetLanguageCodeFromDescriptor(
+      InputMethodDescriptor("xkb:uk::eng", "United Kingdom", "us", "eng")));
+}
+
+TEST(InputMethodUtilTest, MaybeRewriteLanguageName) {
+  EXPECT_EQ(L"English", MaybeRewriteLanguageName(L"English"));  // Unchanged.
+  EXPECT_EQ(l10n_util::GetString(IDS_OPTIONS_SETTINGS_LANGUAGES_OTHERS),
+            MaybeRewriteLanguageName(L"t"));
+}
+
+TEST(InputMethodUtilTest, GetLanguageDisplayNameFromCode) {
+  EXPECT_EQ(L"French", GetLanguageDisplayNameFromCode("fr"));
+  // MaybeRewriteLanguageName() should be applied.
+  EXPECT_EQ(l10n_util::GetString(IDS_OPTIONS_SETTINGS_LANGUAGES_OTHERS),
+            GetLanguageDisplayNameFromCode("t"));
+}
+
+TEST(InputMethodUtilTest, SortLanguageCodesByNames) {
+  std::vector<std::string> language_codes;
+  // Check if this function can handle an empty list.
+  SortLanguageCodesByNames(&language_codes);
+
+  language_codes.push_back("ja");
+  language_codes.push_back("fr");
+  language_codes.push_back("t");
+  SortLanguageCodesByNames(&language_codes);
+  ASSERT_EQ(3U, language_codes.size());
+  ASSERT_EQ("fr", language_codes[0]);  // French
+  ASSERT_EQ("ja", language_codes[1]);  // Japanese
+  ASSERT_EQ("t",  language_codes[2]);  // Others
+
+  // Add a duplicate entry and see if it works.
+  language_codes.push_back("ja");
+  SortLanguageCodesByNames(&language_codes);
+  ASSERT_EQ(4U, language_codes.size());
+  ASSERT_EQ("fr", language_codes[0]);  // French
+  ASSERT_EQ("ja", language_codes[1]);  // Japanese
+  ASSERT_EQ("ja", language_codes[2]);  // Japanese
+  ASSERT_EQ("t",  language_codes[3]);  // Others
+}
+
+TEST(InputMethodUtilTest, SortInputMethodIdsByNames) {
+  std::map<std::string, std::string> id_to_language_code_map;
+  id_to_language_code_map.insert(std::make_pair("mozc", "ja"));
+  id_to_language_code_map.insert(std::make_pair("mozc-jp", "ja"));
+  id_to_language_code_map.insert(std::make_pair("xkb:jp::jpn", "ja"));
+  id_to_language_code_map.insert(std::make_pair("xkb:fr::fra", "fr"));
+  id_to_language_code_map.insert(std::make_pair("m17n:latn-pre", "t"));
+
+  std::vector<std::string> input_method_ids;
+  // Check if this function can handle an empty list.
+  SortInputMethodIdsByNames(id_to_language_code_map,
+                            &input_method_ids);
+
+  input_method_ids.push_back("mozc");           // Japanese
+  input_method_ids.push_back("xkb:fr::fra");    // French
+  input_method_ids.push_back("m17n:latn-pre");  // Others
+  SortInputMethodIdsByNames(id_to_language_code_map,
+                            &input_method_ids);
+  ASSERT_EQ(3U, input_method_ids.size());
+  ASSERT_EQ("xkb:fr::fra", input_method_ids[0]);     // French
+  ASSERT_EQ("mozc", input_method_ids[1]);            // Japanese
+  ASSERT_EQ("m17n:latn-pre",  input_method_ids[2]);  // Others
+
+  // Add a duplicate entry and see if it works.
+  // Note that SortInputMethodIdsByNames uses std::stable_sort.
+  input_method_ids.push_back("xkb:jp::jpn");  // also Japanese
+  SortInputMethodIdsByNames(id_to_language_code_map,
+                            &input_method_ids);
+  ASSERT_EQ(4U, input_method_ids.size());
+  ASSERT_EQ("xkb:fr::fra", input_method_ids[0]);     // French
+  ASSERT_EQ("mozc", input_method_ids[1]);            // Japanese
+  ASSERT_EQ("xkb:jp::jpn", input_method_ids[2]);     // Japanese
+  ASSERT_EQ("m17n:latn-pre",  input_method_ids[3]);  // Others
+
+  input_method_ids.push_back("mozc-jp");  // also Japanese
+  SortInputMethodIdsByNames(id_to_language_code_map,
+                            &input_method_ids);
+  ASSERT_EQ(5U, input_method_ids.size());
+  ASSERT_EQ("xkb:fr::fra", input_method_ids[0]);     // French
+  ASSERT_EQ("mozc", input_method_ids[1]);            // Japanese
+  ASSERT_EQ("xkb:jp::jpn", input_method_ids[2]);     // Japanese
+  ASSERT_EQ("mozc-jp", input_method_ids[3]);         // Japanese
+  ASSERT_EQ("m17n:latn-pre",  input_method_ids[4]);  // Others
+}
+
+TEST(InputMethodUtilTest, ReorderInputMethodIdsForLanguageCode_DE) {
+  std::vector<std::string> input_method_ids;
+  input_method_ids.push_back("xkb:ch::ger");  // Switzerland - German
+  input_method_ids.push_back("xkb:de::ger");  // Germany - German
+  ReorderInputMethodIdsForLanguageCode("de", &input_method_ids);
+  // The list should be reordered.
+  ASSERT_EQ(2U, input_method_ids.size());
+  EXPECT_EQ("xkb:de::ger", input_method_ids[0]);
+  EXPECT_EQ("xkb:ch::ger", input_method_ids[1]);
+}
+
+TEST(InputMethodUtilTest, ReorderInputMethodIdsForLanguageCode_FR) {
+  std::vector<std::string> input_method_ids;
+  input_method_ids.push_back("xkb:be::fra");  // Belgium - French
+  input_method_ids.push_back("xkb:fr::fra");  // France - French
+  ReorderInputMethodIdsForLanguageCode("fr", &input_method_ids);
+  // The list should be reordered.
+  ASSERT_EQ(2U, input_method_ids.size());
+  EXPECT_EQ("xkb:fr::fra", input_method_ids[0]);
+  EXPECT_EQ("xkb:be::fra", input_method_ids[1]);
+}
+
+TEST(InputMethodUtilTest, ReorderInputMethodIdsForLanguageCode_EN_US) {
+  std::vector<std::string> input_method_ids;
+  input_method_ids.push_back("xkb:us:dvorak:eng");  // US - Dvorak - English
+  input_method_ids.push_back("xkb:us::eng");  // US - English
+  ReorderInputMethodIdsForLanguageCode("en-US", &input_method_ids);
+  // The list should be reordered.
+  ASSERT_EQ(2U, input_method_ids.size());
+  EXPECT_EQ("xkb:us::eng", input_method_ids[0]);
+  EXPECT_EQ("xkb:us:dvorak:eng", input_method_ids[1]);
+}
+
+TEST(InputMethodUtilTest, ReorderInputMethodIdsForLanguageCode_FI) {
+  std::vector<std::string> input_method_ids;
+  input_method_ids.push_back("xkb:fi::fin");  // Finland - Finnish
+  ReorderInputMethodIdsForLanguageCode("fi", &input_method_ids);
+  // There is no rule for reordering for Finnish.
+  ASSERT_EQ(1U, input_method_ids.size());
+  EXPECT_EQ("xkb:fi::fin", input_method_ids[0]);
+}
+
+TEST(InputMethodUtilTest, ReorderInputMethodIdsForLanguageCode_Noop) {
+  std::vector<std::string> input_method_ids;
+  input_method_ids.push_back("xkb:fr::fra");  // France - French
+  input_method_ids.push_back("xkb:be::fra");  // Belgium - French
+  // If the list is already sorted, nothing should happen.
+  ReorderInputMethodIdsForLanguageCode("fr", &input_method_ids);
+  ASSERT_EQ(2U, input_method_ids.size());
+  EXPECT_EQ("xkb:fr::fra", input_method_ids[0]);
+  EXPECT_EQ("xkb:be::fra", input_method_ids[1]);
+}
+
+}  // namespace input_method
+}  // namespace chromeos