diff options
author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
---|---|---|
committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
commit | 09911bf300f1a419907a9412154760efd0b7abc3 (patch) | |
tree | f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/character_encoding.cc | |
parent | 586acc5fe142f498261f52c66862fa417c3d52d2 (diff) | |
download | chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2 |
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/character_encoding.cc')
-rw-r--r-- | chrome/browser/character_encoding.cc | 460 |
1 files changed, 460 insertions, 0 deletions
diff --git a/chrome/browser/character_encoding.cc b/chrome/browser/character_encoding.cc new file mode 100644 index 0000000..9e4404c --- /dev/null +++ b/chrome/browser/character_encoding.cc @@ -0,0 +1,460 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "chrome/browser/character_encoding.h" + +#include <map> +#include <set> + +#include "base/logging.h" +#include "base/scoped_ptr.h" +#include "base/string_tokenizer.h" +#include "base/string_util.h" +#include "chrome/app/chrome_dll_resource.h" +#include "chrome/common/l10n_util.h" +#include "unicode/ucnv.h" + +#include "generated_resources.h" + +namespace { + +// The maximum length of short list of recently user selected encodings is 3. +const int kUserSelectedEncodingsMaxLength = 3; + +typedef struct { + int resource_id; + const wchar_t* name; + int category_string_id; +} CanonicalEncodingData; + +// An array of all supported canonical encoding names. +static CanonicalEncodingData canonical_encoding_names[] = { + { IDC_ENCODING_UTF8, L"UTF-8", IDS_ENCODING_UNICODE }, + { IDC_ENCODING_UTF16LE, L"UTF-16LE", IDS_ENCODING_UNICODE }, + { IDC_ENCODING_ISO88591, L"ISO-8859-1", IDS_ENCODING_WESTERN }, + { IDC_ENCODING_WINDOWS1252, L"windows-1252", IDS_ENCODING_WESTERN }, + { IDC_ENCODING_GB2312, L"GB2312", IDS_ENCODING_SIMP_CHINESE }, + { IDC_ENCODING_GB18030, L"gb18030", IDS_ENCODING_SIMP_CHINESE }, + { IDC_ENCODING_BIG5, L"Big5", IDS_ENCODING_TRAD_CHINESE }, + { IDC_ENCODING_BIG5HKSCS, L"Big5-HKSCS", IDS_ENCODING_TRAD_CHINESE }, + { IDC_ENCODING_KOREAN, L"EUC-KR", IDS_ENCODING_KOREAN }, + { IDC_ENCODING_SHIFTJIS, L"Shift_JIS", IDS_ENCODING_JAPANESE }, + { IDC_ENCODING_ISO2022JP, L"ISO-2022-JP", IDS_ENCODING_JAPANESE }, + { IDC_ENCODING_EUCJP, L"EUC-JP", IDS_ENCODING_JAPANESE }, + { IDC_ENCODING_THAI, L"TIS-620", IDS_ENCODING_THAI }, + { IDC_ENCODING_ISO885915, L"ISO-8859-15", IDS_ENCODING_WESTERN }, + { IDC_ENCODING_MACINTOSH, L"macintosh", IDS_ENCODING_WESTERN }, + { IDC_ENCODING_ISO88592, L"ISO-8859-2", IDS_ENCODING_CENTRAL_EUROPEAN }, + { IDC_ENCODING_WINDOWS1250, L"windows-1250", IDS_ENCODING_CENTRAL_EUROPEAN }, + { IDC_ENCODING_ISO88595, L"ISO-8859-5", IDS_ENCODING_CYRILLIC }, + { IDC_ENCODING_WINDOWS1251, L"windows-1251", IDS_ENCODING_CYRILLIC }, + { IDC_ENCODING_KOI8R, L"KOI8-R", IDS_ENCODING_CYRILLIC }, + { IDC_ENCODING_KOI8U, L"KOI8-U", IDS_ENCODING_CYRILLIC }, + { IDC_ENCODING_ISO88597, L"ISO-8859-7", IDS_ENCODING_GREEK }, + { IDC_ENCODING_WINDOWS1253, L"windows-1253", IDS_ENCODING_GREEK }, + { IDC_ENCODING_ISO88599, L"ISO-8859-9", IDS_ENCODING_TURKISH }, + { IDC_ENCODING_WINDOWS1254, L"windows-1254", IDS_ENCODING_TURKISH }, + { IDC_ENCODING_ISO88596, L"ISO-8859-6", IDS_ENCODING_ARABIC }, + { IDC_ENCODING_WINDOWS1256, L"windows-1256", IDS_ENCODING_ARABIC }, + { IDC_ENCODING_ISO88598, L"ISO-8859-8", IDS_ENCODING_HEBREW }, + { IDC_ENCODING_WINDOWS1255, L"windows-1255", IDS_ENCODING_HEBREW }, + { IDC_ENCODING_WINDOWS1258, L"windows-1258", IDS_ENCODING_VIETNAMESE }, + { IDC_ENCODING_ISO88594, L"ISO-8859-4", IDS_ENCODING_BALTIC }, + { IDC_ENCODING_ISO885913, L"ISO-8859-13", IDS_ENCODING_BALTIC }, + { IDC_ENCODING_WINDOWS1257, L"windows-1257", IDS_ENCODING_BALTIC }, + { IDC_ENCODING_ISO88593, L"ISO-8859-3", IDS_ENCODING_SOUTH_EUROPEAN }, + { IDC_ENCODING_ISO885910, L"ISO-8859-10", IDS_ENCODING_NORDIC }, + { IDC_ENCODING_ISO885914, L"ISO-8859-14", IDS_ENCODING_CELTIC }, + { IDC_ENCODING_ISO885916, L"ISO-8859-16", IDS_ENCODING_ROMANIAN }, +}; + +static const int canonical_encoding_names_length = + arraysize(canonical_encoding_names); + +typedef std::map<int, std::pair<const wchar_t*, int> > IdToCanonicalEncodingNameMapType; +typedef std::map<const std::wstring, int> CanonicalEncodingNameToIdMapType; + +class CanonicalEncodingMap { + public: + CanonicalEncodingMap() + : id_to_encoding_name_map_(NULL), + encoding_name_to_id_map_(NULL) { } + const IdToCanonicalEncodingNameMapType* GetIdToCanonicalEncodingNameMapData(); + const CanonicalEncodingNameToIdMapType* GetCanonicalEncodingNameToIdMapData(); + std::vector<int>* const locale_dependent_encoding_ids() { + return &locale_dependent_encoding_ids_; + } + + std::vector<int>* const current_display_encoding_ids() { + return ¤t_display_encoding_ids_; + } + + private: + scoped_ptr<IdToCanonicalEncodingNameMapType> id_to_encoding_name_map_; + scoped_ptr<CanonicalEncodingNameToIdMapType> encoding_name_to_id_map_; + std::vector<int> locale_dependent_encoding_ids_; + std::vector<int> current_display_encoding_ids_; + + DISALLOW_EVIL_CONSTRUCTORS(CanonicalEncodingMap); +}; + +const IdToCanonicalEncodingNameMapType* CanonicalEncodingMap::GetIdToCanonicalEncodingNameMapData() { + // Testing and building map is not thread safe, this function is supposed to + // only run in UI thread. Myabe I should add a lock in here for making it as + // thread safe. + if (!id_to_encoding_name_map_.get()) { + id_to_encoding_name_map_.reset(new IdToCanonicalEncodingNameMapType); + for (int i = 0; i < canonical_encoding_names_length; ++i) { + int resource_id = canonical_encoding_names[i].resource_id; + (*id_to_encoding_name_map_)[resource_id] = + std::make_pair(canonical_encoding_names[i].name, + canonical_encoding_names[i].category_string_id); + } + } + return id_to_encoding_name_map_.get(); +} + +const CanonicalEncodingNameToIdMapType* CanonicalEncodingMap::GetCanonicalEncodingNameToIdMapData() { + if (!encoding_name_to_id_map_.get()) { + encoding_name_to_id_map_.reset(new CanonicalEncodingNameToIdMapType); + for (int i = 0; i < canonical_encoding_names_length; ++i) { + (*encoding_name_to_id_map_)[canonical_encoding_names[i].name] = + canonical_encoding_names[i].resource_id; + } + } + return encoding_name_to_id_map_.get(); +} + +// A static map object which contains all resourceid-nonsequenced canonical +// encoding names. +static CanonicalEncodingMap canonical_encoding_name_map_singleton; + +// Static. +// Get encoding command id according to input encoding name. If the name is +// valid, return corresponding encoding command id. Otherwise return 0; +static int GetCommandIdByCanonicalEncodingName( + const std::wstring& encoding_name) { + const CanonicalEncodingNameToIdMapType* map = + canonical_encoding_name_map_singleton. + GetCanonicalEncodingNameToIdMapData(); + DCHECK(map); + + CanonicalEncodingNameToIdMapType::const_iterator found_id = + map->find(encoding_name); + if (found_id != map->end()) + return found_id->second; + return 0; +} + +const int default_encoding_menus[] = { + IDC_ENCODING_UTF16LE, + 0, + IDC_ENCODING_ISO88591, + IDC_ENCODING_WINDOWS1252, + 0, + IDC_ENCODING_GB2312, + IDC_ENCODING_GB18030, + IDC_ENCODING_BIG5, + IDC_ENCODING_BIG5HKSCS, + 0, + IDC_ENCODING_KOREAN, + 0, + IDC_ENCODING_SHIFTJIS, + IDC_ENCODING_ISO2022JP, + IDC_ENCODING_EUCJP, + 0, + IDC_ENCODING_THAI, + 0, + IDC_ENCODING_ISO885915, + IDC_ENCODING_MACINTOSH, + IDC_ENCODING_ISO88592, + IDC_ENCODING_WINDOWS1250, + 0, + IDC_ENCODING_ISO88595, + IDC_ENCODING_WINDOWS1251, + IDC_ENCODING_KOI8R, + IDC_ENCODING_KOI8U, + 0, + IDC_ENCODING_ISO88597, + IDC_ENCODING_WINDOWS1253, + IDC_ENCODING_ISO88599, + IDC_ENCODING_WINDOWS1254, + IDC_ENCODING_ISO88596, + IDC_ENCODING_WINDOWS1256, + IDC_ENCODING_ISO88598, + IDC_ENCODING_WINDOWS1255, + IDC_ENCODING_WINDOWS1258, + + IDC_ENCODING_ISO88594, + IDC_ENCODING_ISO885913, + IDC_ENCODING_WINDOWS1257, + IDC_ENCODING_ISO88593, + IDC_ENCODING_ISO885910, + IDC_ENCODING_ISO885914, + IDC_ENCODING_ISO885916, +}; + +const int default_encoding_menus_length = arraysize(default_encoding_menus); + +// Parse the input |encoding_list| which is a encoding list separated with +// comma, get available encoding ids and save them to |available_list|. +// The parameter |maximum_size| indicates maximum size of encoding items we +// want to get from the |encoding_list|. +static void ParseEncodingListSeparatedWithComma( + const std::wstring& encoding_list, std::vector<int>* const available_list, + size_t maximum_size) { + WStringTokenizer tokenizer(encoding_list, L","); + while (tokenizer.GetNext()) { + int id = GetCommandIdByCanonicalEncodingName(tokenizer.token()); + // Ignore invalid encoding. + if (!id) + continue; + available_list->push_back(id); + if (available_list->size() == maximum_size) + return; + } +} + +std::wstring GetEncodingDisplayName(std::wstring encoding_name, + int category_string_id) { + std::wstring category_name = l10n_util::GetString(category_string_id); + if (category_string_id != IDS_ENCODING_KOREAN && + category_string_id != IDS_ENCODING_THAI) { + return l10n_util::GetStringF(IDS_ENCODING_DISPLAY_TEMPLATE, + category_name, + encoding_name); + } + return category_name; +} + +} // namespace + +// Static. +std::wstring CharacterEncoding::GetCanonicalEncodingNameByCommandId(int id) { + const IdToCanonicalEncodingNameMapType* map = + canonical_encoding_name_map_singleton. + GetIdToCanonicalEncodingNameMapData(); + DCHECK(map); + + IdToCanonicalEncodingNameMapType::const_iterator found_name = map->find(id); + if (found_name != map->end()) + return found_name->second.first; + return std::wstring(); +} + +// Static. +std::wstring CharacterEncoding::GetCanonicalEncodingDisplayNameByCommandId( + int id) { + const IdToCanonicalEncodingNameMapType* map = + canonical_encoding_name_map_singleton. + GetIdToCanonicalEncodingNameMapData(); + DCHECK(map); + + IdToCanonicalEncodingNameMapType::const_iterator found_name = map->find(id); + if (found_name != map->end()) + return GetEncodingDisplayName(found_name->second.first, + found_name->second.second); + return std::wstring(); +} + +// Static. +// Return count number of all supported canonical encoding. +int CharacterEncoding::GetSupportCanonicalEncodingCount() { + return canonical_encoding_names_length; +} + +// Static. +std::wstring CharacterEncoding::GetCanonicalEncodingNameByIndex(int index) { + if (index < canonical_encoding_names_length) + return canonical_encoding_names[index].name; + return std::wstring(); +} + +// Static. +std::wstring CharacterEncoding::GetCanonicalEncodingDisplayNameByIndex( + int index) { + if (index < canonical_encoding_names_length) + return GetEncodingDisplayName(canonical_encoding_names[index].name, + canonical_encoding_names[index].category_string_id); + return std::wstring(); +} + +// Static. +std::wstring CharacterEncoding::GetCanonicalEncodingNameByAliasName( + const std::wstring& alias_name) { + // If the input alias_name is already canonical encoding name, just return it. + const CanonicalEncodingNameToIdMapType* map = + canonical_encoding_name_map_singleton. + GetCanonicalEncodingNameToIdMapData(); + DCHECK(map); + + CanonicalEncodingNameToIdMapType::const_iterator found_id = + map->find(alias_name); + if (found_id != map->end()) + return alias_name; + + UErrorCode error_code = U_ZERO_ERROR; + + const char* canonical_name = ucnv_getCanonicalName( + WideToASCII(alias_name).c_str(), "MIME", &error_code); + // If failed, then try IANA next. + if (U_FAILURE(error_code) || !canonical_name) { + error_code = U_ZERO_ERROR; + canonical_name = ucnv_getCanonicalName( + WideToASCII(alias_name).c_str(), "IANA", &error_code); + } + + if (canonical_name) + return ASCIIToWide(canonical_name); + else + return std::wstring(); +} + +// Static +// According to the behavior of user recently selected encoding short list in +// FireFox, we always put UTF-8 as toppest position, after then put user +// recently selected encodings, then put local dependent encoding items. +// At last, we put all rest encoding items. +const std::vector<int>* CharacterEncoding::GetCurrentDisplayEncodings( + const std::wstring& locale_encodings, + const std::wstring& recently_select_encodings) { + std::vector<int>* const locale_dependent_encoding_list = + canonical_encoding_name_map_singleton.locale_dependent_encoding_ids(); + std::vector<int>* const encoding_list = + canonical_encoding_name_map_singleton.current_display_encoding_ids(); + + // Initialize locale dependent static encoding list. + if (locale_dependent_encoding_list->empty() && !locale_encodings.empty()) + ParseEncodingListSeparatedWithComma(locale_encodings, + locale_dependent_encoding_list, + kUserSelectedEncodingsMaxLength); + + static std::wstring cached_user_selected_encodings; + // Build current display encoding list. + if (encoding_list->empty() || + cached_user_selected_encodings != recently_select_encodings) { + // Update user recently selected encodings. + cached_user_selected_encodings = recently_select_encodings; + // Clear old encoding list since user recently selected encodings changed. + encoding_list->clear(); + // Always add UTF-8 to first encoding position. + encoding_list->push_back(IDC_ENCODING_UTF8); + std::set<int> inserted_encoding; + inserted_encoding.insert(IDC_ENCODING_UTF8); + + // Parse user recently selected encodings and get list + std::vector<int> recently_select_encoding_list; + ParseEncodingListSeparatedWithComma(recently_select_encodings, + &recently_select_encoding_list, + kUserSelectedEncodingsMaxLength); + + // Put 'cached encodings' (dynamic encoding list) after 'local dependent + // encoding list'. + recently_select_encoding_list.insert(recently_select_encoding_list.begin(), + locale_dependent_encoding_list->begin(), + locale_dependent_encoding_list->end()); + std::vector<int>::const_iterator it; + for (it = recently_select_encoding_list.begin(); + it != recently_select_encoding_list.end(); ++it) { + // Test whether we have met this encoding id. + bool ok = inserted_encoding.insert(*it).second; + // Duplicated encoding, ignore it. Ideally, this situation should not + // happened, but just in case some one manually edit preference file. + if (!ok) + continue; + encoding_list->push_back(*it); + } + // Append a separator; + encoding_list->push_back(0); + + // Add those encodings which are in default_encoding_menus and does not + // override with locale-dependent encodings list. + bool previous_is_separator = true; + for (int i = 0; i < default_encoding_menus_length; ++i) { + int id = default_encoding_menus[i]; + if (id) { + // We have inserted this encoding, skip it. + if (inserted_encoding.find(id) != inserted_encoding.end()) + continue; + encoding_list->push_back(id); + previous_is_separator = false; + } else if (!previous_is_separator) { + encoding_list->push_back(0); + previous_is_separator = true; + } + } + } + DCHECK(!encoding_list->empty()); + return encoding_list; +} + +// Static +bool CharacterEncoding::UpdateRecentlySelectdEncoding( + const std::wstring& original_selected_encodings, + int new_selected_encoding_id, + std::wstring* selected_encodings) { + // Get encoding name. + std::wstring encoding_name = + GetCanonicalEncodingNameByCommandId(new_selected_encoding_id); + DCHECK(!encoding_name.empty()); + // Check whether the new encoding is in local dependent encodings or original + // recently selected encodings. If yes, do not add it. + std::vector<int>* locale_dependent_encoding_list = + canonical_encoding_name_map_singleton.locale_dependent_encoding_ids(); + DCHECK(locale_dependent_encoding_list); + std::vector<int> selected_encoding_list; + ParseEncodingListSeparatedWithComma(original_selected_encodings, + &selected_encoding_list, + kUserSelectedEncodingsMaxLength); + // Put 'cached encodings' (dynamic encoding list) after 'local dependent + // encoding list' for check. + std::vector<int> top_encoding_list(*locale_dependent_encoding_list); + // UTF8 is always in our optimized encoding list. + top_encoding_list.insert(top_encoding_list.begin(), IDC_ENCODING_UTF8); + top_encoding_list.insert(top_encoding_list.end(), + selected_encoding_list.begin(), + selected_encoding_list.end()); + for (std::vector<int>::const_iterator it = top_encoding_list.begin(); + it != top_encoding_list.end(); ++it) + if (*it == new_selected_encoding_id) + return false; + // Need to add the encoding id to recently selected encoding list. + // Remove the last encoding in original list. + if (selected_encoding_list.size() == kUserSelectedEncodingsMaxLength) + selected_encoding_list.pop_back(); + // Insert new encoding to head of selected encoding list. + *selected_encodings = encoding_name; + // Generate the string for rest selected encoding list. + for (std::vector<int>::const_iterator it = selected_encoding_list.begin(); + it != selected_encoding_list.end(); ++it) { + selected_encodings->append(1, L','); + selected_encodings->append(GetCanonicalEncodingNameByCommandId(*it)); + } + return true; +} |