summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbrettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-10 20:40:50 +0000
committerbrettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-10 20:40:50 +0000
commitd6e58c6ef48f52cce6cdf781c5a507e254322425 (patch)
tree274e82d3fa22052761a146eaed25d862744633af
parent3522bda4050a25b9020d9c224e9dfc89439e2f5e (diff)
downloadchromium_src-d6e58c6ef48f52cce6cdf781c5a507e254322425.zip
chromium_src-d6e58c6ef48f52cce6cdf781c5a507e254322425.tar.gz
chromium_src-d6e58c6ef48f52cce6cdf781c5a507e254322425.tar.bz2
Remove the dependency on i18n/icu_string_conversions from base/string_util.h.
Fix up all files requiring this header to include it directly. Split out the ICU-dependent string util unit tests into a new file base/i18n/icu_string_conversions_unittest.cc TEST=none BUG=none Review URL: http://codereview.chromium.org/269034 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28674 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--base/DEPS3
-rw-r--r--base/base.gyp1
-rw-r--r--base/i18n/icu_string_conversions.cc32
-rw-r--r--base/i18n/icu_string_conversions.h22
-rw-r--r--base/i18n/icu_string_conversions_unittest.cc328
-rw-r--r--base/string_util.cc1
-rw-r--r--base/string_util.h4
-rw-r--r--base/string_util_unittest.cc262
-rw-r--r--chrome/browser/autocomplete/search_provider.cc6
-rw-r--r--chrome/browser/importer/firefox2_importer.cc21
-rw-r--r--chrome/browser/importer/mork_reader.cc9
-rw-r--r--chrome/browser/search_engines/template_url.cc14
-rw-r--r--chrome/tools/convert_dict/aff_reader.cc5
-rw-r--r--net/base/escape.cc9
-rw-r--r--net/base/escape_unittest.cc5
-rw-r--r--net/base/net_util.cc11
-rw-r--r--net/proxy/proxy_script_fetcher.cc10
-rw-r--r--webkit/appcache/manifest_parser.cc7
-rw-r--r--webkit/glue/ftp_directory_listing_response_delegate.cc12
19 files changed, 435 insertions, 327 deletions
diff --git a/base/DEPS b/base/DEPS
index 3fdcb7b..9ef2764 100644
--- a/base/DEPS
+++ b/base/DEPS
@@ -4,4 +4,7 @@ include_rules = [
# Testing stuff shouldn't be used by the general base code.
"-base/test",
+
+ # ICU dependencies must be separate from the rest of base.
+ "-i18n",
]
diff --git a/base/base.gyp b/base/base.gyp
index 61f5e3c..949f34b 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -587,6 +587,7 @@
'hmac_unittest.cc',
'id_map_unittest.cc',
'i18n/file_util_icu_unittest.cc',
+ 'i18n/icu_string_conversions_unittest.cc',
'i18n/word_iterator_unittest.cc',
'json_reader_unittest.cc',
'json_writer_unittest.cc',
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc
index 225fe0b..ba9f9ae 100644
--- a/base/i18n/icu_string_conversions.cc
+++ b/base/i18n/icu_string_conversions.cc
@@ -14,6 +14,8 @@
#include "unicode/ucnv_err.h"
#include "unicode/ustring.h"
+namespace base {
+
namespace {
inline bool IsValidCodepoint(uint32 code_point) {
@@ -83,7 +85,7 @@ void ToUnicodeCallbackSubstitute(const void* context,
}
bool ConvertFromUTF16(UConverter* converter, const UChar* uchar_src,
- int uchar_len, OnStringUtilConversionError::Type on_error,
+ int uchar_len, OnStringConversionError::Type on_error,
std::string* encoded) {
int encoded_max_length = UCNV_GET_MAX_BYTES_FOR_STRING(uchar_len,
ucnv_getMaxCharSize(converter));
@@ -93,12 +95,12 @@ bool ConvertFromUTF16(UConverter* converter, const UChar* uchar_src,
// Setup our error handler.
switch (on_error) {
- case OnStringUtilConversionError::FAIL:
+ case OnStringConversionError::FAIL:
ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0,
NULL, NULL, &status);
break;
- case OnStringUtilConversionError::SKIP:
- case OnStringUtilConversionError::SUBSTITUTE:
+ case OnStringConversionError::SKIP:
+ case OnStringConversionError::SUBSTITUTE:
ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0,
NULL, NULL, &status);
break;
@@ -118,18 +120,18 @@ bool ConvertFromUTF16(UConverter* converter, const UChar* uchar_src,
}
// Set up our error handler for ToUTF-16 converters
-void SetUpErrorHandlerForToUChars(OnStringUtilConversionError::Type on_error,
+void SetUpErrorHandlerForToUChars(OnStringConversionError::Type on_error,
UConverter* converter, UErrorCode* status) {
switch (on_error) {
- case OnStringUtilConversionError::FAIL:
+ case OnStringConversionError::FAIL:
ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, 0,
NULL, NULL, status);
break;
- case OnStringUtilConversionError::SKIP:
+ case OnStringConversionError::SKIP:
ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, 0,
NULL, NULL, status);
break;
- case OnStringUtilConversionError::SUBSTITUTE:
+ case OnStringConversionError::SUBSTITUTE:
ucnv_setToUCallBack(converter, ToUnicodeCallbackSubstitute, 0,
NULL, NULL, status);
break;
@@ -148,13 +150,18 @@ inline UConverterType utf32_platform_endian() {
} // namespace
+const char kCodepageLatin1[] = "ISO-8859-1";
+const char kCodepageUTF8[] = "UTF-8";
+const char kCodepageUTF16BE[] = "UTF-16BE";
+const char kCodepageUTF16LE[] = "UTF-16LE";
+
// Codepage <-> Wide/UTF-16 ---------------------------------------------------
// Convert a wstring into the specified codepage_name. If the codepage
// isn't found, return false.
bool WideToCodepage(const std::wstring& wide,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
std::string* encoded) {
#if defined(WCHAR_T_IS_UTF16)
return UTF16ToCodepage(wide, codepage_name, on_error, encoded);
@@ -185,7 +192,7 @@ bool WideToCodepage(const std::wstring& wide,
// isn't found, return false.
bool UTF16ToCodepage(const string16& utf16,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
std::string* encoded) {
encoded->clear();
@@ -202,7 +209,7 @@ bool UTF16ToCodepage(const string16& utf16,
// If the codepage isn't found, return false.
bool CodepageToWide(const std::string& encoded,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
std::wstring* wide) {
#if defined(WCHAR_T_IS_UTF16)
return CodepageToUTF16(encoded, codepage_name, on_error, wide);
@@ -250,7 +257,7 @@ bool CodepageToWide(const std::string& encoded,
// If the codepage isn't found, return false.
bool CodepageToUTF16(const std::string& encoded,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
string16* utf16) {
utf16->clear();
@@ -286,3 +293,4 @@ bool CodepageToUTF16(const std::string& encoded,
return true;
}
+} // namespace base
diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h
index d849c71..e7dac605 100644
--- a/base/i18n/icu_string_conversions.h
+++ b/base/i18n/icu_string_conversions.h
@@ -10,9 +10,11 @@
#include "base/string16.h"
#include "base/string_piece.h"
+namespace base {
+
// Defines the error handling modes of UTF16ToCodepage, CodepageToUTF16,
// WideToCodepage and CodepageToWide.
-class OnStringUtilConversionError {
+class OnStringConversionError {
public:
enum Type {
// The function will return failure. The output buffer will be empty.
@@ -29,20 +31,26 @@ class OnStringUtilConversionError {
};
private:
- OnStringUtilConversionError();
+ OnStringConversionError();
};
+// Names of codepages (charsets) understood by icu.
+extern const char kCodepageLatin1[]; // a.k.a. ISO 8859-1
+extern const char kCodepageUTF8[];
+extern const char kCodepageUTF16BE[];
+extern const char kCodepageUTF16LE[];
+
// Converts between UTF-16 strings and the encoding specified. If the
// encoding doesn't exist or the encoding fails (when on_error is FAIL),
// returns false.
bool UTF16ToCodepage(const string16& utf16,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
std::string* encoded);
bool CodepageToUTF16(const std::string& encoded,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
string16* utf16);
// Converts between wide strings and the encoding specified. If the
@@ -50,11 +58,13 @@ bool CodepageToUTF16(const std::string& encoded,
// returns false.
bool WideToCodepage(const std::wstring& wide,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
std::string* encoded);
bool CodepageToWide(const std::string& encoded,
const char* codepage_name,
- OnStringUtilConversionError::Type on_error,
+ OnStringConversionError::Type on_error,
std::wstring* wide);
+} // namespace base
+
#endif // BASE_I18N_ICU_STRING_CONVERSIONS_H_
diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc
new file mode 100644
index 0000000..969ddb7
--- /dev/null
+++ b/base/i18n/icu_string_conversions_unittest.cc
@@ -0,0 +1,328 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <math.h>
+#include <stdarg.h>
+
+#include <limits>
+#include <sstream>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/utf_string_conversions.h"
+#include "base/i18n/icu_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace base {
+
+namespace {
+
+// Given a null-terminated string of wchar_t with each wchar_t representing
+// a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
+// Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
+// should be represented as a surrogate pair (two UTF-16 units)
+// *even* where wchar_t is 32-bit (Linux and Mac).
+//
+// This is to help write tests for functions with string16 params until
+// the C++ 0x UTF-16 literal is well-supported by compilers.
+string16 BuildString16(const wchar_t* s) {
+#if defined(WCHAR_T_IS_UTF16)
+ return string16(s);
+#elif defined(WCHAR_T_IS_UTF32)
+ string16 u16;
+ while (*s != 0) {
+ DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
+ u16.push_back(*s++);
+ }
+ return u16;
+#endif
+}
+
+static const wchar_t* const kConvertRoundtripCases[] = {
+ L"Google Video",
+ // "网页 图片 资讯更多 »"
+ L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
+ // "Παγκόσμιος Ιστός"
+ L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
+ L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
+ // "Поиск страниц на русском"
+ L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
+ L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
+ L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
+ // "전체서비스"
+ L"\xc804\xccb4\xc11c\xbe44\xc2a4",
+
+ // Test characters that take more than 16 bits. This will depend on whether
+ // wchar_t is 16 or 32 bits.
+#if defined(WCHAR_T_IS_UTF16)
+ L"\xd800\xdf00",
+ // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)
+ L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
+#elif defined(WCHAR_T_IS_UTF32)
+ L"\x10300",
+ // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)
+ L"\x11d40\x11d41\x11d42\x11d43\x11d44",
+#endif
+};
+
+} // namespace
+
+TEST(StringUtilTest, ConvertCodepageUTF8) {
+ // Make sure WideToCodepage works like WideToUTF8.
+ for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
+ std::string expected(WideToUTF8(kConvertRoundtripCases[i]));
+ std::string utf8;
+ EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,
+ OnStringConversionError::SKIP, &utf8));
+ EXPECT_EQ(expected, utf8);
+ }
+}
+
+// kConvertCodepageCases is not comprehensive. There are a number of cases
+// to add if we really want to have a comprehensive coverage of various
+// codepages and their 'idiosyncrasies'. Currently, the only implementation
+// for CodepageTo* and *ToCodepage uses ICU, which has a very extensive
+// set of tests for the charset conversion. So, we can get away with a
+// relatively small number of cases listed below.
+//
+// Note about |u16_wide| in the following struct.
+// On Windows, the field is always identical to |wide|. On Mac and Linux,
+// it's identical as long as there's no character outside the
+// BMP (<= U+FFFF). When there is, it is different from |wide| and
+// is not a real wide string (UTF-32 string) in that each wchar_t in
+// the string is a UTF-16 code unit zero-extended to be 32-bit
+// even when the code unit belongs to a surrogate pair.
+// For instance, a Unicode string (U+0041 U+010000) is represented as
+// L"\x0041\xD800\xDC00" instead of L"\x0041\x10000".
+// To avoid the clutter, |u16_wide| will be set to NULL
+// if it's identical to |wide| on *all* platforms.
+
+static const struct {
+ const char* codepage_name;
+ const char* encoded;
+ OnStringConversionError::Type on_error;
+ bool success;
+ const wchar_t* wide;
+ const wchar_t* u16_wide;
+} kConvertCodepageCases[] = {
+ // Test a case where the input cannot be decoded, using SKIP, FAIL
+ // and SUBSTITUTE error handling rules. "A7 41" is valid, but "A6" isn't.
+ {"big5",
+ "\xA7\x41\xA6",
+ OnStringConversionError::FAIL,
+ false,
+ L"",
+ NULL},
+ {"big5",
+ "\xA7\x41\xA6",
+ OnStringConversionError::SKIP,
+ true,
+ L"\x4F60",
+ NULL},
+ {"big5",
+ "\xA7\x41\xA6",
+ OnStringConversionError::SUBSTITUTE,
+ true,
+ L"\x4F60\xFFFD",
+ NULL},
+ // Arabic (ISO-8859)
+ {"iso-8859-6",
+ "\xC7\xEE\xE4\xD3\xF1\xEE\xE4\xC7\xE5\xEF" " "
+ "\xD9\xEE\xE4\xEE\xEA\xF2\xE3\xEF\xE5\xF2",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x0627\x064E\x0644\x0633\x0651\x064E\x0644\x0627\x0645\x064F" L" "
+ L"\x0639\x064E\x0644\x064E\x064A\x0652\x0643\x064F\x0645\x0652",
+ NULL},
+ // Chinese Simplified (GB2312)
+ {"gb2312",
+ "\xC4\xE3\xBA\xC3",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x4F60\x597D",
+ NULL},
+ // Chinese (GB18030) : 4 byte sequences mapped to BMP characters
+ {"gb18030",
+ "\x81\x30\x84\x36\xA1\xA7",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x00A5\x00A8",
+ NULL},
+ // Chinese (GB18030) : A 4 byte sequence mapped to plane 2 (U+20000)
+ {"gb18030",
+ "\x95\x32\x82\x36\xD2\xBB",
+ OnStringConversionError::FAIL,
+ true,
+#if defined(WCHAR_T_IS_UTF16)
+ L"\xD840\xDC00\x4E00",
+#else
+ L"\x20000\x4E00",
+#endif
+ L"\xD840\xDC00\x4E00"},
+ {"big5",
+ "\xA7\x41\xA6\x6E",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x4F60\x597D",
+ NULL},
+ // Greek (ISO-8859)
+ {"iso-8859-7",
+ "\xE3\xE5\xE9\xDC" " " "\xF3\xEF\xF5",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5",
+ NULL},
+ // Hebrew (Windows)
+ {"windows-1255",
+ "\xF9\xD1\xC8\xEC\xE5\xC9\xED",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x05E9\x05C1\x05B8\x05DC\x05D5\x05B9\x05DD",
+ NULL},
+ // Hindi Devanagari (ISCII)
+ {"iscii-dev",
+ "\xEF\x42" "\xC6\xCC\xD7\xE8\xB3\xDA\xCF",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x0928\x092E\x0938\x094D\x0915\x093E\x0930",
+ NULL},
+ // Korean (EUC)
+ {"euc-kr",
+ "\xBE\xC8\xB3\xE7\xC7\xCF\xBC\xBC\xBF\xE4",
+ OnStringConversionError::FAIL,
+ true,
+ L"\xC548\xB155\xD558\xC138\xC694",
+ NULL},
+ // Japanese (EUC)
+ {"euc-jp",
+ "\xA4\xB3\xA4\xF3\xA4\xCB\xA4\xC1\xA4\xCF\xB0\xEC\x8F\xB0\xA1\x8E\xA6",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x3053\x3093\x306B\x3061\x306F\x4E00\x4E02\xFF66",
+ NULL},
+ // Japanese (ISO-2022)
+ {"iso-2022-jp",
+ "\x1B$B" "\x24\x33\x24\x73\x24\x4B\x24\x41\x24\x4F\x30\x6C" "\x1B(B"
+ "ab" "\x1B(J" "\x5C\x7E#$" "\x1B(B",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x3053\x3093\x306B\x3061\x306F\x4E00" L"ab\x00A5\x203E#$",
+ NULL},
+ // Japanese (Shift-JIS)
+ {"sjis",
+ "\x82\xB1\x82\xF1\x82\xC9\x82\xBF\x82\xCD\x88\xEA\xA6",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x3053\x3093\x306B\x3061\x306F\x4E00\xFF66",
+ NULL},
+ // Russian (KOI8)
+ {"koi8-r",
+ "\xDA\xC4\xD2\xC1\xD7\xD3\xD4\xD7\xD5\xCA\xD4\xC5",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"
+ L"\x0443\x0439\x0442\x0435",
+ NULL},
+ // Thai (windows-874)
+ {"windows-874",
+ "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",
+ OnStringConversionError::FAIL,
+ true,
+ L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"
+ L"\x0E04\x0E23\x0e31\x0E1A",
+ NULL},
+};
+
+TEST(StringUtilTest, ConvertBetweenCodepageAndWide) {
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
+ std::wstring wide;
+ bool success = CodepageToWide(kConvertCodepageCases[i].encoded,
+ kConvertCodepageCases[i].codepage_name,
+ kConvertCodepageCases[i].on_error,
+ &wide);
+ EXPECT_EQ(kConvertCodepageCases[i].success, success);
+ EXPECT_EQ(kConvertCodepageCases[i].wide, wide);
+
+ // When decoding was successful and nothing was skipped, we also check the
+ // reverse conversion. Not all conversions are round-trippable, but
+ // kConvertCodepageCases does not have any one-way conversion at the
+ // moment.
+ if (success &&
+ kConvertCodepageCases[i].on_error ==
+ OnStringConversionError::FAIL) {
+ std::string encoded;
+ success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name,
+ kConvertCodepageCases[i].on_error, &encoded);
+ EXPECT_EQ(kConvertCodepageCases[i].success, success);
+ EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
+ }
+ }
+
+ // The above cases handled codepage->wide errors, but not wide->codepage.
+ // Test that here.
+ std::string encoded("Temp data"); // Make sure the string gets cleared.
+
+ // First test going to an encoding that can not represent that character.
+ EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
+ OnStringConversionError::FAIL, &encoded));
+ EXPECT_TRUE(encoded.empty());
+ EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
+ OnStringConversionError::SKIP, &encoded));
+ EXPECT_STREQ("Chinese", encoded.c_str());
+ // From Unicode, SUBSTITUTE is the same as SKIP for now.
+ EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
+ OnStringConversionError::SUBSTITUTE,
+ &encoded));
+ EXPECT_STREQ("Chinese", encoded.c_str());
+
+#if defined(WCHAR_T_IS_UTF16)
+ // When we're in UTF-16 mode, test an invalid UTF-16 character in the input.
+ EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1",
+ OnStringConversionError::FAIL, &encoded));
+ EXPECT_TRUE(encoded.empty());
+ EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1",
+ OnStringConversionError::SKIP, &encoded));
+ EXPECT_STREQ("az", encoded.c_str());
+#endif // WCHAR_T_IS_UTF16
+
+ // Invalid characters should be dropped (not fail) when SKIP is requested.
+ EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",
+ OnStringConversionError::SKIP, &encoded));
+ EXPECT_STREQ("az", encoded.c_str());
+
+ // Invalid codepages should fail.
+ EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",
+ OnStringConversionError::SKIP, &encoded));
+}
+
+TEST(StringUtilTest, ConvertBetweenCodepageAndUTF16) {
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
+ string16 utf16;
+ bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,
+ kConvertCodepageCases[i].codepage_name,
+ kConvertCodepageCases[i].on_error,
+ &utf16);
+ string16 utf16_expected;
+ if (kConvertCodepageCases[i].u16_wide == NULL)
+ utf16_expected = BuildString16(kConvertCodepageCases[i].wide);
+ else
+ utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide);
+ EXPECT_EQ(kConvertCodepageCases[i].success, success);
+ EXPECT_EQ(utf16_expected, utf16);
+
+ // When decoding was successful and nothing was skipped, we also check the
+ // reverse conversion. See also the corresponding comment in
+ // ConvertBetweenCodepageAndWide.
+ if (success &&
+ kConvertCodepageCases[i].on_error == OnStringConversionError::FAIL) {
+ std::string encoded;
+ success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name,
+ kConvertCodepageCases[i].on_error, &encoded);
+ EXPECT_EQ(kConvertCodepageCases[i].success, success);
+ EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
+ }
+ }
+}
+
+} // namespace base
diff --git a/base/string_util.cc b/base/string_util.cc
index c819373..480d199 100644
--- a/base/string_util.cc
+++ b/base/string_util.cc
@@ -383,7 +383,6 @@ const char kWhitespaceASCII[] = {
0x20, // Space
0
};
-const char* const kCodepageUTF8 = "UTF-8";
template<typename STR>
TrimPositions TrimStringT(const STR& input,
diff --git a/base/string_util.h b/base/string_util.h
index 5bacfcd..333f5f6 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -19,7 +19,6 @@
// TODO(brettw) this dependency should be removed and callers that need
// these functions should include this file directly.
#include "base/utf_string_conversions.h"
-#include "base/i18n/icu_string_conversions.h"
// Safe standard library wrappers for all platforms.
@@ -123,9 +122,6 @@ const string16& EmptyString16();
extern const wchar_t kWhitespaceWide[];
extern const char kWhitespaceASCII[];
-// Names of codepages (charsets) understood by icu.
-extern const char* const kCodepageUTF8;
-
// Removes characters in trim_chars from the beginning and end of input.
// NOTE: Safe to use the same variable for both input and output.
bool TrimString(const std::wstring& input,
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
index a70b03e..0ccea91 100644
--- a/base/string_util_unittest.cc
+++ b/base/string_util_unittest.cc
@@ -12,6 +12,8 @@
#include "base/string_util.h"
#include "testing/gtest/include/gtest/gtest.h"
+namespace base {
+
namespace {
// Given a null-terminated string of wchar_t with each wchar_t representing
@@ -424,264 +426,6 @@ TEST(StringUtilTest, ConvertMultiString) {
EXPECT_EQ(expected, converted);
}
-TEST(StringUtilTest, ConvertCodepageUTF8) {
- // Make sure WideToCodepage works like WideToUTF8.
- for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
- std::string expected(WideToUTF8(kConvertRoundtripCases[i]));
- std::string utf8;
- EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,
- OnStringUtilConversionError::SKIP, &utf8));
- EXPECT_EQ(expected, utf8);
- }
-}
-
-// kConverterCodepageCases is not comprehensive. There are a number of cases
-// to add if we really want to have a comprehensive coverage of various
-// codepages and their 'idiosyncrasies'. Currently, the only implementation
-// for CodepageTo* and *ToCodepage uses ICU, which has a very extensive
-// set of tests for the charset conversion. So, we can get away with a
-// relatively small number of cases listed below.
-//
-// Note about |u16_wide| in the following struct.
-// On Windows, the field is always identical to |wide|. On Mac and Linux,
-// it's identical as long as there's no character outside the
-// BMP (<= U+FFFF). When there is, it is different from |wide| and
-// is not a real wide string (UTF-32 string) in that each wchar_t in
-// the string is a UTF-16 code unit zero-extended to be 32-bit
-// even when the code unit belongs to a surrogate pair.
-// For instance, a Unicode string (U+0041 U+010000) is represented as
-// L"\x0041\xD800\xDC00" instead of L"\x0041\x10000".
-// To avoid the clutter, |u16_wide| will be set to NULL
-// if it's identical to |wide| on *all* platforms.
-
-static const struct {
- const char* codepage_name;
- const char* encoded;
- OnStringUtilConversionError::Type on_error;
- bool success;
- const wchar_t* wide;
- const wchar_t* u16_wide;
-} kConvertCodepageCases[] = {
- // Test a case where the input cannot be decoded, using SKIP, FAIL
- // and SUBSTITUTE error handling rules. "A7 41" is valid, but "A6" isn't.
- {"big5",
- "\xA7\x41\xA6",
- OnStringUtilConversionError::FAIL,
- false,
- L"",
- NULL},
- {"big5",
- "\xA7\x41\xA6",
- OnStringUtilConversionError::SKIP,
- true,
- L"\x4F60",
- NULL},
- {"big5",
- "\xA7\x41\xA6",
- OnStringUtilConversionError::SUBSTITUTE,
- true,
- L"\x4F60\xFFFD",
- NULL},
- // Arabic (ISO-8859)
- {"iso-8859-6",
- "\xC7\xEE\xE4\xD3\xF1\xEE\xE4\xC7\xE5\xEF" " "
- "\xD9\xEE\xE4\xEE\xEA\xF2\xE3\xEF\xE5\xF2",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x0627\x064E\x0644\x0633\x0651\x064E\x0644\x0627\x0645\x064F" L" "
- L"\x0639\x064E\x0644\x064E\x064A\x0652\x0643\x064F\x0645\x0652",
- NULL},
- // Chinese Simplified (GB2312)
- {"gb2312",
- "\xC4\xE3\xBA\xC3",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x4F60\x597D",
- NULL},
- // Chinese (GB18030) : 4 byte sequences mapped to BMP characters
- {"gb18030",
- "\x81\x30\x84\x36\xA1\xA7",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x00A5\x00A8",
- NULL},
- // Chinese (GB18030) : A 4 byte sequence mapped to plane 2 (U+20000)
- {"gb18030",
- "\x95\x32\x82\x36\xD2\xBB",
- OnStringUtilConversionError::FAIL,
- true,
-#if defined(WCHAR_T_IS_UTF16)
- L"\xD840\xDC00\x4E00",
-#else
- L"\x20000\x4E00",
-#endif
- L"\xD840\xDC00\x4E00"},
- {"big5",
- "\xA7\x41\xA6\x6E",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x4F60\x597D",
- NULL},
- // Greek (ISO-8859)
- {"iso-8859-7",
- "\xE3\xE5\xE9\xDC" " " "\xF3\xEF\xF5",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5",
- NULL},
- // Hebrew (Windows)
- {"windows-1255",
- "\xF9\xD1\xC8\xEC\xE5\xC9\xED",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x05E9\x05C1\x05B8\x05DC\x05D5\x05B9\x05DD",
- NULL},
- // Hindi Devanagari (ISCII)
- {"iscii-dev",
- "\xEF\x42" "\xC6\xCC\xD7\xE8\xB3\xDA\xCF",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x0928\x092E\x0938\x094D\x0915\x093E\x0930",
- NULL},
- // Korean (EUC)
- {"euc-kr",
- "\xBE\xC8\xB3\xE7\xC7\xCF\xBC\xBC\xBF\xE4",
- OnStringUtilConversionError::FAIL,
- true,
- L"\xC548\xB155\xD558\xC138\xC694",
- NULL},
- // Japanese (EUC)
- {"euc-jp",
- "\xA4\xB3\xA4\xF3\xA4\xCB\xA4\xC1\xA4\xCF\xB0\xEC\x8F\xB0\xA1\x8E\xA6",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x3053\x3093\x306B\x3061\x306F\x4E00\x4E02\xFF66",
- NULL},
- // Japanese (ISO-2022)
- {"iso-2022-jp",
- "\x1B$B" "\x24\x33\x24\x73\x24\x4B\x24\x41\x24\x4F\x30\x6C" "\x1B(B"
- "ab" "\x1B(J" "\x5C\x7E#$" "\x1B(B",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x3053\x3093\x306B\x3061\x306F\x4E00" L"ab\x00A5\x203E#$",
- NULL},
- // Japanese (Shift-JIS)
- {"sjis",
- "\x82\xB1\x82\xF1\x82\xC9\x82\xBF\x82\xCD\x88\xEA\xA6",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x3053\x3093\x306B\x3061\x306F\x4E00\xFF66",
- NULL},
- // Russian (KOI8)
- {"koi8-r",
- "\xDA\xC4\xD2\xC1\xD7\xD3\xD4\xD7\xD5\xCA\xD4\xC5",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"
- L"\x0443\x0439\x0442\x0435",
- NULL},
- // Thai (windows-874)
- {"windows-874",
- "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",
- OnStringUtilConversionError::FAIL,
- true,
- L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"
- L"\x0E04\x0E23\x0e31\x0E1A",
- NULL},
-};
-
-TEST(StringUtilTest, ConvertBetweenCodepageAndWide) {
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
- std::wstring wide;
- bool success = CodepageToWide(kConvertCodepageCases[i].encoded,
- kConvertCodepageCases[i].codepage_name,
- kConvertCodepageCases[i].on_error,
- &wide);
- EXPECT_EQ(kConvertCodepageCases[i].success, success);
- EXPECT_EQ(kConvertCodepageCases[i].wide, wide);
-
- // When decoding was successful and nothing was skipped, we also check the
- // reverse conversion. Not all conversions are round-trippable, but
- // kConverterCodepageCases does not have any one-way conversion at the
- // moment.
- if (success &&
- kConvertCodepageCases[i].on_error ==
- OnStringUtilConversionError::FAIL) {
- std::string encoded;
- success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name,
- kConvertCodepageCases[i].on_error, &encoded);
- EXPECT_EQ(kConvertCodepageCases[i].success, success);
- EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
- }
- }
-
- // The above cases handled codepage->wide errors, but not wide->codepage.
- // Test that here.
- std::string encoded("Temp data"); // Make sure the string gets cleared.
-
- // First test going to an encoding that can not represent that character.
- EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
- OnStringUtilConversionError::FAIL, &encoded));
- EXPECT_TRUE(encoded.empty());
- EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
- OnStringUtilConversionError::SKIP, &encoded));
- EXPECT_STREQ("Chinese", encoded.c_str());
- // From Unicode, SUBSTITUTE is the same as SKIP for now.
- EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
- OnStringUtilConversionError::SUBSTITUTE,
- &encoded));
- EXPECT_STREQ("Chinese", encoded.c_str());
-
-#if defined(WCHAR_T_IS_UTF16)
- // When we're in UTF-16 mode, test an invalid UTF-16 character in the input.
- EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1",
- OnStringUtilConversionError::FAIL, &encoded));
- EXPECT_TRUE(encoded.empty());
- EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1",
- OnStringUtilConversionError::SKIP, &encoded));
- EXPECT_STREQ("az", encoded.c_str());
-#endif // WCHAR_T_IS_UTF16
-
- // Invalid characters should fail.
- EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",
- OnStringUtilConversionError::SKIP, &encoded));
- EXPECT_STREQ("az", encoded.c_str());
-
- // Invalid codepages should fail.
- EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",
- OnStringUtilConversionError::SKIP, &encoded));
-}
-
-TEST(StringUtilTest, ConvertBetweenCodepageAndUTF16) {
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
- string16 utf16;
- bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,
- kConvertCodepageCases[i].codepage_name,
- kConvertCodepageCases[i].on_error,
- &utf16);
- string16 utf16_expected;
- if (kConvertCodepageCases[i].u16_wide == NULL)
- utf16_expected = BuildString16(kConvertCodepageCases[i].wide);
- else
- utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide);
- EXPECT_EQ(kConvertCodepageCases[i].success, success);
- EXPECT_EQ(utf16_expected, utf16);
-
- // When decoding was successful and nothing was skipped, we also check the
- // reverse conversion. See also the corresponding comment in
- // ConvertBetweenCodepageAndWide.
- if (success &&
- kConvertCodepageCases[i].on_error ==
- OnStringUtilConversionError::FAIL) {
- std::string encoded;
- success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name,
- kConvertCodepageCases[i].on_error, &encoded);
- EXPECT_EQ(kConvertCodepageCases[i].success, success);
- EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
- }
- }
-}
-
TEST(StringUtilTest, ConvertASCII) {
static const char* char_cases[] = {
"Google Video",
@@ -1800,3 +1544,5 @@ TEST(StringUtilTest, HexEncode) {
hex = HexEncode(bytes, sizeof(bytes));
EXPECT_EQ(hex.compare("01FF02FE038081"), 0);
}
+
+} // namespace base
diff --git a/chrome/browser/autocomplete/search_provider.cc b/chrome/browser/autocomplete/search_provider.cc
index 5c9931c..e024280 100644
--- a/chrome/browser/autocomplete/search_provider.cc
+++ b/chrome/browser/autocomplete/search_provider.cc
@@ -5,6 +5,7 @@
#include "chrome/browser/autocomplete/search_provider.h"
#include "app/l10n_util.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/message_loop.h"
#include "base/string_util.h"
#include "chrome/browser/autocomplete/keyword_provider.h"
@@ -165,8 +166,9 @@ void SearchProvider::OnURLFetchComplete(const URLFetcher* source,
if (response_headers->GetCharset(&charset)) {
std::wstring wide_data;
// TODO(jungshik): Switch to CodePageToUTF8 after it's added.
- if (CodepageToWide(data, charset.c_str(),
- OnStringUtilConversionError::FAIL, &wide_data))
+ if (base::CodepageToWide(data, charset.c_str(),
+ base::OnStringConversionError::FAIL,
+ &wide_data))
json_data = WideToUTF8(wide_data);
}
}
diff --git a/chrome/browser/importer/firefox2_importer.cc b/chrome/browser/importer/firefox2_importer.cc
index 943cb9e..c529e39 100644
--- a/chrome/browser/importer/firefox2_importer.cc
+++ b/chrome/browser/importer/firefox2_importer.cc
@@ -7,6 +7,7 @@
#include "app/l10n_util.h"
#include "base/file_path.h"
#include "base/file_util.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/message_loop.h"
#include "base/path_service.h"
#include "base/stl_util-inl.h"
@@ -382,8 +383,8 @@ bool Firefox2Importer::ParseFolderNameFromLine(const std::string& line,
if (end == std::string::npos || tag_end < arraysize(kFolderOpen))
return false;
- CodepageToWide(line.substr(tag_end, end - tag_end), charset.c_str(),
- OnStringUtilConversionError::SKIP, folder_name);
+ base::CodepageToWide(line.substr(tag_end, end - tag_end), charset.c_str(),
+ base::OnStringConversionError::SKIP, folder_name);
HTMLUnescape(folder_name);
std::string attribute_list = line.substr(arraysize(kFolderOpen),
@@ -442,15 +443,15 @@ bool Firefox2Importer::ParseBookmarkFromLine(const std::string& line,
return false;
// Title
- CodepageToWide(line.substr(tag_end, end - tag_end), charset.c_str(),
- OnStringUtilConversionError::SKIP, title);
+ base::CodepageToWide(line.substr(tag_end, end - tag_end), charset.c_str(),
+ base::OnStringConversionError::SKIP, title);
HTMLUnescape(title);
// URL
if (GetAttribute(attribute_list, kHrefAttribute, &value)) {
std::wstring w_url;
- CodepageToWide(value, charset.c_str(), OnStringUtilConversionError::SKIP,
- &w_url);
+ base::CodepageToWide(value, charset.c_str(),
+ base::OnStringConversionError::SKIP, &w_url);
HTMLUnescape(&w_url);
string16 url16 = WideToUTF16Hack(w_url);
@@ -464,8 +465,8 @@ bool Firefox2Importer::ParseBookmarkFromLine(const std::string& line,
// Keyword
if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) {
- CodepageToWide(value, charset.c_str(), OnStringUtilConversionError::SKIP,
- shortcut);
+ base::CodepageToWide(value, charset.c_str(),
+ base::OnStringConversionError::SKIP, shortcut);
HTMLUnescape(shortcut);
}
@@ -479,8 +480,8 @@ bool Firefox2Importer::ParseBookmarkFromLine(const std::string& line,
// Post data.
if (GetAttribute(attribute_list, kPostDataAttribute, &value)) {
- CodepageToWide(value, charset.c_str(),
- OnStringUtilConversionError::SKIP, post_data);
+ base::CodepageToWide(value, charset.c_str(),
+ base::OnStringConversionError::SKIP, post_data);
HTMLUnescape(post_data);
}
diff --git a/chrome/browser/importer/mork_reader.cc b/chrome/browser/importer/mork_reader.cc
index c8b24a4..66ff12b 100644
--- a/chrome/browser/importer/mork_reader.cc
+++ b/chrome/browser/importer/mork_reader.cc
@@ -45,6 +45,7 @@
#include <algorithm>
#include "base/file_path.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/string_util.h"
@@ -518,11 +519,11 @@ void AddToHistory(MorkReader::ColumnDataList* column_values,
// title is really a UTF-16 string at this point
std::wstring title;
if (data.swap_bytes) {
- CodepageToWide(values[kNameColumn], "UTF-16BE",
- OnStringUtilConversionError::SKIP, &title);
+ base::CodepageToWide(values[kNameColumn], base::kCodepageUTF16BE,
+ base::OnStringConversionError::SKIP, &title);
} else {
- CodepageToWide(values[kNameColumn], "UTF-16LE",
- OnStringUtilConversionError::SKIP, &title);
+ base::CodepageToWide(values[kNameColumn], base::kCodepageUTF16LE,
+ base::OnStringConversionError::SKIP, &title);
}
row.set_title(title);
diff --git a/chrome/browser/search_engines/template_url.cc b/chrome/browser/search_engines/template_url.cc
index 7c57617..3febf96 100644
--- a/chrome/browser/search_engines/template_url.cc
+++ b/chrome/browser/search_engines/template_url.cc
@@ -6,6 +6,7 @@
#include "app/gfx/favicon_size.h"
#include "app/l10n_util.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/string_util.h"
#include "chrome/browser/browser_process.h"
@@ -318,8 +319,9 @@ std::wstring TemplateURLRef::ReplaceSearchTerms(
case GOOGLE_UNESCAPED_SEARCH_TERMS: {
std::string unescaped_terms;
- WideToCodepage(terms, WideToASCII(input_encoding).c_str(),
- OnStringUtilConversionError::SKIP, &unescaped_terms);
+ base::WideToCodepage(terms, WideToASCII(input_encoding).c_str(),
+ base::OnStringConversionError::SKIP,
+ &unescaped_terms);
url.insert(i->index, std::wstring(unescaped_terms.begin(),
unescaped_terms.end()));
break;
@@ -411,14 +413,14 @@ std::wstring TemplateURLRef::SearchTermToWide(const TemplateURL& host,
UnescapeURLComponent(term, UnescapeRule::REPLACE_PLUS_WITH_SPACE |
UnescapeRule::URL_SPECIAL_CHARS);
for (size_t i = 0; i < encodings.size(); ++i) {
- if (CodepageToWide(unescaped, encodings[i].c_str(),
- OnStringUtilConversionError::FAIL, &result))
+ if (base::CodepageToWide(unescaped, encodings[i].c_str(),
+ base::OnStringConversionError::FAIL, &result))
return result;
}
// Always fall back on UTF-8 if it works.
- if (CodepageToWide(unescaped, "UTF-8",
- OnStringUtilConversionError::FAIL, &result))
+ if (base::CodepageToWide(unescaped, base::kCodepageUTF8,
+ base::OnStringConversionError::FAIL, &result))
return result;
// When nothing worked, just use the escaped text. We have no idea what the
diff --git a/chrome/tools/convert_dict/aff_reader.cc b/chrome/tools/convert_dict/aff_reader.cc
index 2ea8301..33fa522 100644
--- a/chrome/tools/convert_dict/aff_reader.cc
+++ b/chrome/tools/convert_dict/aff_reader.cc
@@ -7,6 +7,7 @@
#include <algorithm>
#include "base/file_util.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/string_util.h"
#include "chrome/tools/convert_dict/hunspell_reader.h"
@@ -133,8 +134,8 @@ bool AffReader::Read() {
bool AffReader::EncodingToUTF8(const std::string& encoded,
std::string* utf8) const {
std::wstring wide_word;
- if (!CodepageToWide(encoded, encoding(),
- OnStringUtilConversionError::FAIL, &wide_word))
+ if (!base::CodepageToWide(encoded, encoding(),
+ base::OnStringConversionError::FAIL, &wide_word))
return false;
*utf8 = WideToUTF8(wide_word);
return true;
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 615f062..3d2aca2 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -6,6 +6,7 @@
#include "net/base/escape.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/string_util.h"
@@ -220,8 +221,8 @@ bool EscapeQueryParamValue(const std::wstring& text, const char* codepage,
// TODO(brettw) bug 1201094: this function should be removed, this "SKIP"
// behavior is wrong when the character can't be encoded properly.
std::string encoded;
- if (!WideToCodepage(text, codepage,
- OnStringUtilConversionError::SKIP, &encoded))
+ if (!base::WideToCodepage(text, codepage,
+ base::OnStringConversionError::SKIP, &encoded))
return false;
// It's safe to use UTF8ToWide here because Escape should only return
@@ -234,8 +235,8 @@ std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
const char* codepage,
UnescapeRule::Type rules) {
std::wstring result;
- if (CodepageToWide(UnescapeURLImpl(text, rules), codepage,
- OnStringUtilConversionError::FAIL, &result))
+ if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage,
+ base::OnStringConversionError::FAIL, &result))
return result; // Character set looks like it's valid.
return UTF8ToWide(text); // Return the escaped version when it's not.
}
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index 3a9ed70..44bb9972 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -7,6 +7,7 @@
#include "net/base/escape.h"
#include "base/basictypes.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/string_util.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -42,7 +43,7 @@ struct EscapeForHTMLCase {
const char* expected_output;
};
-}
+} // namespace
TEST(Escape, EscapeTextForFormSubmission) {
const EscapeCase escape_cases[] = {
@@ -88,7 +89,7 @@ TEST(Escape, EscapeTextForFormSubmission) {
test_str.push_back(i);
}
std::wstring wide;
- EXPECT_TRUE(EscapeQueryParamValue(test_str, kCodepageUTF8, &wide));
+ EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, &wide));
EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str));
}
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index fc2fa84..1d7d558 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -30,6 +30,8 @@
#include "base/file_path.h"
#include "base/file_util.h"
#include "base/i18n/file_util_icu.h"
+#include "base/i18n/icu_string_conversions.h"
+#include "base/i18n/time_formatting.h"
#include "base/lock.h"
#include "base/logging.h"
#include "base/message_loop.h"
@@ -42,7 +44,7 @@
#include "base/string_util.h"
#include "base/sys_string_conversions.h"
#include "base/time.h"
-#include "base/i18n/time_formatting.h"
+#include "base/utf_string_conversions.h"
#include "grit/net_resources.h"
#include "googleurl/src/gurl.h"
#include "googleurl/src/url_canon.h"
@@ -264,8 +266,9 @@ bool DecodeWord(const std::string& encoded_word,
} else {
std::wstring wide_output;
if (!referrer_charset.empty() &&
- CodepageToWide(encoded_word, referrer_charset.c_str(),
- OnStringUtilConversionError::FAIL, &wide_output)) {
+ base::CodepageToWide(encoded_word, referrer_charset.c_str(),
+ base::OnStringConversionError::FAIL,
+ &wide_output)) {
*output = WideToUTF8(wide_output);
} else {
*output = WideToUTF8(base::SysNativeMBToWide(encoded_word));
diff --git a/net/proxy/proxy_script_fetcher.cc b/net/proxy/proxy_script_fetcher.cc
index 83189ef..882c406 100644
--- a/net/proxy/proxy_script_fetcher.cc
+++ b/net/proxy/proxy_script_fetcher.cc
@@ -5,10 +5,12 @@
#include "net/proxy/proxy_script_fetcher.h"
#include "base/compiler_specific.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/ref_counted.h"
#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
#include "net/base/io_buffer.h"
#include "net/base/load_flags.h"
#include "net/base/net_errors.h"
@@ -50,7 +52,7 @@ void ConvertResponseToUTF8(const std::string& charset, std::string* bytes) {
if (charset.empty()) {
// Assume ISO-8859-1 if no charset was specified.
- codepage = "ISO-8859-1";
+ codepage = base::kCodepageLatin1;
} else {
// Otherwise trust the charset that was provided.
codepage = charset.c_str();
@@ -60,9 +62,9 @@ void ConvertResponseToUTF8(const std::string& charset, std::string* bytes) {
// outside of |charset| (i.e. invalid), then substitute them with
// U+FFFD rather than failing.
std::wstring tmp_wide;
- CodepageToWide(*bytes, codepage,
- OnStringUtilConversionError::SUBSTITUTE,
- &tmp_wide);
+ base::CodepageToWide(*bytes, codepage,
+ base::OnStringConversionError::SUBSTITUTE,
+ &tmp_wide);
// TODO(eroman): would be nice to have a CodepageToUTF8() function.
*bytes = WideToUTF8(tmp_wide);
}
diff --git a/webkit/appcache/manifest_parser.cc b/webkit/appcache/manifest_parser.cc
index 4782ce5..652582a 100644
--- a/webkit/appcache/manifest_parser.cc
+++ b/webkit/appcache/manifest_parser.cc
@@ -31,8 +31,9 @@
#include "webkit/appcache/manifest_parser.h"
+#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
-#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
#include "googleurl/src/gurl.h"
namespace appcache {
@@ -58,8 +59,8 @@ bool ParseManifest(const GURL& manifest_url, const char* data, int length,
std::wstring data_string;
// TODO(jennb): cannot do UTF8ToWide(data, length, &data_string);
// until UTF8ToWide uses 0xFFFD Unicode replacement character.
- CodepageToWide(std::string(data, length), "UTF-8",
- OnStringUtilConversionError::SUBSTITUTE, &data_string);
+ base::CodepageToWide(std::string(data, length), base::kCodepageUTF8,
+ base::OnStringConversionError::SUBSTITUTE, &data_string);
const wchar_t* p = data_string.c_str();
const wchar_t* end = p + data_string.length();
diff --git a/webkit/glue/ftp_directory_listing_response_delegate.cc b/webkit/glue/ftp_directory_listing_response_delegate.cc
index c11fdbd..3ff9c0f 100644
--- a/webkit/glue/ftp_directory_listing_response_delegate.cc
+++ b/webkit/glue/ftp_directory_listing_response_delegate.cc
@@ -6,6 +6,7 @@
#include <vector>
+#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/string_util.h"
#include "base/sys_string_conversions.h"
@@ -53,8 +54,9 @@ string16 RawByteSequenceToFilename(const char* raw_filename,
// Using the native codepage does not make much sense, but we don't have
// much else to resort to.
string16 filename;
- if (!CodepageToUTF16(raw_filename, encoding.c_str(),
- OnStringUtilConversionError::SUBSTITUTE, &filename))
+ if (!base::CodepageToUTF16(raw_filename, encoding.c_str(),
+ base::OnStringConversionError::SUBSTITUTE,
+ &filename))
filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename));
return filename;
}
@@ -226,9 +228,9 @@ void FtpDirectoryListingResponseDelegate::Init() {
// Try the detected encoding. If it fails, resort to the
// OS native encoding.
if (encoding.empty() ||
- !CodepageToUTF16(unescaped_path, encoding.c_str(),
- OnStringUtilConversionError::SUBSTITUTE,
- &path_utf16))
+ !base::CodepageToUTF16(unescaped_path, encoding.c_str(),
+ base::OnStringConversionError::SUBSTITUTE,
+ &path_utf16))
path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(unescaped_path));
}