diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-10 01:43:15 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-10 01:43:15 +0000 |
commit | 2500a0f7c4ac54a55d621069dc044ddc6702a518 (patch) | |
tree | 960a571bb2b49c9005fd152138e4398df9072854 | |
parent | 0d435eae98fbe4f8055215a7aa6c483b07b7fa03 (diff) | |
download | chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.zip chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.gz chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.bz2 |
Clean up recent string conversion function changes, part 1: Remove unnecessary code. Thanks to a change in escape.cc I can basically revert the ICU conversions back to what they used to be; I can also get rid of half the conversions immediately since they aren't used.
This does not split out the "adjust" versions of the UTF conversions into their own header/implementation; that's coming in the next patch.
BUG=4010
TEST=none
Review URL: http://codereview.chromium.org/380007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31533 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | base/i18n/icu_string_conversions.cc | 104 | ||||
-rw-r--r-- | base/i18n/icu_string_conversions.h | 44 | ||||
-rw-r--r-- | base/i18n/icu_string_conversions_unittest.cc | 41 | ||||
-rw-r--r-- | base/utf_string_conversions.cc | 40 | ||||
-rw-r--r-- | base/utf_string_conversions.h | 30 | ||||
-rw-r--r-- | base/utf_string_conversions_unittest.cc | 36 | ||||
-rw-r--r-- | net/base/escape.cc | 8 |
7 files changed, 43 insertions, 260 deletions
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc index c93b103..252eb9c 100644 --- a/base/i18n/icu_string_conversions.cc +++ b/base/i18n/icu_string_conversions.cc @@ -157,8 +157,6 @@ const char kCodepageUTF16LE[] = "UTF-16LE"; // Codepage <-> Wide/UTF-16 --------------------------------------------------- -// Convert a UTF-16 string into the specified codepage_name. If the codepage -// isn't found, return false. bool UTF16ToCodepage(const string16& utf16, const char* codepage_name, OnStringConversionError::Type on_error, @@ -174,11 +172,10 @@ bool UTF16ToCodepage(const string16& utf16, static_cast<int>(utf16.length()), on_error, encoded); } -bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16, - size_t* offset_for_adjustment) { +bool CodepageToUTF16(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + string16* utf16) { utf16->clear(); UErrorCode status = U_ZERO_ERROR; @@ -197,40 +194,9 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, size_t uchar_max_length = encoded.length() + 1; SetUpErrorHandlerForToUChars(on_error, converter, &status); - char16* byte_buffer = WriteInto(utf16, uchar_max_length); - int byte_buffer_length = static_cast<int>(uchar_max_length); - const char* data = encoded.data(); - int length = static_cast<int>(encoded.length()); - int actual_size = 0; - if (offset_for_adjustment) { - if (*offset_for_adjustment >= encoded.length()) { - *offset_for_adjustment = string16::npos; - } else if (*offset_for_adjustment != 0) { - // Try to adjust the offset by converting the string in two pieces and - // using the length of the first piece as the adjusted offset. - actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, - data, static_cast<int>(*offset_for_adjustment), &status); - if (U_SUCCESS(status)) { - // Conversion succeeded, so update the offset and then fall through to - // appending the second half of the string. - data += *offset_for_adjustment; - length -= *offset_for_adjustment; - *offset_for_adjustment = actual_size; - byte_buffer += actual_size; - byte_buffer_length -= actual_size; - } else { - // The offset may have been in the middle of an encoding sequence; mark - // it as having failed to adjust and then try to convert the entire - // string. - *offset_for_adjustment = string16::npos; - actual_size = 0; - ucnv_reset(converter); - status = U_ZERO_ERROR; - } - } - } - actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data, - length, &status); + int actual_size = ucnv_toUChars(converter, WriteInto(utf16, uchar_max_length), + static_cast<int>(uchar_max_length), encoded.data(), + static_cast<int>(encoded.length()), &status); ucnv_close(converter); if (!U_SUCCESS(status)) { utf16->clear(); // Make sure the output is empty on error. @@ -241,8 +207,6 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, return true; } -// Convert a wstring into the specified codepage_name. If the codepage -// isn't found, return false. bool WideToCodepage(const std::wstring& wide, const char* codepage_name, OnStringConversionError::Type on_error, @@ -272,16 +236,12 @@ bool WideToCodepage(const std::wstring& wide, #endif // defined(WCHAR_T_IS_UTF32) } -// Converts a string of the given codepage into wstring. -// If the codepage isn't found, return false. -bool CodepageToWideAndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::wstring* wide, - size_t* offset_for_adjustment) { +bool CodepageToWide(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + std::wstring* wide) { #if defined(WCHAR_T_IS_UTF16) - return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide, - offset_for_adjustment); + return CodepageToUTF16(encoded, codepage_name, on_error, wide); #elif defined(WCHAR_T_IS_UTF32) wide->clear(); @@ -297,42 +257,10 @@ bool CodepageToWideAndAdjustOffset(const std::string& encoded, size_t wchar_max_length = encoded.length() + 1; SetUpErrorHandlerForToUChars(on_error, converter, &status); - char* byte_buffer = - reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)); - int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t); - const char* data = encoded.data(); - int length = static_cast<int>(encoded.length()); - int actual_size = 0; - if (offset_for_adjustment) { - if (*offset_for_adjustment >= encoded.length()) { - *offset_for_adjustment = std::wstring::npos; - } else if (*offset_for_adjustment != 0) { - // Try to adjust the offset by converting the string in two pieces and - // using the length of the first piece as the adjusted offset. - actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter, - byte_buffer, byte_buffer_length, data, - static_cast<int>(*offset_for_adjustment), &status); - if (U_SUCCESS(status)) { - // Conversion succeeded, so update the offset and then fall through to - // appending the second half of the string. - data += *offset_for_adjustment; - length -= *offset_for_adjustment; - *offset_for_adjustment = actual_size / sizeof(wchar_t); - byte_buffer += actual_size; - byte_buffer_length -= actual_size; - } else { - // The offset may have been in the middle of an encoding sequence; mark - // it as having failed to adjust and then try to convert the entire - // string. - *offset_for_adjustment = std::wstring::npos; - actual_size = 0; - ucnv_reset(converter); - status = U_ZERO_ERROR; - } - } - } - actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter, - byte_buffer, byte_buffer_length, data, length, &status); + int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter, + reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)), + static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(), + static_cast<int>(encoded.length()), &status); ucnv_close(converter); if (!U_SUCCESS(status)) { wide->clear(); // Make sure the output is empty on error. diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h index 6f2cab7..7b0c77e 100644 --- a/base/i18n/icu_string_conversions.h +++ b/base/i18n/icu_string_conversions.h @@ -40,17 +40,6 @@ extern const char kCodepageUTF8[]; extern const char kCodepageUTF16BE[]; extern const char kCodepageUTF16LE[]; -// Like CodepageToUTF16() (see below), but also takes an offset into |encoded|, -// which will be adjusted to point at the same logical place in |utf16|. If -// this isn't possible because it points past the end of |encoded| or into the -// middle of a multibyte sequence, it will be set to std::string16::npos. -// |offset_for_adjustment| may be NULL. -bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16, - size_t* offset_for_adjustment); - // Converts between UTF-16 strings and the encoding specified. If the // encoding doesn't exist or the encoding fails (when on_error is FAIL), // returns false. @@ -58,24 +47,10 @@ bool UTF16ToCodepage(const string16& utf16, const char* codepage_name, OnStringConversionError::Type on_error, std::string* encoded); -inline bool CodepageToUTF16(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16) { - return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, utf16, - NULL); -} - -// Like CodepageToWide() (see below), but also takes an offset into |encoded|, -// which will be adjusted to point at the same logical place in |wide|. If -// this isn't possible because it points past the end of |encoded| or into the -// middle of a multibyte sequence, it will be set to std::wstring::npos. -// |offset_for_adjustment| may be NULL. -bool CodepageToWideAndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::wstring* wide, - size_t* offset_for_adjustment); +bool CodepageToUTF16(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + string16* utf16); // Converts between wide strings and the encoding specified. If the // encoding doesn't exist or the encoding fails (when on_error is FAIL), @@ -84,13 +59,10 @@ bool WideToCodepage(const std::wstring& wide, const char* codepage_name, OnStringConversionError::Type on_error, std::string* encoded); -inline bool CodepageToWide(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::wstring* wide) { - return CodepageToWideAndAdjustOffset(encoded, codepage_name, on_error, wide, - NULL); -} +bool CodepageToWide(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + std::wstring* wide); } // namespace base diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc index 0088a03..2083fa9 100644 --- a/base/i18n/icu_string_conversions_unittest.cc +++ b/base/i18n/icu_string_conversions_unittest.cc @@ -325,45 +325,4 @@ TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { } } -static const struct { - const char* codepage_name; - const char* encoded; - size_t input_offset; - size_t u16_output_offset; - size_t wide_output_offset; -} kAdjustOffsetCases[] = { - {"gb2312", "", 0, string16::npos, std::wstring::npos}, - {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0}, - {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1}, - {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos}, - {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos}, - {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos, - std::wstring::npos}, - {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos, - std::wstring::npos}, - {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1}, -}; - -TEST(ICUStringConversionsTest, AdjustOffset) { - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) { - string16 utf16; - size_t offset = kAdjustOffsetCases[i].input_offset; - EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded, - kAdjustOffsetCases[i].codepage_name, - OnStringConversionError::FAIL, &utf16, &offset)); - EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset); - - std::wstring wide; - offset = kAdjustOffsetCases[i].input_offset; - CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded, - kAdjustOffsetCases[i].codepage_name, - OnStringConversionError::FAIL, &wide, &offset); -#if defined(WCHAR_T_IS_UTF16) - EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset); -#elif defined(WCHAR_T_IS_UTF32) - EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset); -#endif - } -} - } // namespace base diff --git a/base/utf_string_conversions.cc b/base/utf_string_conversions.cc index ffff50a..ee52f47 100644 --- a/base/utf_string_conversions.cc +++ b/base/utf_string_conversions.cc @@ -221,22 +221,16 @@ void PrepareForUTF16Or32Output(const char* src, // UTF-8 <-> Wide -------------------------------------------------------------- -bool WideToUTF8AndAdjustOffset(const wchar_t* src, - size_t src_len, - std::string* output, - size_t* offset_for_adjustment) { +bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { PrepareForUTF8Output(src, src_len, output); - return ConvertUnicode<wchar_t, std::string>(src, src_len, output, - offset_for_adjustment); + return ConvertUnicode<wchar_t, std::string>(src, src_len, output, NULL); } -std::string WideToUTF8AndAdjustOffset(const std::wstring& wide, - size_t* offset_for_adjustment) { +std::string WideToUTF8(const std::wstring& wide) { std::string ret; // Ignore the success flag of this call, it will do the best it can for // invalid input, which is what we want here. - WideToUTF8AndAdjustOffset(wide.data(), wide.length(), &ret, - offset_for_adjustment); + WideToUTF8(wide.data(), wide.length(), &ret); return ret; } @@ -262,20 +256,12 @@ std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, #if defined(WCHAR_T_IS_UTF16) // When wide == UTF-16, then conversions are a NOP. -bool WideToUTF16AndAdjustOffset(const wchar_t* src, - size_t src_len, - string16* output, - size_t* offset_for_adjustment) { +bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { output->assign(src, src_len); - if (offset_for_adjustment && (*offset_for_adjustment >= src_len)) - *offset_for_adjustment = string16::npos; return true; } -string16 WideToUTF16AndAdjustOffset(const std::wstring& wide, - size_t* offset_for_adjustment) { - if (offset_for_adjustment && (*offset_for_adjustment >= wide.length())) - *offset_for_adjustment = string16::npos; +string16 WideToUTF16(const std::wstring& wide) { return wide; } @@ -298,23 +284,17 @@ std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, #elif defined(WCHAR_T_IS_UTF32) -bool WideToUTF16AndAdjustOffset(const wchar_t* src, - size_t src_len, - string16* output, - size_t* offset_for_adjustment) { +bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { output->clear(); // Assume that normally we won't have any non-BMP characters so the counts // will be the same. output->reserve(src_len); - return ConvertUnicode<wchar_t, string16>(src, src_len, output, - offset_for_adjustment); + return ConvertUnicode<wchar_t, string16>(src, src_len, output, NULL); } -string16 WideToUTF16AndAdjustOffset(const std::wstring& wide, - size_t* offset_for_adjustment) { +string16 WideToUTF16(const std::wstring& wide) { string16 ret; - WideToUTF16AndAdjustOffset(wide.data(), wide.length(), &ret, - offset_for_adjustment); + WideToUTF16(wide.data(), wide.length(), &ret); return ret; } diff --git a/base/utf_string_conversions.h b/base/utf_string_conversions.h index 323233b..7069f83 100644 --- a/base/utf_string_conversions.h +++ b/base/utf_string_conversions.h @@ -15,12 +15,6 @@ // string. If this isn't possible because it points past the end of the source // string or into the middle of a multibyte sequence, it will be set to // std::wstring::npos. |offset_for_adjustment| may be NULL. -bool WideToUTF8AndAdjustOffset(const wchar_t* src, - size_t src_len, - std::string* output, - size_t* offset_for_adjustment); -std::string WideToUTF8AndAdjustOffset(const std::wstring& wide, - size_t* offset_for_adjustment); bool UTF8ToWideAndAdjustOffset(const char* src, size_t src_len, std::wstring* output, @@ -28,12 +22,6 @@ bool UTF8ToWideAndAdjustOffset(const char* src, std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, size_t* offset_for_adjustment); -bool WideToUTF16AndAdjustOffset(const wchar_t* src, - size_t src_len, - string16* output, - size_t* offset_for_adjustment); -string16 WideToUTF16AndAdjustOffset(const std::wstring& wide, - size_t* offset_for_adjustment); bool UTF16ToWideAndAdjustOffset(const char16* src, size_t src_len, std::wstring* output, @@ -54,14 +42,8 @@ std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, // the Unicode replacement character or adding |replacement_char| parameter. // Currently, it's skipped in the ouput, which could be problematic in // some situations. -inline bool WideToUTF8(const wchar_t* src, - size_t src_len, - std::string* output) { - return WideToUTF8AndAdjustOffset(src, src_len, output, NULL); -} -inline std::string WideToUTF8(const std::wstring& wide) { - return WideToUTF8AndAdjustOffset(wide, NULL); -} +bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output); +std::string WideToUTF8(const std::wstring& wide); inline bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { return UTF8ToWideAndAdjustOffset(src, src_len, output, NULL); } @@ -69,12 +51,8 @@ inline std::wstring UTF8ToWide(const base::StringPiece& utf8) { return UTF8ToWideAndAdjustOffset(utf8, NULL); } -inline bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { - return WideToUTF16AndAdjustOffset(src, src_len, output, NULL); -} -inline string16 WideToUTF16(const std::wstring& wide) { - return WideToUTF16AndAdjustOffset(wide, NULL); -} +bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output); +string16 WideToUTF16(const std::wstring& wide); inline bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { return UTF16ToWideAndAdjustOffset(src, src_len, output, NULL); diff --git a/base/utf_string_conversions_unittest.cc b/base/utf_string_conversions_unittest.cc index 67af7c3..ca79ec5 100644 --- a/base/utf_string_conversions_unittest.cc +++ b/base/utf_string_conversions_unittest.cc @@ -227,32 +227,12 @@ TEST(UTFStringConversionsTest, ConvertMultiString) { } TEST(UTFStringConversionsTest, AdjustOffset) { - // Under the hood, all the functions call the same converter function, so we - // don't need to exhaustively check every case. - struct WideToUTF8Case { - const wchar_t* wide; - size_t input_offset; - size_t output_offset; - } wide_to_utf8_cases[] = { - {L"", 0, std::string::npos}, - {L"\x4f60\x597d", 0, 0}, - {L"\x4f60\x597d", 1, 3}, - {L"\x4f60\x597d", 2, std::string::npos}, - {L"\x4f60\x597d", std::wstring::npos, std::string::npos}, - {L"\xd800\x597dz", 1, 0}, - {L"\xd800\x597dz", 2, 3}, - }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf8_cases); ++i) { - size_t offset = wide_to_utf8_cases[i].input_offset; - WideToUTF8AndAdjustOffset(wide_to_utf8_cases[i].wide, &offset); - EXPECT_EQ(wide_to_utf8_cases[i].output_offset, offset); - } - struct UTF8ToWideCase { const char* utf8; size_t input_offset; size_t output_offset; } utf8_to_wide_cases[] = { + {"", 0, std::wstring::npos}, {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos}, {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1}, {"\xed\xb0\x80z", 3, 0}, @@ -271,20 +251,6 @@ TEST(UTFStringConversionsTest, AdjustOffset) { } #if defined(WCHAR_T_IS_UTF32) - struct WideToUTF16Case { - const wchar_t* wide; - size_t input_offset; - size_t output_offset; - } wide_to_utf16_cases[] = { - {L"\x4F60\x597D", 1, 1}, - {L"\x20000\x4E00", 1, 2}, - }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf16_cases); ++i) { - size_t offset = wide_to_utf16_cases[i].input_offset; - WideToUTF16AndAdjustOffset(wide_to_utf16_cases[i].wide, &offset); - EXPECT_EQ(wide_to_utf16_cases[i].output_offset, offset); - } - struct UTF16ToWideCase { const wchar_t* wide; size_t input_offset; diff --git a/net/base/escape.cc b/net/base/escape.cc index 5196eb6..0327fc2 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -252,10 +252,10 @@ std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text, size_t* offset_for_adjustment) { std::wstring result; size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0; - if (base::CodepageToWideAndAdjustOffset( - UnescapeURLImpl(text, rules, offset_for_adjustment), - "UTF-8", base::OnStringConversionError::FAIL, &result, - offset_for_adjustment)) + std::string unescaped_url( + UnescapeURLImpl(text, rules, offset_for_adjustment)); + if (UTF8ToWideAndAdjustOffset(unescaped_url.data(), unescaped_url.length(), + &result, offset_for_adjustment)) return result; // Character set looks like it's valid. // Not valid. Return the escaped version. Undo our changes to |