diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-10 01:43:15 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-10 01:43:15 +0000 |
commit | 2500a0f7c4ac54a55d621069dc044ddc6702a518 (patch) | |
tree | 960a571bb2b49c9005fd152138e4398df9072854 /base/i18n | |
parent | 0d435eae98fbe4f8055215a7aa6c483b07b7fa03 (diff) | |
download | chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.zip chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.gz chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.bz2 |
Clean up recent string conversion function changes, part 1: Remove unnecessary code. Thanks to a change in escape.cc I can basically revert the ICU conversions back to what they used to be; I can also get rid of half the conversions immediately since they aren't used.
This does not split out the "adjust" versions of the UTF conversions into their own header/implementation; that's coming in the next patch.
BUG=4010
TEST=none
Review URL: http://codereview.chromium.org/380007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31533 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r-- | base/i18n/icu_string_conversions.cc | 104 | ||||
-rw-r--r-- | base/i18n/icu_string_conversions.h | 44 | ||||
-rw-r--r-- | base/i18n/icu_string_conversions_unittest.cc | 41 |
3 files changed, 24 insertions, 165 deletions
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc index c93b103..252eb9c 100644 --- a/base/i18n/icu_string_conversions.cc +++ b/base/i18n/icu_string_conversions.cc @@ -157,8 +157,6 @@ const char kCodepageUTF16LE[] = "UTF-16LE"; // Codepage <-> Wide/UTF-16 --------------------------------------------------- -// Convert a UTF-16 string into the specified codepage_name. If the codepage -// isn't found, return false. bool UTF16ToCodepage(const string16& utf16, const char* codepage_name, OnStringConversionError::Type on_error, @@ -174,11 +172,10 @@ bool UTF16ToCodepage(const string16& utf16, static_cast<int>(utf16.length()), on_error, encoded); } -bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16, - size_t* offset_for_adjustment) { +bool CodepageToUTF16(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + string16* utf16) { utf16->clear(); UErrorCode status = U_ZERO_ERROR; @@ -197,40 +194,9 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, size_t uchar_max_length = encoded.length() + 1; SetUpErrorHandlerForToUChars(on_error, converter, &status); - char16* byte_buffer = WriteInto(utf16, uchar_max_length); - int byte_buffer_length = static_cast<int>(uchar_max_length); - const char* data = encoded.data(); - int length = static_cast<int>(encoded.length()); - int actual_size = 0; - if (offset_for_adjustment) { - if (*offset_for_adjustment >= encoded.length()) { - *offset_for_adjustment = string16::npos; - } else if (*offset_for_adjustment != 0) { - // Try to adjust the offset by converting the string in two pieces and - // using the length of the first piece as the adjusted offset. - actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, - data, static_cast<int>(*offset_for_adjustment), &status); - if (U_SUCCESS(status)) { - // Conversion succeeded, so update the offset and then fall through to - // appending the second half of the string. - data += *offset_for_adjustment; - length -= *offset_for_adjustment; - *offset_for_adjustment = actual_size; - byte_buffer += actual_size; - byte_buffer_length -= actual_size; - } else { - // The offset may have been in the middle of an encoding sequence; mark - // it as having failed to adjust and then try to convert the entire - // string. - *offset_for_adjustment = string16::npos; - actual_size = 0; - ucnv_reset(converter); - status = U_ZERO_ERROR; - } - } - } - actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data, - length, &status); + int actual_size = ucnv_toUChars(converter, WriteInto(utf16, uchar_max_length), + static_cast<int>(uchar_max_length), encoded.data(), + static_cast<int>(encoded.length()), &status); ucnv_close(converter); if (!U_SUCCESS(status)) { utf16->clear(); // Make sure the output is empty on error. @@ -241,8 +207,6 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, return true; } -// Convert a wstring into the specified codepage_name. If the codepage -// isn't found, return false. bool WideToCodepage(const std::wstring& wide, const char* codepage_name, OnStringConversionError::Type on_error, @@ -272,16 +236,12 @@ bool WideToCodepage(const std::wstring& wide, #endif // defined(WCHAR_T_IS_UTF32) } -// Converts a string of the given codepage into wstring. -// If the codepage isn't found, return false. -bool CodepageToWideAndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::wstring* wide, - size_t* offset_for_adjustment) { +bool CodepageToWide(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + std::wstring* wide) { #if defined(WCHAR_T_IS_UTF16) - return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide, - offset_for_adjustment); + return CodepageToUTF16(encoded, codepage_name, on_error, wide); #elif defined(WCHAR_T_IS_UTF32) wide->clear(); @@ -297,42 +257,10 @@ bool CodepageToWideAndAdjustOffset(const std::string& encoded, size_t wchar_max_length = encoded.length() + 1; SetUpErrorHandlerForToUChars(on_error, converter, &status); - char* byte_buffer = - reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)); - int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t); - const char* data = encoded.data(); - int length = static_cast<int>(encoded.length()); - int actual_size = 0; - if (offset_for_adjustment) { - if (*offset_for_adjustment >= encoded.length()) { - *offset_for_adjustment = std::wstring::npos; - } else if (*offset_for_adjustment != 0) { - // Try to adjust the offset by converting the string in two pieces and - // using the length of the first piece as the adjusted offset. - actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter, - byte_buffer, byte_buffer_length, data, - static_cast<int>(*offset_for_adjustment), &status); - if (U_SUCCESS(status)) { - // Conversion succeeded, so update the offset and then fall through to - // appending the second half of the string. - data += *offset_for_adjustment; - length -= *offset_for_adjustment; - *offset_for_adjustment = actual_size / sizeof(wchar_t); - byte_buffer += actual_size; - byte_buffer_length -= actual_size; - } else { - // The offset may have been in the middle of an encoding sequence; mark - // it as having failed to adjust and then try to convert the entire - // string. - *offset_for_adjustment = std::wstring::npos; - actual_size = 0; - ucnv_reset(converter); - status = U_ZERO_ERROR; - } - } - } - actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter, - byte_buffer, byte_buffer_length, data, length, &status); + int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter, + reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)), + static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(), + static_cast<int>(encoded.length()), &status); ucnv_close(converter); if (!U_SUCCESS(status)) { wide->clear(); // Make sure the output is empty on error. diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h index 6f2cab7..7b0c77e 100644 --- a/base/i18n/icu_string_conversions.h +++ b/base/i18n/icu_string_conversions.h @@ -40,17 +40,6 @@ extern const char kCodepageUTF8[]; extern const char kCodepageUTF16BE[]; extern const char kCodepageUTF16LE[]; -// Like CodepageToUTF16() (see below), but also takes an offset into |encoded|, -// which will be adjusted to point at the same logical place in |utf16|. If -// this isn't possible because it points past the end of |encoded| or into the -// middle of a multibyte sequence, it will be set to std::string16::npos. -// |offset_for_adjustment| may be NULL. -bool CodepageToUTF16AndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16, - size_t* offset_for_adjustment); - // Converts between UTF-16 strings and the encoding specified. If the // encoding doesn't exist or the encoding fails (when on_error is FAIL), // returns false. @@ -58,24 +47,10 @@ bool UTF16ToCodepage(const string16& utf16, const char* codepage_name, OnStringConversionError::Type on_error, std::string* encoded); -inline bool CodepageToUTF16(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16) { - return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, utf16, - NULL); -} - -// Like CodepageToWide() (see below), but also takes an offset into |encoded|, -// which will be adjusted to point at the same logical place in |wide|. If -// this isn't possible because it points past the end of |encoded| or into the -// middle of a multibyte sequence, it will be set to std::wstring::npos. -// |offset_for_adjustment| may be NULL. -bool CodepageToWideAndAdjustOffset(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::wstring* wide, - size_t* offset_for_adjustment); +bool CodepageToUTF16(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + string16* utf16); // Converts between wide strings and the encoding specified. If the // encoding doesn't exist or the encoding fails (when on_error is FAIL), @@ -84,13 +59,10 @@ bool WideToCodepage(const std::wstring& wide, const char* codepage_name, OnStringConversionError::Type on_error, std::string* encoded); -inline bool CodepageToWide(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::wstring* wide) { - return CodepageToWideAndAdjustOffset(encoded, codepage_name, on_error, wide, - NULL); -} +bool CodepageToWide(const std::string& encoded, + const char* codepage_name, + OnStringConversionError::Type on_error, + std::wstring* wide); } // namespace base diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc index 0088a03..2083fa9 100644 --- a/base/i18n/icu_string_conversions_unittest.cc +++ b/base/i18n/icu_string_conversions_unittest.cc @@ -325,45 +325,4 @@ TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { } } -static const struct { - const char* codepage_name; - const char* encoded; - size_t input_offset; - size_t u16_output_offset; - size_t wide_output_offset; -} kAdjustOffsetCases[] = { - {"gb2312", "", 0, string16::npos, std::wstring::npos}, - {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0}, - {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1}, - {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos}, - {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos}, - {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos, - std::wstring::npos}, - {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos, - std::wstring::npos}, - {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1}, -}; - -TEST(ICUStringConversionsTest, AdjustOffset) { - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) { - string16 utf16; - size_t offset = kAdjustOffsetCases[i].input_offset; - EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded, - kAdjustOffsetCases[i].codepage_name, - OnStringConversionError::FAIL, &utf16, &offset)); - EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset); - - std::wstring wide; - offset = kAdjustOffsetCases[i].input_offset; - CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded, - kAdjustOffsetCases[i].codepage_name, - OnStringConversionError::FAIL, &wide, &offset); -#if defined(WCHAR_T_IS_UTF16) - EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset); -#elif defined(WCHAR_T_IS_UTF32) - EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset); -#endif - } -} - } // namespace base |