summary | refs | log | tree | commit | diff | stats
path: root/base/i18n
diff options
context:
space:
mode:
author: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-11-10 01:43:15 +0000
committer: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-11-10 01:43:15 +0000
commit: 2500a0f7c4ac54a55d621069dc044ddc6702a518 (patch)
tree: 960a571bb2b49c9005fd152138e4398df9072854 /base/i18n
parent: 0d435eae98fbe4f8055215a7aa6c483b07b7fa03 (diff)
download: chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.zip
download: chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.gz
download: chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.bz2
Clean up recent string conversion function changes, part 1: Remove unnecessary code. Thanks to a change in escape.cc I can basically revert the ICU conversions back to what they used to be; I can also get rid of half the conversions immediately since they aren't used.
This does not split out the "adjust" versions of the UTF conversions into their own header/implementation; that's coming in the next patch.
BUG=4010
TEST=none
Review URL: http://codereview.chromium.org/380007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31533 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r-- base/i18n/icu_string_conversions.cc 104
-rw-r--r-- base/i18n/icu_string_conversions.h 44
-rw-r--r-- base/i18n/icu_string_conversions_unittest.cc 41
3 files changed, 24 insertions, 165 deletions
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc
index c93b103..252eb9c 100644
--- a/base/i18n/icu_string_conversions.cc
+++ b/base/i18n/icu_string_conversions.cc
@@ -157,8 +157,6 @@ const char kCodepageUTF16LE[] = "UTF-16LE";
// Codepage <-> Wide/UTF-16 ---------------------------------------------------
-// Convert a UTF-16 string into the specified codepage_name. If the codepage
-// isn't found, return false.
bool UTF16ToCodepage(const string16& utf16,
const char* codepage_name,
OnStringConversionError::Type on_error,
@@ -174,11 +172,10 @@ bool UTF16ToCodepage(const string16& utf16,
static_cast<int>(utf16.length()), on_error, encoded);
}
-bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- string16* utf16,
- size_t* offset_for_adjustment) {
+bool CodepageToUTF16(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ string16* utf16) {
utf16->clear();
UErrorCode status = U_ZERO_ERROR;
@@ -197,40 +194,9 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
size_t uchar_max_length = encoded.length() + 1;
SetUpErrorHandlerForToUChars(on_error, converter, &status);
- char16* byte_buffer = WriteInto(utf16, uchar_max_length);
- int byte_buffer_length = static_cast<int>(uchar_max_length);
- const char* data = encoded.data();
- int length = static_cast<int>(encoded.length());
- int actual_size = 0;
- if (offset_for_adjustment) {
- if (*offset_for_adjustment >= encoded.length()) {
- *offset_for_adjustment = string16::npos;
- } else if (*offset_for_adjustment != 0) {
- // Try to adjust the offset by converting the string in two pieces and
- // using the length of the first piece as the adjusted offset.
- actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length,
- data, static_cast<int>(*offset_for_adjustment), &status);
- if (U_SUCCESS(status)) {
- // Conversion succeeded, so update the offset and then fall through to
- // appending the second half of the string.
- data += *offset_for_adjustment;
- length -= *offset_for_adjustment;
- *offset_for_adjustment = actual_size;
- byte_buffer += actual_size;
- byte_buffer_length -= actual_size;
- } else {
- // The offset may have been in the middle of an encoding sequence; mark
- // it as having failed to adjust and then try to convert the entire
- // string.
- *offset_for_adjustment = string16::npos;
- actual_size = 0;
- ucnv_reset(converter);
- status = U_ZERO_ERROR;
- }
- }
- }
- actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data,
- length, &status);
+ int actual_size = ucnv_toUChars(converter, WriteInto(utf16, uchar_max_length),
+ static_cast<int>(uchar_max_length), encoded.data(),
+ static_cast<int>(encoded.length()), &status);
ucnv_close(converter);
if (!U_SUCCESS(status)) {
utf16->clear(); // Make sure the output is empty on error.
@@ -241,8 +207,6 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
return true;
}
-// Convert a wstring into the specified codepage_name. If the codepage
-// isn't found, return false.
bool WideToCodepage(const std::wstring& wide,
const char* codepage_name,
OnStringConversionError::Type on_error,
@@ -272,16 +236,12 @@ bool WideToCodepage(const std::wstring& wide,
#endif // defined(WCHAR_T_IS_UTF32)
}
-// Converts a string of the given codepage into wstring.
-// If the codepage isn't found, return false.
-bool CodepageToWideAndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::wstring* wide,
- size_t* offset_for_adjustment) {
+bool CodepageToWide(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ std::wstring* wide) {
#if defined(WCHAR_T_IS_UTF16)
- return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide,
- offset_for_adjustment);
+ return CodepageToUTF16(encoded, codepage_name, on_error, wide);
#elif defined(WCHAR_T_IS_UTF32)
wide->clear();
@@ -297,42 +257,10 @@ bool CodepageToWideAndAdjustOffset(const std::string& encoded,
size_t wchar_max_length = encoded.length() + 1;
SetUpErrorHandlerForToUChars(on_error, converter, &status);
- char* byte_buffer =
- reinterpret_cast<char*>(WriteInto(wide, wchar_max_length));
- int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t);
- const char* data = encoded.data();
- int length = static_cast<int>(encoded.length());
- int actual_size = 0;
- if (offset_for_adjustment) {
- if (*offset_for_adjustment >= encoded.length()) {
- *offset_for_adjustment = std::wstring::npos;
- } else if (*offset_for_adjustment != 0) {
- // Try to adjust the offset by converting the string in two pieces and
- // using the length of the first piece as the adjusted offset.
- actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
- byte_buffer, byte_buffer_length, data,
- static_cast<int>(*offset_for_adjustment), &status);
- if (U_SUCCESS(status)) {
- // Conversion succeeded, so update the offset and then fall through to
- // appending the second half of the string.
- data += *offset_for_adjustment;
- length -= *offset_for_adjustment;
- *offset_for_adjustment = actual_size / sizeof(wchar_t);
- byte_buffer += actual_size;
- byte_buffer_length -= actual_size;
- } else {
- // The offset may have been in the middle of an encoding sequence; mark
- // it as having failed to adjust and then try to convert the entire
- // string.
- *offset_for_adjustment = std::wstring::npos;
- actual_size = 0;
- ucnv_reset(converter);
- status = U_ZERO_ERROR;
- }
- }
- }
- actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
- byte_buffer, byte_buffer_length, data, length, &status);
+ int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter,
+ reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)),
+ static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(),
+ static_cast<int>(encoded.length()), &status);
ucnv_close(converter);
if (!U_SUCCESS(status)) {
wide->clear(); // Make sure the output is empty on error.
diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h
index 6f2cab7..7b0c77e 100644
--- a/base/i18n/icu_string_conversions.h
+++ b/base/i18n/icu_string_conversions.h
@@ -40,17 +40,6 @@ extern const char kCodepageUTF8[];
extern const char kCodepageUTF16BE[];
extern const char kCodepageUTF16LE[];
-// Like CodepageToUTF16() (see below), but also takes an offset into |encoded|,
-// which will be adjusted to point at the same logical place in |utf16|. If
-// this isn't possible because it points past the end of |encoded| or into the
-// middle of a multibyte sequence, it will be set to std::string16::npos.
-// |offset_for_adjustment| may be NULL.
-bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- string16* utf16,
- size_t* offset_for_adjustment);
-
// Converts between UTF-16 strings and the encoding specified. If the
// encoding doesn't exist or the encoding fails (when on_error is FAIL),
// returns false.
@@ -58,24 +47,10 @@ bool UTF16ToCodepage(const string16& utf16,
const char* codepage_name,
OnStringConversionError::Type on_error,
std::string* encoded);
-inline bool CodepageToUTF16(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- string16* utf16) {
- return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, utf16,
- NULL);
-}
-
-// Like CodepageToWide() (see below), but also takes an offset into |encoded|,
-// which will be adjusted to point at the same logical place in |wide|. If
-// this isn't possible because it points past the end of |encoded| or into the
-// middle of a multibyte sequence, it will be set to std::wstring::npos.
-// |offset_for_adjustment| may be NULL.
-bool CodepageToWideAndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::wstring* wide,
- size_t* offset_for_adjustment);
+bool CodepageToUTF16(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ string16* utf16);
// Converts between wide strings and the encoding specified. If the
// encoding doesn't exist or the encoding fails (when on_error is FAIL),
@@ -84,13 +59,10 @@ bool WideToCodepage(const std::wstring& wide,
const char* codepage_name,
OnStringConversionError::Type on_error,
std::string* encoded);
-inline bool CodepageToWide(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::wstring* wide) {
- return CodepageToWideAndAdjustOffset(encoded, codepage_name, on_error, wide,
- NULL);
-}
+bool CodepageToWide(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ std::wstring* wide);
} // namespace base
diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc
index 0088a03..2083fa9 100644
--- a/base/i18n/icu_string_conversions_unittest.cc
+++ b/base/i18n/icu_string_conversions_unittest.cc
@@ -325,45 +325,4 @@ TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {
}
}
-static const struct {
- const char* codepage_name;
- const char* encoded;
- size_t input_offset;
- size_t u16_output_offset;
- size_t wide_output_offset;
-} kAdjustOffsetCases[] = {
- {"gb2312", "", 0, string16::npos, std::wstring::npos},
- {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0},
- {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1},
- {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos},
- {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos},
- {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos,
- std::wstring::npos},
- {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos,
- std::wstring::npos},
- {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1},
-};
-
-TEST(ICUStringConversionsTest, AdjustOffset) {
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) {
- string16 utf16;
- size_t offset = kAdjustOffsetCases[i].input_offset;
- EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded,
- kAdjustOffsetCases[i].codepage_name,
- OnStringConversionError::FAIL, &utf16, &offset));
- EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
-
- std::wstring wide;
- offset = kAdjustOffsetCases[i].input_offset;
- CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded,
- kAdjustOffsetCases[i].codepage_name,
- OnStringConversionError::FAIL, &wide, &offset);
-#if defined(WCHAR_T_IS_UTF16)
- EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
-#elif defined(WCHAR_T_IS_UTF32)
- EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset);
-#endif
- }
-}
-
} // namespace base