summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-11-10 01:43:15 +0000
committer: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-11-10 01:43:15 +0000
commit: 2500a0f7c4ac54a55d621069dc044ddc6702a518 (patch)
tree: 960a571bb2b49c9005fd152138e4398df9072854
parent: 0d435eae98fbe4f8055215a7aa6c483b07b7fa03 (diff)
download: chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.zip
chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.gz
chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.bz2
Clean up recent string conversion function changes, part 1: Remove unnecessary code. Thanks to a change in escape.cc I can basically revert the ICU conversions back to what they used to be; I can also get rid of half the conversions immediately since they aren't used.
This does not split out the "adjust" versions of the UTF conversions into their own header/implementation; that's coming in the next patch.

BUG=4010
TEST=none
Review URL: http://codereview.chromium.org/380007

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31533 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- base/i18n/icu_string_conversions.cc 104
-rw-r--r-- base/i18n/icu_string_conversions.h 44
-rw-r--r-- base/i18n/icu_string_conversions_unittest.cc 41
-rw-r--r-- base/utf_string_conversions.cc 40
-rw-r--r-- base/utf_string_conversions.h 30
-rw-r--r-- base/utf_string_conversions_unittest.cc 36
-rw-r--r-- net/base/escape.cc 8
7 files changed, 43 insertions, 260 deletions
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc
index c93b103..252eb9c 100644
--- a/base/i18n/icu_string_conversions.cc
+++ b/base/i18n/icu_string_conversions.cc
@@ -157,8 +157,6 @@ const char kCodepageUTF16LE[] = "UTF-16LE";
// Codepage <-> Wide/UTF-16 ---------------------------------------------------
-// Convert a UTF-16 string into the specified codepage_name. If the codepage
-// isn't found, return false.
bool UTF16ToCodepage(const string16& utf16,
const char* codepage_name,
OnStringConversionError::Type on_error,
@@ -174,11 +172,10 @@ bool UTF16ToCodepage(const string16& utf16,
static_cast<int>(utf16.length()), on_error, encoded);
}
-bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- string16* utf16,
- size_t* offset_for_adjustment) {
+bool CodepageToUTF16(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ string16* utf16) {
utf16->clear();
UErrorCode status = U_ZERO_ERROR;
@@ -197,40 +194,9 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
size_t uchar_max_length = encoded.length() + 1;
SetUpErrorHandlerForToUChars(on_error, converter, &status);
- char16* byte_buffer = WriteInto(utf16, uchar_max_length);
- int byte_buffer_length = static_cast<int>(uchar_max_length);
- const char* data = encoded.data();
- int length = static_cast<int>(encoded.length());
- int actual_size = 0;
- if (offset_for_adjustment) {
- if (*offset_for_adjustment >= encoded.length()) {
- *offset_for_adjustment = string16::npos;
- } else if (*offset_for_adjustment != 0) {
- // Try to adjust the offset by converting the string in two pieces and
- // using the length of the first piece as the adjusted offset.
- actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length,
- data, static_cast<int>(*offset_for_adjustment), &status);
- if (U_SUCCESS(status)) {
- // Conversion succeeded, so update the offset and then fall through to
- // appending the second half of the string.
- data += *offset_for_adjustment;
- length -= *offset_for_adjustment;
- *offset_for_adjustment = actual_size;
- byte_buffer += actual_size;
- byte_buffer_length -= actual_size;
- } else {
- // The offset may have been in the middle of an encoding sequence; mark
- // it as having failed to adjust and then try to convert the entire
- // string.
- *offset_for_adjustment = string16::npos;
- actual_size = 0;
- ucnv_reset(converter);
- status = U_ZERO_ERROR;
- }
- }
- }
- actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data,
- length, &status);
+ int actual_size = ucnv_toUChars(converter, WriteInto(utf16, uchar_max_length),
+ static_cast<int>(uchar_max_length), encoded.data(),
+ static_cast<int>(encoded.length()), &status);
ucnv_close(converter);
if (!U_SUCCESS(status)) {
utf16->clear(); // Make sure the output is empty on error.
@@ -241,8 +207,6 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
return true;
}
-// Convert a wstring into the specified codepage_name. If the codepage
-// isn't found, return false.
bool WideToCodepage(const std::wstring& wide,
const char* codepage_name,
OnStringConversionError::Type on_error,
@@ -272,16 +236,12 @@ bool WideToCodepage(const std::wstring& wide,
#endif // defined(WCHAR_T_IS_UTF32)
}
-// Converts a string of the given codepage into wstring.
-// If the codepage isn't found, return false.
-bool CodepageToWideAndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::wstring* wide,
- size_t* offset_for_adjustment) {
+bool CodepageToWide(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ std::wstring* wide) {
#if defined(WCHAR_T_IS_UTF16)
- return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide,
- offset_for_adjustment);
+ return CodepageToUTF16(encoded, codepage_name, on_error, wide);
#elif defined(WCHAR_T_IS_UTF32)
wide->clear();
@@ -297,42 +257,10 @@ bool CodepageToWideAndAdjustOffset(const std::string& encoded,
size_t wchar_max_length = encoded.length() + 1;
SetUpErrorHandlerForToUChars(on_error, converter, &status);
- char* byte_buffer =
- reinterpret_cast<char*>(WriteInto(wide, wchar_max_length));
- int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t);
- const char* data = encoded.data();
- int length = static_cast<int>(encoded.length());
- int actual_size = 0;
- if (offset_for_adjustment) {
- if (*offset_for_adjustment >= encoded.length()) {
- *offset_for_adjustment = std::wstring::npos;
- } else if (*offset_for_adjustment != 0) {
- // Try to adjust the offset by converting the string in two pieces and
- // using the length of the first piece as the adjusted offset.
- actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
- byte_buffer, byte_buffer_length, data,
- static_cast<int>(*offset_for_adjustment), &status);
- if (U_SUCCESS(status)) {
- // Conversion succeeded, so update the offset and then fall through to
- // appending the second half of the string.
- data += *offset_for_adjustment;
- length -= *offset_for_adjustment;
- *offset_for_adjustment = actual_size / sizeof(wchar_t);
- byte_buffer += actual_size;
- byte_buffer_length -= actual_size;
- } else {
- // The offset may have been in the middle of an encoding sequence; mark
- // it as having failed to adjust and then try to convert the entire
- // string.
- *offset_for_adjustment = std::wstring::npos;
- actual_size = 0;
- ucnv_reset(converter);
- status = U_ZERO_ERROR;
- }
- }
- }
- actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
- byte_buffer, byte_buffer_length, data, length, &status);
+ int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter,
+ reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)),
+ static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(),
+ static_cast<int>(encoded.length()), &status);
ucnv_close(converter);
if (!U_SUCCESS(status)) {
wide->clear(); // Make sure the output is empty on error.
diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h
index 6f2cab7..7b0c77e 100644
--- a/base/i18n/icu_string_conversions.h
+++ b/base/i18n/icu_string_conversions.h
@@ -40,17 +40,6 @@ extern const char kCodepageUTF8[];
extern const char kCodepageUTF16BE[];
extern const char kCodepageUTF16LE[];
-// Like CodepageToUTF16() (see below), but also takes an offset into |encoded|,
-// which will be adjusted to point at the same logical place in |utf16|. If
-// this isn't possible because it points past the end of |encoded| or into the
-// middle of a multibyte sequence, it will be set to std::string16::npos.
-// |offset_for_adjustment| may be NULL.
-bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- string16* utf16,
- size_t* offset_for_adjustment);
-
// Converts between UTF-16 strings and the encoding specified. If the
// encoding doesn't exist or the encoding fails (when on_error is FAIL),
// returns false.
@@ -58,24 +47,10 @@ bool UTF16ToCodepage(const string16& utf16,
const char* codepage_name,
OnStringConversionError::Type on_error,
std::string* encoded);
-inline bool CodepageToUTF16(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- string16* utf16) {
- return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, utf16,
- NULL);
-}
-
-// Like CodepageToWide() (see below), but also takes an offset into |encoded|,
-// which will be adjusted to point at the same logical place in |wide|. If
-// this isn't possible because it points past the end of |encoded| or into the
-// middle of a multibyte sequence, it will be set to std::wstring::npos.
-// |offset_for_adjustment| may be NULL.
-bool CodepageToWideAndAdjustOffset(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::wstring* wide,
- size_t* offset_for_adjustment);
+bool CodepageToUTF16(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ string16* utf16);
// Converts between wide strings and the encoding specified. If the
// encoding doesn't exist or the encoding fails (when on_error is FAIL),
@@ -84,13 +59,10 @@ bool WideToCodepage(const std::wstring& wide,
const char* codepage_name,
OnStringConversionError::Type on_error,
std::string* encoded);
-inline bool CodepageToWide(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::wstring* wide) {
- return CodepageToWideAndAdjustOffset(encoded, codepage_name, on_error, wide,
- NULL);
-}
+bool CodepageToWide(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ std::wstring* wide);
} // namespace base
diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc
index 0088a03..2083fa9 100644
--- a/base/i18n/icu_string_conversions_unittest.cc
+++ b/base/i18n/icu_string_conversions_unittest.cc
@@ -325,45 +325,4 @@ TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {
}
}
-static const struct {
- const char* codepage_name;
- const char* encoded;
- size_t input_offset;
- size_t u16_output_offset;
- size_t wide_output_offset;
-} kAdjustOffsetCases[] = {
- {"gb2312", "", 0, string16::npos, std::wstring::npos},
- {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0},
- {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1},
- {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos},
- {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos},
- {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos,
- std::wstring::npos},
- {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos,
- std::wstring::npos},
- {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1},
-};
-
-TEST(ICUStringConversionsTest, AdjustOffset) {
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) {
- string16 utf16;
- size_t offset = kAdjustOffsetCases[i].input_offset;
- EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded,
- kAdjustOffsetCases[i].codepage_name,
- OnStringConversionError::FAIL, &utf16, &offset));
- EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
-
- std::wstring wide;
- offset = kAdjustOffsetCases[i].input_offset;
- CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded,
- kAdjustOffsetCases[i].codepage_name,
- OnStringConversionError::FAIL, &wide, &offset);
-#if defined(WCHAR_T_IS_UTF16)
- EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
-#elif defined(WCHAR_T_IS_UTF32)
- EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset);
-#endif
- }
-}
-
} // namespace base
diff --git a/base/utf_string_conversions.cc b/base/utf_string_conversions.cc
index ffff50a..ee52f47 100644
--- a/base/utf_string_conversions.cc
+++ b/base/utf_string_conversions.cc
@@ -221,22 +221,16 @@ void PrepareForUTF16Or32Output(const char* src,
// UTF-8 <-> Wide --------------------------------------------------------------
-bool WideToUTF8AndAdjustOffset(const wchar_t* src,
- size_t src_len,
- std::string* output,
- size_t* offset_for_adjustment) {
+bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
PrepareForUTF8Output(src, src_len, output);
- return ConvertUnicode<wchar_t, std::string>(src, src_len, output,
- offset_for_adjustment);
+ return ConvertUnicode<wchar_t, std::string>(src, src_len, output, NULL);
}
-std::string WideToUTF8AndAdjustOffset(const std::wstring& wide,
- size_t* offset_for_adjustment) {
+std::string WideToUTF8(const std::wstring& wide) {
std::string ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
- WideToUTF8AndAdjustOffset(wide.data(), wide.length(), &ret,
- offset_for_adjustment);
+ WideToUTF8(wide.data(), wide.length(), &ret);
return ret;
}
@@ -262,20 +256,12 @@ std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
#if defined(WCHAR_T_IS_UTF16)
// When wide == UTF-16, then conversions are a NOP.
-bool WideToUTF16AndAdjustOffset(const wchar_t* src,
- size_t src_len,
- string16* output,
- size_t* offset_for_adjustment) {
+bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
output->assign(src, src_len);
- if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
- *offset_for_adjustment = string16::npos;
return true;
}
-string16 WideToUTF16AndAdjustOffset(const std::wstring& wide,
- size_t* offset_for_adjustment) {
- if (offset_for_adjustment && (*offset_for_adjustment >= wide.length()))
- *offset_for_adjustment = string16::npos;
+string16 WideToUTF16(const std::wstring& wide) {
return wide;
}
@@ -298,23 +284,17 @@ std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
#elif defined(WCHAR_T_IS_UTF32)
-bool WideToUTF16AndAdjustOffset(const wchar_t* src,
- size_t src_len,
- string16* output,
- size_t* offset_for_adjustment) {
+bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
output->clear();
// Assume that normally we won't have any non-BMP characters so the counts
// will be the same.
output->reserve(src_len);
- return ConvertUnicode<wchar_t, string16>(src, src_len, output,
- offset_for_adjustment);
+ return ConvertUnicode<wchar_t, string16>(src, src_len, output, NULL);
}
-string16 WideToUTF16AndAdjustOffset(const std::wstring& wide,
- size_t* offset_for_adjustment) {
+string16 WideToUTF16(const std::wstring& wide) {
string16 ret;
- WideToUTF16AndAdjustOffset(wide.data(), wide.length(), &ret,
- offset_for_adjustment);
+ WideToUTF16(wide.data(), wide.length(), &ret);
return ret;
}
diff --git a/base/utf_string_conversions.h b/base/utf_string_conversions.h
index 323233b..7069f83 100644
--- a/base/utf_string_conversions.h
+++ b/base/utf_string_conversions.h
@@ -15,12 +15,6 @@
// string. If this isn't possible because it points past the end of the source
// string or into the middle of a multibyte sequence, it will be set to
// std::wstring::npos. |offset_for_adjustment| may be NULL.
-bool WideToUTF8AndAdjustOffset(const wchar_t* src,
- size_t src_len,
- std::string* output,
- size_t* offset_for_adjustment);
-std::string WideToUTF8AndAdjustOffset(const std::wstring& wide,
- size_t* offset_for_adjustment);
bool UTF8ToWideAndAdjustOffset(const char* src,
size_t src_len,
std::wstring* output,
@@ -28,12 +22,6 @@ bool UTF8ToWideAndAdjustOffset(const char* src,
std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
size_t* offset_for_adjustment);
-bool WideToUTF16AndAdjustOffset(const wchar_t* src,
- size_t src_len,
- string16* output,
- size_t* offset_for_adjustment);
-string16 WideToUTF16AndAdjustOffset(const std::wstring& wide,
- size_t* offset_for_adjustment);
bool UTF16ToWideAndAdjustOffset(const char16* src,
size_t src_len,
std::wstring* output,
@@ -54,14 +42,8 @@ std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
// the Unicode replacement character or adding |replacement_char| parameter.
// Currently, it's skipped in the output, which could be problematic in
// some situations.
-inline bool WideToUTF8(const wchar_t* src,
- size_t src_len,
- std::string* output) {
- return WideToUTF8AndAdjustOffset(src, src_len, output, NULL);
-}
-inline std::string WideToUTF8(const std::wstring& wide) {
- return WideToUTF8AndAdjustOffset(wide, NULL);
-}
+bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);
+std::string WideToUTF8(const std::wstring& wide);
inline bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
return UTF8ToWideAndAdjustOffset(src, src_len, output, NULL);
}
@@ -69,12 +51,8 @@ inline std::wstring UTF8ToWide(const base::StringPiece& utf8) {
return UTF8ToWideAndAdjustOffset(utf8, NULL);
}
-inline bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
- return WideToUTF16AndAdjustOffset(src, src_len, output, NULL);
-}
-inline string16 WideToUTF16(const std::wstring& wide) {
- return WideToUTF16AndAdjustOffset(wide, NULL);
-}
+bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output);
+string16 WideToUTF16(const std::wstring& wide);
inline bool UTF16ToWide(const char16* src, size_t src_len,
std::wstring* output) {
return UTF16ToWideAndAdjustOffset(src, src_len, output, NULL);
diff --git a/base/utf_string_conversions_unittest.cc b/base/utf_string_conversions_unittest.cc
index 67af7c3..ca79ec5 100644
--- a/base/utf_string_conversions_unittest.cc
+++ b/base/utf_string_conversions_unittest.cc
@@ -227,32 +227,12 @@ TEST(UTFStringConversionsTest, ConvertMultiString) {
}
TEST(UTFStringConversionsTest, AdjustOffset) {
- // Under the hood, all the functions call the same converter function, so we
- // don't need to exhaustively check every case.
- struct WideToUTF8Case {
- const wchar_t* wide;
- size_t input_offset;
- size_t output_offset;
- } wide_to_utf8_cases[] = {
- {L"", 0, std::string::npos},
- {L"\x4f60\x597d", 0, 0},
- {L"\x4f60\x597d", 1, 3},
- {L"\x4f60\x597d", 2, std::string::npos},
- {L"\x4f60\x597d", std::wstring::npos, std::string::npos},
- {L"\xd800\x597dz", 1, 0},
- {L"\xd800\x597dz", 2, 3},
- };
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf8_cases); ++i) {
- size_t offset = wide_to_utf8_cases[i].input_offset;
- WideToUTF8AndAdjustOffset(wide_to_utf8_cases[i].wide, &offset);
- EXPECT_EQ(wide_to_utf8_cases[i].output_offset, offset);
- }
-
struct UTF8ToWideCase {
const char* utf8;
size_t input_offset;
size_t output_offset;
} utf8_to_wide_cases[] = {
+ {"", 0, std::wstring::npos},
{"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos},
{"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
{"\xed\xb0\x80z", 3, 0},
@@ -271,20 +251,6 @@ TEST(UTFStringConversionsTest, AdjustOffset) {
}
#if defined(WCHAR_T_IS_UTF32)
- struct WideToUTF16Case {
- const wchar_t* wide;
- size_t input_offset;
- size_t output_offset;
- } wide_to_utf16_cases[] = {
- {L"\x4F60\x597D", 1, 1},
- {L"\x20000\x4E00", 1, 2},
- };
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf16_cases); ++i) {
- size_t offset = wide_to_utf16_cases[i].input_offset;
- WideToUTF16AndAdjustOffset(wide_to_utf16_cases[i].wide, &offset);
- EXPECT_EQ(wide_to_utf16_cases[i].output_offset, offset);
- }
-
struct UTF16ToWideCase {
const wchar_t* wide;
size_t input_offset;
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 5196eb6..0327fc2 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -252,10 +252,10 @@ std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text,
size_t* offset_for_adjustment) {
std::wstring result;
size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
- if (base::CodepageToWideAndAdjustOffset(
- UnescapeURLImpl(text, rules, offset_for_adjustment),
- "UTF-8", base::OnStringConversionError::FAIL, &result,
- offset_for_adjustment))
+ std::string unescaped_url(
+ UnescapeURLImpl(text, rules, offset_for_adjustment));
+ if (UTF8ToWideAndAdjustOffset(unescaped_url.data(), unescaped_url.length(),
+ &result, offset_for_adjustment))
return result; // Character set looks like it's valid.
// Not valid. Return the escaped version. Undo our changes to