summary | refs | log | tree | commit | diff | stats
path: root/base/utf_string_conversions_unittest.cc
diff options
context:
space:
mode:
authorcevans@chromium.org <cevans@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-01-01 22:16:38 +0000
committercevans@chromium.org <cevans@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-01-01 22:16:38 +0000
commitd7a3e8ec24958958db28dba44542a2c126d94e88 (patch)
tree624b1ccbf82d1bd2586088d624b465c4cfa72ee8 /base/utf_string_conversions_unittest.cc
parent4838a195200c971b1c81bddf7e483f4b95b2017a (diff)
downloadchromium_src-d7a3e8ec24958958db28dba44542a2c126d94e88.zip
chromium_src-d7a3e8ec24958958db28dba44542a2c126d94e88.tar.gz
chromium_src-d7a3e8ec24958958db28dba44542a2c126d94e88.tar.bz2
If we can't read a Unicode character, write the standard "unknown" character (U+FFFD, REPLACEMENT CHARACTER) in its place instead of dropping it. This prevents security issues where the current behaviour (silently omitting the unreadable input) can be used to strip characters out of a string after it has passed some validation.
BUG=30798
TEST=utf_string_conversions_unittest.cc, utf_offset_string_conversions_unittest.cc, zip_unittest.cc
Review URL: http://codereview.chromium.org/522029
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@35430 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/utf_string_conversions_unittest.cc')
-rw-r--r--base/utf_string_conversions_unittest.cc18
1 files changed, 9 insertions, 9 deletions
diff --git a/base/utf_string_conversions_unittest.cc b/base/utf_string_conversions_unittest.cc
index 19189971..6ba0b5b 100644
--- a/base/utf_string_conversions_unittest.cc
+++ b/base/utf_string_conversions_unittest.cc
@@ -94,13 +94,13 @@ TEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
// Non-character is passed through.
{"\xef\xbf\xbfHello", L"\xffffHello", true},
// Truncated UTF-8 sequence.
- {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},
+ {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
// Truncated off the end.
- {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},
+ {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false},
// Non-shortest-form UTF-8.
- {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},
+ {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
// This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
- {"\xed\xb0\x80", L"", false},
+ {"\xed\xb0\x80", L"\xfffd", false},
// Non-BMP characters. The second is a non-character regarded as valid.
// The result will either be in UTF-16 or UTF-32.
#if defined(WCHAR_T_IS_UTF16)
@@ -152,9 +152,9 @@ TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
{L"\xffffHello", "\xEF\xBF\xBFHello", true},
{L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
// The first character is a truncated UTF-16 character.
- {L"\xd800\x597d", "\xe5\xa5\xbd", false},
+ {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},
// Truncated at the end.
- {L"\x597d\xd800", "\xe5\xa5\xbd", false},
+ {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", false},
};
for (int i = 0; i < arraysize(convert_cases); i++) {
@@ -184,10 +184,10 @@ TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) {
{L"\xffffHello", "\xEF\xBF\xBFHello", true},
{L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
// Invalid Unicode code points.
- {L"\xfffffffHello", "Hello", false},
+ {L"\xfffffffHello", "\xEF\xBF\xBDHello", false},
// The first character is a truncated UTF-16 character.
- {L"\xd800\x597d", "\xe5\xa5\xbd", false},
- {L"\xdc01Hello", "Hello", false},
+ {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},
+ {L"\xdc01Hello", "\xef\xbf\xbdHello", false},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {