diff options
author | cevans@chromium.org <cevans@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-01 22:16:38 +0000 |
---|---|---|
committer | cevans@chromium.org <cevans@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-01 22:16:38 +0000 |
commit | d7a3e8ec24958958db28dba44542a2c126d94e88 (patch) | |
tree | 624b1ccbf82d1bd2586088d624b465c4cfa72ee8 /base | |
parent | 4838a195200c971b1c81bddf7e483f4b95b2017a (diff) | |
download | chromium_src-d7a3e8ec24958958db28dba44542a2c126d94e88.zip chromium_src-d7a3e8ec24958958db28dba44542a2c126d94e88.tar.gz chromium_src-d7a3e8ec24958958db28dba44542a2c126d94e88.tar.bz2 |
If we can't read a Unicode character, write the standard replacement character (U+FFFD) in its place. This prevents security issues where the previous behaviour (silently dropping the unreadable sequence from the output) could be used to strip characters out of a string after it had passed some validation.
BUG=30798
TEST=utf_string_conversions_unittest.cc,utf_offset_string_conversions_unittest.cc,zip_unittest.cc
Review URL: http://codereview.chromium.org/522029
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@35430 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/utf_offset_string_conversions.cc | 3 | ||||
-rw-r--r-- | base/utf_offset_string_conversions_unittest.cc | 2 | ||||
-rw-r--r-- | base/utf_string_conversions.cc | 3 | ||||
-rw-r--r-- | base/utf_string_conversions_unittest.cc | 18 |
4 files changed, 12 insertions, 14 deletions
diff --git a/base/utf_offset_string_conversions.cc b/base/utf_offset_string_conversions.cc index 69b572e..4c47ef8 100644 --- a/base/utf_offset_string_conversions.cc +++ b/base/utf_offset_string_conversions.cc @@ -36,8 +36,7 @@ bool ConvertUnicode(const SRC_CHAR* src, if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { chars_written = WriteUnicodeCharacter(code_point, output); } else { - // TODO(jungshik): consider adding 'Replacement character' (U+FFFD) - // in place of an invalid codepoint. + chars_written = WriteUnicodeCharacter(0xFFFD, output); success = false; } if ((output_offset != std::wstring::npos) && diff --git a/base/utf_offset_string_conversions_unittest.cc b/base/utf_offset_string_conversions_unittest.cc index 00d87d3..4f13ab3 100644 --- a/base/utf_offset_string_conversions_unittest.cc +++ b/base/utf_offset_string_conversions_unittest.cc @@ -43,7 +43,7 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) { {"", 0, std::wstring::npos}, {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos}, {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1}, - {"\xed\xb0\x80z", 3, 0}, + {"\xed\xb0\x80z", 3, 1}, {"A\xF0\x90\x8C\x80z", 1, 1}, {"A\xF0\x90\x8C\x80z", 2, std::wstring::npos}, #if defined(WCHAR_T_IS_UTF16) diff --git a/base/utf_string_conversions.cc b/base/utf_string_conversions.cc index 7376933..d517e1b 100644 --- a/base/utf_string_conversions.cc +++ b/base/utf_string_conversions.cc @@ -32,8 +32,7 @@ bool ConvertUnicode(const SRC_CHAR* src, if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { WriteUnicodeCharacter(code_point, output); } else { - // TODO(jungshik): consider adding 'Replacement character' (U+FFFD) - // in place of an invalid codepoint. 
+ WriteUnicodeCharacter(0xFFFD, output); success = false; } } diff --git a/base/utf_string_conversions_unittest.cc b/base/utf_string_conversions_unittest.cc index 19189971..6ba0b5b 100644 --- a/base/utf_string_conversions_unittest.cc +++ b/base/utf_string_conversions_unittest.cc @@ -94,13 +94,13 @@ TEST(UTFStringConversionsTest, ConvertUTF8ToWide) { // Non-character is passed through. {"\xef\xbf\xbfHello", L"\xffffHello", true}, // Truncated UTF-8 sequence. - {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false}, + {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, // Truncated off the end. - {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false}, + {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false}, // Non-shortest-form UTF-8. - {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false}, + {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. - {"\xed\xb0\x80", L"", false}, + {"\xed\xb0\x80", L"\xfffd", false}, // Non-BMP characters. The second is a non-character regarded as valid. // The result will either be in UTF-16 or UTF-32. #if defined(WCHAR_T_IS_UTF16) @@ -152,9 +152,9 @@ TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) { {L"\xffffHello", "\xEF\xBF\xBFHello", true}, {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, // The first character is a truncated UTF-16 character. - {L"\xd800\x597d", "\xe5\xa5\xbd", false}, + {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, // Truncated at the end. - {L"\x597d\xd800", "\xe5\xa5\xbd", false}, + {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", false}, }; for (int i = 0; i < arraysize(convert_cases); i++) { @@ -184,10 +184,10 @@ TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { {L"\xffffHello", "\xEF\xBF\xBFHello", true}, {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, // Invalid Unicode code points. - {L"\xfffffffHello", "Hello", false}, + {L"\xfffffffHello", "\xEF\xBF\xBDHello", false}, // The first character is a truncated UTF-16 character. 
- {L"\xd800\x597d", "\xe5\xa5\xbd", false}, - {L"\xdc01Hello", "Hello", false}, + {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, + {L"\xdc01Hello", "\xef\xbf\xbdHello", false}, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |