diff options
-rw-r--r-- | base/json_writer.cc | 21 | ||||
-rw-r--r-- | base/string_escape.cc | 67 | ||||
-rw-r--r-- | base/string_escape.h | 27 | ||||
-rw-r--r-- | base/string_escape_unittest.cc | 135 | ||||
-rw-r--r-- | base/string_util.cc | 29 | ||||
-rw-r--r-- | base/string_util.h | 28 | ||||
-rw-r--r-- | chrome/common/json_value_serializer_unittest.cc | 35 | ||||
-rw-r--r-- | net/base/net_util.cc | 11 |
8 files changed, 186 insertions, 167 deletions
diff --git a/base/json_writer.cc b/base/json_writer.cc index aa66306..a95798e 100644 --- a/base/json_writer.cc +++ b/base/json_writer.cc @@ -92,16 +92,15 @@ void JSONWriter::BuildJSONString(const Value* const node, case Value::TYPE_STRING: { + std::string value; + bool result = node->GetAsString(&value); + DCHECK(result); if (escape) { - std::wstring value; - bool result = node->GetAsString(&value); - DCHECK(result); - AppendQuotedString(value); + string_escape::JsonDoubleQuote(UTF8ToUTF16(value), + true, + json_string_); } else { - std::string value; - bool result = node->GetAsString(&value); - DCHECK(result); - string_escape::JavascriptDoubleQuote(value, true, json_string_); + string_escape::JsonDoubleQuote(value, true, json_string_); } break; } @@ -182,9 +181,9 @@ void JSONWriter::BuildJSONString(const Value* const node, } void JSONWriter::AppendQuotedString(const std::wstring& str) { - string_escape::JavascriptDoubleQuote(WideToUTF16Hack(str), - true, - json_string_); + string_escape::JsonDoubleQuote(WideToUTF16Hack(str), + true, + json_string_); } void JSONWriter::IndentLine(int depth) { diff --git a/base/string_escape.cc b/base/string_escape.cc index aafee11..0fecfa1 100644 --- a/base/string_escape.cc +++ b/base/string_escape.cc @@ -11,10 +11,13 @@ namespace string_escape { // Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, -// returns true and appends the escape sequence to |dst|. +// returns true and appends the escape sequence to |dst|. This isn't required +// by the spec, but it's more readable by humans than the \uXXXX alternatives. template<typename CHAR> -static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) { +static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { // WARNING: if you add a new case here, you need to update the reader as well. + // Note: \v is in the reader, but not here since the JSON spec doesn't + // allow it. 
switch (c) { case '\b': dst->append("\\b"); @@ -31,9 +34,6 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) { case '\t': dst->append("\\t"); break; - case '\v': - dst->append("\\v"); - break; case '\\': dst->append("\\\\"); break; @@ -46,25 +46,24 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) { return true; } -void JavascriptDoubleQuote(const string16& str, - bool put_in_quotes, - std::string* dst) { +template <class STR> +void JsonDoubleQuoteT(const STR& str, + bool put_in_quotes, + std::string* dst) { if (put_in_quotes) dst->push_back('"'); - for (string16::const_iterator it = str.begin(); it != str.end(); ++it) { - char16 c = *it; - if (!JavascriptSingleEscapeChar(c, dst)) { - if (c > 255) { - // Non-ascii values need to be unicode dst-> - // TODO(tc): Some unicode values are handled specially. See - // spidermonkey code. - StringAppendF(dst, "\\u%04X", c); - } else if (c < 32 || c > 126) { - // Spidermonkey hex escapes these values. - StringAppendF(dst, "\\x%02X", c); + for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { + typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; + if (!JsonSingleEscapeChar(c, dst)) { + if (c < 32 || c > 126) { + // Technically, we could also pass through c > 126 as UTF8, but this is + // also optional. It would also be a pain to implement here. 
+ unsigned int as_uint = static_cast<unsigned int>(c); + StringAppendF(dst, "\\u%04X", as_uint); } else { - dst->push_back(static_cast<char>(c)); + unsigned char ascii = static_cast<unsigned char>(*it); + dst->push_back(ascii); } } } @@ -73,26 +72,16 @@ void JavascriptDoubleQuote(const string16& str, dst->push_back('"'); } -void JavascriptDoubleQuote(const std::string& str, - bool put_in_quotes, - std::string* dst) { - if (put_in_quotes) - dst->push_back('"'); - - for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) { - unsigned char c = *it; - if (!JavascriptSingleEscapeChar(c, dst)) { - // Hex encode if the character is non-printable 7bit ascii - if (c < 32 || c == 127) { - StringAppendF(dst, "\\x%02X", c); - } else { - dst->push_back(static_cast<char>(c)); - } - } - } +void JsonDoubleQuote(const std::string& str, + bool put_in_quotes, + std::string* dst) { + JsonDoubleQuoteT(str, put_in_quotes, dst); +} - if (put_in_quotes) - dst->push_back('"'); +void JsonDoubleQuote(const string16& str, + bool put_in_quotes, + std::string* dst) { + JsonDoubleQuoteT(str, put_in_quotes, dst); } } // namespace string_escape diff --git a/base/string_escape.h b/base/string_escape.h index 9c6233a..3f0bf3f 100644 --- a/base/string_escape.h +++ b/base/string_escape.h @@ -7,28 +7,25 @@ #ifndef BASE_STRING_ESCAPE_H__ #define BASE_STRING_ESCAPE_H__ +#include <string> + #include "base/string16.h" namespace string_escape { -// Escape |str| appropriately for a javascript string litereal, _appending_ the -// result to |dst|. This will create standard escape sequences (\b, \n), -// hex escape sequences (\x00), and unicode escape sequences (\uXXXX). +// Escape |str| appropriately for a JSON string literal, _appending_ the +// result to |dst|. This will create unicode escape sequences (\uXXXX). // If |put_in_quotes| is true, the result will be surrounded in double quotes. 
// The outputted literal, when interpreted by the browser, should result in a // javascript string that is identical and the same length as the input |str|. -void JavascriptDoubleQuote(const string16& str, - bool put_in_quotes, - std::string* dst); - -// Similar to the wide version, but for narrow strings. It will not use -// \uXXXX unicode escape sequences. It will pass non-7bit characters directly -// into the string unencoded, allowing the browser to interpret the encoding. -// The outputted literal, when interpreted by the browser, could result in a -// javascript string of a different length than the input |str|. -void JavascriptDoubleQuote(const std::string& str, - bool put_in_quotes, - std::string* dst); +void JsonDoubleQuote(const std::string& str, + bool put_in_quotes, + std::string* dst); + +void JsonDoubleQuote(const string16& str, + bool put_in_quotes, + std::string* dst); + } // namespace string_escape diff --git a/base/string_escape_unittest.cc b/base/string_escape_unittest.cc index e6a1806..d731dc1 100644 --- a/base/string_escape_unittest.cc +++ b/base/string_escape_unittest.cc @@ -6,59 +6,90 @@ #include "base/string_escape.h" #include "base/string_util.h" -TEST(StringEscapeTest, JavascriptDoubleQuote) { - static const char* kToEscape = "\b\001aZ\"\\wee"; - static const char* kEscaped = "\\b\\x01aZ\\\"\\\\wee"; - static const char* kEscapedQuoted = "\"\\b\\x01aZ\\\"\\\\wee\""; - static const wchar_t* kUToEscape = L"\b\x0001" L"a\x123fZ\"\\wee"; - static const char* kUEscaped = "\\b\\x01a\\u123FZ\\\"\\\\wee"; - static const char* kUEscapedQuoted = "\"\\b\\x01a\\u123FZ\\\"\\\\wee\""; +namespace { +const struct json_narrow_test_data { + const char* to_escape; + const char* escaped; +} json_narrow_cases[] = { + {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, + {"a\b\f\n\r\t\v\1\\.\"z", + "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, + {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, +}; + +} + +TEST(StringEscapeTest, JsonDoubleQuoteNarrow) { + 
for (size_t i = 0; i < arraysize(json_narrow_cases); ++i) { + std::string in = json_narrow_cases[i].to_escape; + std::string out; + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(std::string(json_narrow_cases[i].escaped), out); + } + + std::string in = json_narrow_cases[0].to_escape; + std::string out; + string_escape::JsonDoubleQuote(in, false, &out); + + // test quoting + std::string out_quoted; + string_escape::JsonDoubleQuote(in, true, &out_quoted); + EXPECT_EQ(out.length() + 2, out_quoted.length()); + EXPECT_EQ(out_quoted.find(out), 1U); + + // now try with a NULL in the string + std::string null_prepend = "test"; + null_prepend.push_back(0); + in = null_prepend + in; + std::string expected = "test\\u0000"; + expected += json_narrow_cases[0].escaped; + out.clear(); + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(expected, out); +} + +namespace { + +const struct json_wide_test_data { + const wchar_t* to_escape; + const char* escaped; +} json_wide_cases[] = { + {L"b\uffb1\u00ff", "b\\uFFB1\\u00FF"}, + {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, + {L"a\b\f\n\r\t\v\1\\.\"z", + "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, + {L"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, +}; + +} + +TEST(StringEscapeTest, JsonDoubleQuoteWide) { + + for (size_t i = 0; i < arraysize(json_wide_cases); ++i) { + std::string out; + string16 in = WideToUTF16(json_wide_cases[i].to_escape); + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(std::string(json_wide_cases[i].escaped), out); + } + + string16 in = WideToUTF16(json_wide_cases[0].to_escape); std::string out; + string_escape::JsonDoubleQuote(in, false, &out); + + // test quoting + std::string out_quoted; + string_escape::JsonDoubleQuote(in, true, &out_quoted); + EXPECT_EQ(out.length() + 2, out_quoted.length()); + EXPECT_EQ(out_quoted.find(out), 1U); - // Test wide unicode escaping - out = "testy: "; - string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), false, &out); 
- ASSERT_EQ(std::string("testy: ") + kUEscaped, out); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), true, &out); - ASSERT_EQ(std::string("testy: ") + kUEscapedQuoted, out); - - // Test null and high bit / negative unicode values - string16 str16 = UTF8ToUTF16("TeSt"); - str16.push_back(0); - str16.push_back(0xffb1); - str16.push_back(0x00ff); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(str16, false, &out); - ASSERT_EQ("testy: TeSt\\x00\\uFFB1\\xFF", out); - - // Test escaping of 7bit ascii - out = "testy: "; - string_escape::JavascriptDoubleQuote(std::string(kToEscape), false, &out); - ASSERT_EQ(std::string("testy: ") + kEscaped, out); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(std::string(kToEscape), true, &out); - ASSERT_EQ(std::string("testy: ") + kEscapedQuoted, out); - - // Test null, non-printable, and non-7bit - std::string str("TeSt"); - str.push_back(0); - str.push_back(15); - str.push_back(127); - str.push_back(-16); - str.push_back(-128); - str.push_back('!'); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(str, false, &out); - ASSERT_EQ("testy: TeSt\\x00\\x0F\\x7F\xf0\x80!", out); - - // Test escape sequences - out = "testy: "; - string_escape::JavascriptDoubleQuote("a\b\f\n\r\t\v\1\\.\"z", false, &out); - ASSERT_EQ("testy: a\\b\\f\\n\\r\\t\\v\\x01\\\\.\\\"z", out); + // now try with a NULL in the string + string16 null_prepend = WideToUTF16(L"test"); + null_prepend.push_back(0); + in = null_prepend + in; + std::string expected = "test\\u0000"; + expected += json_wide_cases[0].escaped; + out.clear(); + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(expected, out); } diff --git a/base/string_util.cc b/base/string_util.cc index 5923c10..c666960 100644 --- a/base/string_util.cc +++ b/base/string_util.cc @@ -37,35 +37,6 @@ struct EmptyStrings { const string16 s16; }; -// Hack to convert any char-like type to its unsigned counterpart. 
-// For example, it will convert char, signed char and unsigned char to unsigned -// char. -template<typename T> -struct ToUnsigned { - typedef T Unsigned; -}; - -template<> -struct ToUnsigned<char> { - typedef unsigned char Unsigned; -}; -template<> -struct ToUnsigned<signed char> { - typedef unsigned char Unsigned; -}; -template<> -struct ToUnsigned<wchar_t> { -#if defined(WCHAR_T_IS_UTF16) - typedef unsigned short Unsigned; -#elif defined(WCHAR_T_IS_UTF32) - typedef uint32 Unsigned; -#endif -}; -template<> -struct ToUnsigned<short> { - typedef unsigned short Unsigned; -}; - // Used by ReplaceStringPlaceholders to track the position in the string of // replaced parameters. struct ReplacementOffset { diff --git a/base/string_util.h b/base/string_util.h index cb71553..f40cc8d 100644 --- a/base/string_util.h +++ b/base/string_util.h @@ -595,5 +595,33 @@ bool MatchPattern(const std::string& string, const std::string& pattern); // std::numeric_limits<size_t>::max() / 2 std::string HexEncode(const void* bytes, size_t size); +// Hack to convert any char-like type to its unsigned counterpart. +// For example, it will convert char, signed char and unsigned char to unsigned +// char. 
+template<typename T> +struct ToUnsigned { + typedef T Unsigned; +}; + +template<> +struct ToUnsigned<char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<signed char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<wchar_t> { +#if defined(WCHAR_T_IS_UTF16) + typedef unsigned short Unsigned; +#elif defined(WCHAR_T_IS_UTF32) + typedef uint32 Unsigned; +#endif +}; +template<> +struct ToUnsigned<short> { + typedef unsigned short Unsigned; +}; #endif // BASE_STRING_UTIL_H_ diff --git a/chrome/common/json_value_serializer_unittest.cc b/chrome/common/json_value_serializer_unittest.cc index 41b1b7c..5c50596 100644 --- a/chrome/common/json_value_serializer_unittest.cc +++ b/chrome/common/json_value_serializer_unittest.cc @@ -73,19 +73,24 @@ TEST(JSONValueSerializerTest, StringEscape) { // for (var i = 1; i < 256; ++i) { s += String.fromCharCode(i); } // uneval(s).replace(/\\/g, "\\\\"); std::string all_chars_expected = - "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0E\\x0F\\x10" - "\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1A\\x1B\\x1C\\x1D\\x1E" - "\\x1F !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\" - "\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7F\\x80\\x81\\x82\\x83\\x84\\x85" - "\\x86\\x87\\x88\\x89\\x8A\\x8B\\x8C\\x8D\\x8E\\x8F\\x90\\x91\\x92\\x93" - "\\x94\\x95\\x96\\x97\\x98\\x99\\x9A\\x9B\\x9C\\x9D\\x9E\\x9F\\xA0\\xA1" - "\\xA2\\xA3\\xA4\\xA5\\xA6\\xA7\\xA8\\xA9\\xAA\\xAB\\xAC\\xAD\\xAE\\xAF" - "\\xB0\\xB1\\xB2\\xB3\\xB4\\xB5\\xB6\\xB7\\xB8\\xB9\\xBA\\xBB\\xBC\\xBD" - "\\xBE\\xBF\\xC0\\xC1\\xC2\\xC3\\xC4\\xC5\\xC6\\xC7\\xC8\\xC9\\xCA\\xCB" - "\\xCC\\xCD\\xCE\\xCF\\xD0\\xD1\\xD2\\xD3\\xD4\\xD5\\xD6\\xD7\\xD8\\xD9" - "\\xDA\\xDB\\xDC\\xDD\\xDE\\xDF\\xE0\\xE1\\xE2\\xE3\\xE4\\xE5\\xE6\\xE7" - "\\xE8\\xE9\\xEA\\xEB\\xEC\\xED\\xEE\\xEF\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5" - "\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF"; + 
"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000B\\f\\r" + "\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017" + "\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E" + "\\u001F !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\" + "\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\u007F\\u0080\\u0081\\u0082\\u0083" + "\\u0084\\u0085\\u0086\\u0087\\u0088\\u0089\\u008A\\u008B\\u008C\\u008D" + "\\u008E\\u008F\\u0090\\u0091\\u0092\\u0093\\u0094\\u0095\\u0096\\u0097" + "\\u0098\\u0099\\u009A\\u009B\\u009C\\u009D\\u009E\\u009F\\u00A0\\u00A1" + "\\u00A2\\u00A3\\u00A4\\u00A5\\u00A6\\u00A7\\u00A8\\u00A9\\u00AA\\u00AB" + "\\u00AC\\u00AD\\u00AE\\u00AF\\u00B0\\u00B1\\u00B2\\u00B3\\u00B4\\u00B5" + "\\u00B6\\u00B7\\u00B8\\u00B9\\u00BA\\u00BB\\u00BC\\u00BD\\u00BE\\u00BF" + "\\u00C0\\u00C1\\u00C2\\u00C3\\u00C4\\u00C5\\u00C6\\u00C7\\u00C8\\u00C9" + "\\u00CA\\u00CB\\u00CC\\u00CD\\u00CE\\u00CF\\u00D0\\u00D1\\u00D2\\u00D3" + "\\u00D4\\u00D5\\u00D6\\u00D7\\u00D8\\u00D9\\u00DA\\u00DB\\u00DC\\u00DD" + "\\u00DE\\u00DF\\u00E0\\u00E1\\u00E2\\u00E3\\u00E4\\u00E5\\u00E6\\u00E7" + "\\u00E8\\u00E9\\u00EA\\u00EB\\u00EC\\u00ED\\u00EE\\u00EF\\u00F0\\u00F1" + "\\u00F2\\u00F3\\u00F4\\u00F5\\u00F6\\u00F7\\u00F8\\u00F9\\u00FA\\u00FB" + "\\u00FC\\u00FD\\u00FE\\u00FF"; std::string expected_output = "{\"all_chars\":\"" + all_chars_expected + "\"}"; @@ -132,7 +137,7 @@ TEST(JSONValueSerializerTest, HexStrings) { std::wstring test(L"\x01\x02"); root.SetString(L"test", test); - std::string expected = "{\"test\":\"\\x01\\x02\"}"; + std::string expected = "{\"test\":\"\\u0001\\u0002\"}"; std::string actual; JSONStringValueSerializer serializer(&actual); @@ -150,7 +155,7 @@ TEST(JSONValueSerializerTest, HexStrings) { ASSERT_EQ(test, test_value); // Test converting escaped regular chars - std::string escaped_chars = "{\"test\":\"\\x67\\x6f\"}"; + std::string escaped_chars = "{\"test\":\"\\u0067\\u006f\"}"; JSONStringValueSerializer deserializer2(escaped_chars); 
deserial_root.reset(deserializer2.Deserialize(NULL)); ASSERT_TRUE(deserial_root.get()); diff --git a/net/base/net_util.cc b/net/base/net_util.cc index e5c53eb..c309653 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -824,7 +824,7 @@ std::string GetDirectoryListingHeader(const std::string& title) { std::string result(header.data(), header.size()); result.append("<script>start("); - string_escape::JavascriptDoubleQuote(title, true, &result); + string_escape::JsonDoubleQuote(title, true, &result); result.append(");</script>\n"); return result; @@ -836,17 +836,16 @@ std::string GetDirectoryListingEntry(const std::string& name, const Time& modified) { std::string result; result.append("<script>addRow("); - string_escape::JavascriptDoubleQuote(name, true, &result); + string_escape::JsonDoubleQuote(name, true, &result); result.append(","); - string_escape::JavascriptDoubleQuote( - EscapePath(name), true, &result); + string_escape::JsonDoubleQuote(EscapePath(name), true, &result); if (is_dir) { result.append(",1,"); } else { result.append(",0,"); } - string_escape::JavascriptDoubleQuote( + string_escape::JsonDoubleQuote( WideToUTF16Hack(FormatBytes(size, GetByteDisplayUnits(size), true)), true, &result); @@ -857,7 +856,7 @@ std::string GetDirectoryListingEntry(const std::string& name, if (!modified.is_null()) { modified_str = WideToUTF16Hack(base::TimeFormatShortDateAndTime(modified)); } - string_escape::JavascriptDoubleQuote(modified_str, true, &result); + string_escape::JsonDoubleQuote(modified_str, true, &result); result.append(");</script>\n"); |