diff options
author | erikkay@google.com <erikkay@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-20 16:43:49 +0000 |
---|---|---|
committer | erikkay@google.com <erikkay@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-20 16:43:49 +0000 |
commit | d36519b5068d92f4e71b8d6fd51eda2c42e54e38 (patch) | |
tree | 6aaf53187b1a36cc1d1f45c8f5ca994d55900650 /base | |
parent | c66cd7d3bfe3b572c7824db8c7d8d5c71cd21afd (diff) | |
download | chromium_src-d36519b5068d92f4e71b8d6fd51eda2c42e54e38.zip chromium_src-d36519b5068d92f4e71b8d6fd51eda2c42e54e38.tar.gz chromium_src-d36519b5068d92f4e71b8d6fd51eda2c42e54e38.tar.bz2 |
Add JSON-specific escaping, which has different rules from JS escaping. BUG=http://crbug.com/11431 TEST=base_unittests.exe --gtest_filter=StringEscapeTest.Json*
Review URL: http://codereview.chromium.org/113606
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16485 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/json_writer.cc | 21 | ||||
-rw-r--r-- | base/string_escape.cc | 67 | ||||
-rw-r--r-- | base/string_escape.h | 27 | ||||
-rw-r--r-- | base/string_escape_unittest.cc | 135 | ||||
-rw-r--r-- | base/string_util.cc | 29 | ||||
-rw-r--r-- | base/string_util.h | 28 |
6 files changed, 161 insertions, 146 deletions
diff --git a/base/json_writer.cc b/base/json_writer.cc index aa66306..a95798e 100644 --- a/base/json_writer.cc +++ b/base/json_writer.cc @@ -92,16 +92,15 @@ void JSONWriter::BuildJSONString(const Value* const node, case Value::TYPE_STRING: { + std::string value; + bool result = node->GetAsString(&value); + DCHECK(result); if (escape) { - std::wstring value; - bool result = node->GetAsString(&value); - DCHECK(result); - AppendQuotedString(value); + string_escape::JsonDoubleQuote(UTF8ToUTF16(value), + true, + json_string_); } else { - std::string value; - bool result = node->GetAsString(&value); - DCHECK(result); - string_escape::JavascriptDoubleQuote(value, true, json_string_); + string_escape::JsonDoubleQuote(value, true, json_string_); } break; } @@ -182,9 +181,9 @@ void JSONWriter::BuildJSONString(const Value* const node, } void JSONWriter::AppendQuotedString(const std::wstring& str) { - string_escape::JavascriptDoubleQuote(WideToUTF16Hack(str), - true, - json_string_); + string_escape::JsonDoubleQuote(WideToUTF16Hack(str), + true, + json_string_); } void JSONWriter::IndentLine(int depth) { diff --git a/base/string_escape.cc b/base/string_escape.cc index aafee11..0fecfa1 100644 --- a/base/string_escape.cc +++ b/base/string_escape.cc @@ -11,10 +11,13 @@ namespace string_escape { // Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, -// returns true and appends the escape sequence to |dst|. +// returns true and appends the escape sequence to |dst|. This isn't required +// by the spec, but it's more readable by humans than the \uXXXX alternatives. template<typename CHAR> -static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) { +static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { // WARNING: if you add a new case here, you need to update the reader as well. + // Note: \v is in the reader, but not here since the JSON spec doesn't + // allow it. 
switch (c) { case '\b': dst->append("\\b"); @@ -31,9 +34,6 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) { case '\t': dst->append("\\t"); break; - case '\v': - dst->append("\\v"); - break; case '\\': dst->append("\\\\"); break; @@ -46,25 +46,24 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) { return true; } -void JavascriptDoubleQuote(const string16& str, - bool put_in_quotes, - std::string* dst) { +template <class STR> +void JsonDoubleQuoteT(const STR& str, + bool put_in_quotes, + std::string* dst) { if (put_in_quotes) dst->push_back('"'); - for (string16::const_iterator it = str.begin(); it != str.end(); ++it) { - char16 c = *it; - if (!JavascriptSingleEscapeChar(c, dst)) { - if (c > 255) { - // Non-ascii values need to be unicode dst-> - // TODO(tc): Some unicode values are handled specially. See - // spidermonkey code. - StringAppendF(dst, "\\u%04X", c); - } else if (c < 32 || c > 126) { - // Spidermonkey hex escapes these values. - StringAppendF(dst, "\\x%02X", c); + for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { + typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; + if (!JsonSingleEscapeChar(c, dst)) { + if (c < 32 || c > 126) { + // Technically, we could also pass through c > 126 as UTF8, but this is + // also optional. It would also be a pain to implement here. 
+ unsigned int as_uint = static_cast<unsigned int>(c); + StringAppendF(dst, "\\u%04X", as_uint); } else { - dst->push_back(static_cast<char>(c)); + unsigned char ascii = static_cast<unsigned char>(*it); + dst->push_back(ascii); } } } @@ -73,26 +72,16 @@ void JavascriptDoubleQuote(const string16& str, dst->push_back('"'); } -void JavascriptDoubleQuote(const std::string& str, - bool put_in_quotes, - std::string* dst) { - if (put_in_quotes) - dst->push_back('"'); - - for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) { - unsigned char c = *it; - if (!JavascriptSingleEscapeChar(c, dst)) { - // Hex encode if the character is non-printable 7bit ascii - if (c < 32 || c == 127) { - StringAppendF(dst, "\\x%02X", c); - } else { - dst->push_back(static_cast<char>(c)); - } - } - } +void JsonDoubleQuote(const std::string& str, + bool put_in_quotes, + std::string* dst) { + JsonDoubleQuoteT(str, put_in_quotes, dst); +} - if (put_in_quotes) - dst->push_back('"'); +void JsonDoubleQuote(const string16& str, + bool put_in_quotes, + std::string* dst) { + JsonDoubleQuoteT(str, put_in_quotes, dst); } } // namespace string_escape diff --git a/base/string_escape.h b/base/string_escape.h index 9c6233a..3f0bf3f 100644 --- a/base/string_escape.h +++ b/base/string_escape.h @@ -7,28 +7,25 @@ #ifndef BASE_STRING_ESCAPE_H__ #define BASE_STRING_ESCAPE_H__ +#include <string> + #include "base/string16.h" namespace string_escape { -// Escape |str| appropriately for a javascript string litereal, _appending_ the -// result to |dst|. This will create standard escape sequences (\b, \n), -// hex escape sequences (\x00), and unicode escape sequences (\uXXXX). +// Escape |str| appropriately for a JSON string litereal, _appending_ the +// result to |dst|. This will create unicode escape sequences (\uXXXX). // If |put_in_quotes| is true, the result will be surrounded in double quotes. 
// The outputted literal, when interpreted by the browser, should result in a // javascript string that is identical and the same length as the input |str|. -void JavascriptDoubleQuote(const string16& str, - bool put_in_quotes, - std::string* dst); - -// Similar to the wide version, but for narrow strings. It will not use -// \uXXXX unicode escape sequences. It will pass non-7bit characters directly -// into the string unencoded, allowing the browser to interpret the encoding. -// The outputted literal, when interpreted by the browser, could result in a -// javascript string of a different length than the input |str|. -void JavascriptDoubleQuote(const std::string& str, - bool put_in_quotes, - std::string* dst); +void JsonDoubleQuote(const std::string& str, + bool put_in_quotes, + std::string* dst); + +void JsonDoubleQuote(const string16& str, + bool put_in_quotes, + std::string* dst); + } // namespace string_escape diff --git a/base/string_escape_unittest.cc b/base/string_escape_unittest.cc index e6a1806..d731dc1 100644 --- a/base/string_escape_unittest.cc +++ b/base/string_escape_unittest.cc @@ -6,59 +6,90 @@ #include "base/string_escape.h" #include "base/string_util.h" -TEST(StringEscapeTest, JavascriptDoubleQuote) { - static const char* kToEscape = "\b\001aZ\"\\wee"; - static const char* kEscaped = "\\b\\x01aZ\\\"\\\\wee"; - static const char* kEscapedQuoted = "\"\\b\\x01aZ\\\"\\\\wee\""; - static const wchar_t* kUToEscape = L"\b\x0001" L"a\x123fZ\"\\wee"; - static const char* kUEscaped = "\\b\\x01a\\u123FZ\\\"\\\\wee"; - static const char* kUEscapedQuoted = "\"\\b\\x01a\\u123FZ\\\"\\\\wee\""; +namespace { +const struct json_narrow_test_data { + const char* to_escape; + const char* escaped; +} json_narrow_cases[] = { + {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, + {"a\b\f\n\r\t\v\1\\.\"z", + "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, + {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, +}; + +} + +TEST(StringEscapeTest, JsonDoubleQuoteNarrow) { + 
for (size_t i = 0; i < arraysize(json_narrow_cases); ++i) { + std::string in = json_narrow_cases[i].to_escape; + std::string out; + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(std::string(json_narrow_cases[i].escaped), out); + } + + std::string in = json_narrow_cases[0].to_escape; + std::string out; + string_escape::JsonDoubleQuote(in, false, &out); + + // test quoting + std::string out_quoted; + string_escape::JsonDoubleQuote(in, true, &out_quoted); + EXPECT_EQ(out.length() + 2, out_quoted.length()); + EXPECT_EQ(out_quoted.find(out), 1U); + + // now try with a NULL in the string + std::string null_prepend = "test"; + null_prepend.push_back(0); + in = null_prepend + in; + std::string expected = "test\\u0000"; + expected += json_narrow_cases[0].escaped; + out.clear(); + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(expected, out); +} + +namespace { + +const struct json_wide_test_data { + const wchar_t* to_escape; + const char* escaped; +} json_wide_cases[] = { + {L"b\uffb1\u00ff", "b\\uFFB1\\u00FF"}, + {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, + {L"a\b\f\n\r\t\v\1\\.\"z", + "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, + {L"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, +}; + +} + +TEST(StringEscapeTest, JsonDoubleQuoteWide) { + + for (size_t i = 0; i < arraysize(json_wide_cases); ++i) { + std::string out; + string16 in = WideToUTF16(json_wide_cases[i].to_escape); + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(std::string(json_wide_cases[i].escaped), out); + } + + string16 in = WideToUTF16(json_wide_cases[0].to_escape); std::string out; + string_escape::JsonDoubleQuote(in, false, &out); + + // test quoting + std::string out_quoted; + string_escape::JsonDoubleQuote(in, true, &out_quoted); + EXPECT_EQ(out.length() + 2, out_quoted.length()); + EXPECT_EQ(out_quoted.find(out), 1U); - // Test wide unicode escaping - out = "testy: "; - string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), false, &out); 
- ASSERT_EQ(std::string("testy: ") + kUEscaped, out); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), true, &out); - ASSERT_EQ(std::string("testy: ") + kUEscapedQuoted, out); - - // Test null and high bit / negative unicode values - string16 str16 = UTF8ToUTF16("TeSt"); - str16.push_back(0); - str16.push_back(0xffb1); - str16.push_back(0x00ff); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(str16, false, &out); - ASSERT_EQ("testy: TeSt\\x00\\uFFB1\\xFF", out); - - // Test escaping of 7bit ascii - out = "testy: "; - string_escape::JavascriptDoubleQuote(std::string(kToEscape), false, &out); - ASSERT_EQ(std::string("testy: ") + kEscaped, out); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(std::string(kToEscape), true, &out); - ASSERT_EQ(std::string("testy: ") + kEscapedQuoted, out); - - // Test null, non-printable, and non-7bit - std::string str("TeSt"); - str.push_back(0); - str.push_back(15); - str.push_back(127); - str.push_back(-16); - str.push_back(-128); - str.push_back('!'); - - out = "testy: "; - string_escape::JavascriptDoubleQuote(str, false, &out); - ASSERT_EQ("testy: TeSt\\x00\\x0F\\x7F\xf0\x80!", out); - - // Test escape sequences - out = "testy: "; - string_escape::JavascriptDoubleQuote("a\b\f\n\r\t\v\1\\.\"z", false, &out); - ASSERT_EQ("testy: a\\b\\f\\n\\r\\t\\v\\x01\\\\.\\\"z", out); + // now try with a NULL in the string + string16 null_prepend = WideToUTF16(L"test"); + null_prepend.push_back(0); + in = null_prepend + in; + std::string expected = "test\\u0000"; + expected += json_wide_cases[0].escaped; + out.clear(); + string_escape::JsonDoubleQuote(in, false, &out); + EXPECT_EQ(expected, out); } diff --git a/base/string_util.cc b/base/string_util.cc index 5923c10..c666960 100644 --- a/base/string_util.cc +++ b/base/string_util.cc @@ -37,35 +37,6 @@ struct EmptyStrings { const string16 s16; }; -// Hack to convert any char-like type to its unsigned counterpart. 
-// For example, it will convert char, signed char and unsigned char to unsigned -// char. -template<typename T> -struct ToUnsigned { - typedef T Unsigned; -}; - -template<> -struct ToUnsigned<char> { - typedef unsigned char Unsigned; -}; -template<> -struct ToUnsigned<signed char> { - typedef unsigned char Unsigned; -}; -template<> -struct ToUnsigned<wchar_t> { -#if defined(WCHAR_T_IS_UTF16) - typedef unsigned short Unsigned; -#elif defined(WCHAR_T_IS_UTF32) - typedef uint32 Unsigned; -#endif -}; -template<> -struct ToUnsigned<short> { - typedef unsigned short Unsigned; -}; - // Used by ReplaceStringPlaceholders to track the position in the string of // replaced parameters. struct ReplacementOffset { diff --git a/base/string_util.h b/base/string_util.h index cb71553..f40cc8d 100644 --- a/base/string_util.h +++ b/base/string_util.h @@ -595,5 +595,33 @@ bool MatchPattern(const std::string& string, const std::string& pattern); // std::numeric_limits<size_t>::max() / 2 std::string HexEncode(const void* bytes, size_t size); +// Hack to convert any char-like type to its unsigned counterpart. +// For example, it will convert char, signed char and unsigned char to unsigned +// char. +template<typename T> +struct ToUnsigned { + typedef T Unsigned; +}; + +template<> +struct ToUnsigned<char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<signed char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<wchar_t> { +#if defined(WCHAR_T_IS_UTF16) + typedef unsigned short Unsigned; +#elif defined(WCHAR_T_IS_UTF32) + typedef uint32 Unsigned; +#endif +}; +template<> +struct ToUnsigned<short> { + typedef unsigned short Unsigned; +}; #endif // BASE_STRING_UTIL_H_ |