summaryrefslogtreecommitdiffstats
path: root/base
diff options
context:
space:
mode:
author erikkay@google.com <erikkay@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-05-20 16:43:49 +0000
committer erikkay@google.com <erikkay@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-05-20 16:43:49 +0000
commit d36519b5068d92f4e71b8d6fd51eda2c42e54e38 (patch)
tree 6aaf53187b1a36cc1d1f45c8f5ca994d55900650 /base
parent c66cd7d3bfe3b572c7824db8c7d8d5c71cd21afd (diff)
downloadchromium_src-d36519b5068d92f4e71b8d6fd51eda2c42e54e38.zip
chromium_src-d36519b5068d92f4e71b8d6fd51eda2c42e54e38.tar.gz
chromium_src-d36519b5068d92f4e71b8d6fd51eda2c42e54e38.tar.bz2
Add JSON-specific escaping, which has different rules from JS escaping. BUG=http://crbug.com/11431 TEST=base_unittests.exe --gtest_filter=StringEscapeTest.Json*
Review URL: http://codereview.chromium.org/113606 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16485 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- base/json_writer.cc 21
-rw-r--r-- base/string_escape.cc 67
-rw-r--r-- base/string_escape.h 27
-rw-r--r-- base/string_escape_unittest.cc 135
-rw-r--r-- base/string_util.cc 29
-rw-r--r-- base/string_util.h 28
6 files changed, 161 insertions, 146 deletions
diff --git a/base/json_writer.cc b/base/json_writer.cc
index aa66306..a95798e 100644
--- a/base/json_writer.cc
+++ b/base/json_writer.cc
@@ -92,16 +92,15 @@ void JSONWriter::BuildJSONString(const Value* const node,
case Value::TYPE_STRING:
{
+ std::string value;
+ bool result = node->GetAsString(&value);
+ DCHECK(result);
if (escape) {
- std::wstring value;
- bool result = node->GetAsString(&value);
- DCHECK(result);
- AppendQuotedString(value);
+ string_escape::JsonDoubleQuote(UTF8ToUTF16(value),
+ true,
+ json_string_);
} else {
- std::string value;
- bool result = node->GetAsString(&value);
- DCHECK(result);
- string_escape::JavascriptDoubleQuote(value, true, json_string_);
+ string_escape::JsonDoubleQuote(value, true, json_string_);
}
break;
}
@@ -182,9 +181,9 @@ void JSONWriter::BuildJSONString(const Value* const node,
}
void JSONWriter::AppendQuotedString(const std::wstring& str) {
- string_escape::JavascriptDoubleQuote(WideToUTF16Hack(str),
- true,
- json_string_);
+ string_escape::JsonDoubleQuote(WideToUTF16Hack(str),
+ true,
+ json_string_);
}
void JSONWriter::IndentLine(int depth) {
diff --git a/base/string_escape.cc b/base/string_escape.cc
index aafee11..0fecfa1 100644
--- a/base/string_escape.cc
+++ b/base/string_escape.cc
@@ -11,10 +11,13 @@
namespace string_escape {
// Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful,
-// returns true and appends the escape sequence to |dst|.
+// returns true and appends the escape sequence to |dst|. This isn't required
+// by the spec, but it's more readable by humans than the \uXXXX alternatives.
template<typename CHAR>
-static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) {
+static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) {
// WARNING: if you add a new case here, you need to update the reader as well.
+ // Note: \v is in the reader, but not here since the JSON spec doesn't
+ // allow it.
switch (c) {
case '\b':
dst->append("\\b");
@@ -31,9 +34,6 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) {
case '\t':
dst->append("\\t");
break;
- case '\v':
- dst->append("\\v");
- break;
case '\\':
dst->append("\\\\");
break;
@@ -46,25 +46,24 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) {
return true;
}
-void JavascriptDoubleQuote(const string16& str,
- bool put_in_quotes,
- std::string* dst) {
+template <class STR>
+void JsonDoubleQuoteT(const STR& str,
+ bool put_in_quotes,
+ std::string* dst) {
if (put_in_quotes)
dst->push_back('"');
- for (string16::const_iterator it = str.begin(); it != str.end(); ++it) {
- char16 c = *it;
- if (!JavascriptSingleEscapeChar(c, dst)) {
- if (c > 255) {
- // Non-ascii values need to be unicode dst->
- // TODO(tc): Some unicode values are handled specially. See
- // spidermonkey code.
- StringAppendF(dst, "\\u%04X", c);
- } else if (c < 32 || c > 126) {
- // Spidermonkey hex escapes these values.
- StringAppendF(dst, "\\x%02X", c);
+ for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) {
+ typename ToUnsigned<typename STR::value_type>::Unsigned c = *it;
+ if (!JsonSingleEscapeChar(c, dst)) {
+ if (c < 32 || c > 126) {
+ // Technically, we could also pass through c > 126 as UTF8, but this is
+ // also optional. It would also be a pain to implement here.
+ unsigned int as_uint = static_cast<unsigned int>(c);
+ StringAppendF(dst, "\\u%04X", as_uint);
} else {
- dst->push_back(static_cast<char>(c));
+ unsigned char ascii = static_cast<unsigned char>(*it);
+ dst->push_back(ascii);
}
}
}
@@ -73,26 +72,16 @@ void JavascriptDoubleQuote(const string16& str,
dst->push_back('"');
}
-void JavascriptDoubleQuote(const std::string& str,
- bool put_in_quotes,
- std::string* dst) {
- if (put_in_quotes)
- dst->push_back('"');
-
- for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
- unsigned char c = *it;
- if (!JavascriptSingleEscapeChar(c, dst)) {
- // Hex encode if the character is non-printable 7bit ascii
- if (c < 32 || c == 127) {
- StringAppendF(dst, "\\x%02X", c);
- } else {
- dst->push_back(static_cast<char>(c));
- }
- }
- }
+void JsonDoubleQuote(const std::string& str,
+ bool put_in_quotes,
+ std::string* dst) {
+ JsonDoubleQuoteT(str, put_in_quotes, dst);
+}
- if (put_in_quotes)
- dst->push_back('"');
+void JsonDoubleQuote(const string16& str,
+ bool put_in_quotes,
+ std::string* dst) {
+ JsonDoubleQuoteT(str, put_in_quotes, dst);
}
} // namespace string_escape
diff --git a/base/string_escape.h b/base/string_escape.h
index 9c6233a..3f0bf3f 100644
--- a/base/string_escape.h
+++ b/base/string_escape.h
@@ -7,28 +7,25 @@
#ifndef BASE_STRING_ESCAPE_H__
#define BASE_STRING_ESCAPE_H__
+#include <string>
+
#include "base/string16.h"
namespace string_escape {
-// Escape |str| appropriately for a javascript string litereal, _appending_ the
-// result to |dst|. This will create standard escape sequences (\b, \n),
-// hex escape sequences (\x00), and unicode escape sequences (\uXXXX).
+// Escape |str| appropriately for a JSON string literal, _appending_ the
+// result to |dst|. This will create unicode escape sequences (\uXXXX).
// If |put_in_quotes| is true, the result will be surrounded in double quotes.
// The outputted literal, when interpreted by the browser, should result in a
// javascript string that is identical and the same length as the input |str|.
-void JavascriptDoubleQuote(const string16& str,
- bool put_in_quotes,
- std::string* dst);
-
-// Similar to the wide version, but for narrow strings. It will not use
-// \uXXXX unicode escape sequences. It will pass non-7bit characters directly
-// into the string unencoded, allowing the browser to interpret the encoding.
-// The outputted literal, when interpreted by the browser, could result in a
-// javascript string of a different length than the input |str|.
-void JavascriptDoubleQuote(const std::string& str,
- bool put_in_quotes,
- std::string* dst);
+void JsonDoubleQuote(const std::string& str,
+ bool put_in_quotes,
+ std::string* dst);
+
+void JsonDoubleQuote(const string16& str,
+ bool put_in_quotes,
+ std::string* dst);
+
} // namespace string_escape
diff --git a/base/string_escape_unittest.cc b/base/string_escape_unittest.cc
index e6a1806..d731dc1 100644
--- a/base/string_escape_unittest.cc
+++ b/base/string_escape_unittest.cc
@@ -6,59 +6,90 @@
#include "base/string_escape.h"
#include "base/string_util.h"
-TEST(StringEscapeTest, JavascriptDoubleQuote) {
- static const char* kToEscape = "\b\001aZ\"\\wee";
- static const char* kEscaped = "\\b\\x01aZ\\\"\\\\wee";
- static const char* kEscapedQuoted = "\"\\b\\x01aZ\\\"\\\\wee\"";
- static const wchar_t* kUToEscape = L"\b\x0001" L"a\x123fZ\"\\wee";
- static const char* kUEscaped = "\\b\\x01a\\u123FZ\\\"\\\\wee";
- static const char* kUEscapedQuoted = "\"\\b\\x01a\\u123FZ\\\"\\\\wee\"";
+namespace {
+const struct json_narrow_test_data {
+ const char* to_escape;
+ const char* escaped;
+} json_narrow_cases[] = {
+ {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
+ {"a\b\f\n\r\t\v\1\\.\"z",
+ "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
+ {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"},
+};
+
+}
+
+TEST(StringEscapeTest, JsonDoubleQuoteNarrow) {
+ for (size_t i = 0; i < arraysize(json_narrow_cases); ++i) {
+ std::string in = json_narrow_cases[i].to_escape;
+ std::string out;
+ string_escape::JsonDoubleQuote(in, false, &out);
+ EXPECT_EQ(std::string(json_narrow_cases[i].escaped), out);
+ }
+
+ std::string in = json_narrow_cases[0].to_escape;
+ std::string out;
+ string_escape::JsonDoubleQuote(in, false, &out);
+
+ // test quoting
+ std::string out_quoted;
+ string_escape::JsonDoubleQuote(in, true, &out_quoted);
+ EXPECT_EQ(out.length() + 2, out_quoted.length());
+ EXPECT_EQ(out_quoted.find(out), 1U);
+
+ // now try with a NULL in the string
+ std::string null_prepend = "test";
+ null_prepend.push_back(0);
+ in = null_prepend + in;
+ std::string expected = "test\\u0000";
+ expected += json_narrow_cases[0].escaped;
+ out.clear();
+ string_escape::JsonDoubleQuote(in, false, &out);
+ EXPECT_EQ(expected, out);
+}
+
+namespace {
+
+const struct json_wide_test_data {
+ const wchar_t* to_escape;
+ const char* escaped;
+} json_wide_cases[] = {
+ {L"b\uffb1\u00ff", "b\\uFFB1\\u00FF"},
+ {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
+ {L"a\b\f\n\r\t\v\1\\.\"z",
+ "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
+ {L"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"},
+};
+
+}
+
+TEST(StringEscapeTest, JsonDoubleQuoteWide) {
+
+ for (size_t i = 0; i < arraysize(json_wide_cases); ++i) {
+ std::string out;
+ string16 in = WideToUTF16(json_wide_cases[i].to_escape);
+ string_escape::JsonDoubleQuote(in, false, &out);
+ EXPECT_EQ(std::string(json_wide_cases[i].escaped), out);
+ }
+
+ string16 in = WideToUTF16(json_wide_cases[0].to_escape);
std::string out;
+ string_escape::JsonDoubleQuote(in, false, &out);
+
+ // test quoting
+ std::string out_quoted;
+ string_escape::JsonDoubleQuote(in, true, &out_quoted);
+ EXPECT_EQ(out.length() + 2, out_quoted.length());
+ EXPECT_EQ(out_quoted.find(out), 1U);
- // Test wide unicode escaping
- out = "testy: ";
- string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), false, &out);
- ASSERT_EQ(std::string("testy: ") + kUEscaped, out);
-
- out = "testy: ";
- string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), true, &out);
- ASSERT_EQ(std::string("testy: ") + kUEscapedQuoted, out);
-
- // Test null and high bit / negative unicode values
- string16 str16 = UTF8ToUTF16("TeSt");
- str16.push_back(0);
- str16.push_back(0xffb1);
- str16.push_back(0x00ff);
-
- out = "testy: ";
- string_escape::JavascriptDoubleQuote(str16, false, &out);
- ASSERT_EQ("testy: TeSt\\x00\\uFFB1\\xFF", out);
-
- // Test escaping of 7bit ascii
- out = "testy: ";
- string_escape::JavascriptDoubleQuote(std::string(kToEscape), false, &out);
- ASSERT_EQ(std::string("testy: ") + kEscaped, out);
-
- out = "testy: ";
- string_escape::JavascriptDoubleQuote(std::string(kToEscape), true, &out);
- ASSERT_EQ(std::string("testy: ") + kEscapedQuoted, out);
-
- // Test null, non-printable, and non-7bit
- std::string str("TeSt");
- str.push_back(0);
- str.push_back(15);
- str.push_back(127);
- str.push_back(-16);
- str.push_back(-128);
- str.push_back('!');
-
- out = "testy: ";
- string_escape::JavascriptDoubleQuote(str, false, &out);
- ASSERT_EQ("testy: TeSt\\x00\\x0F\\x7F\xf0\x80!", out);
-
- // Test escape sequences
- out = "testy: ";
- string_escape::JavascriptDoubleQuote("a\b\f\n\r\t\v\1\\.\"z", false, &out);
- ASSERT_EQ("testy: a\\b\\f\\n\\r\\t\\v\\x01\\\\.\\\"z", out);
+ // now try with a NULL in the string
+ string16 null_prepend = WideToUTF16(L"test");
+ null_prepend.push_back(0);
+ in = null_prepend + in;
+ std::string expected = "test\\u0000";
+ expected += json_wide_cases[0].escaped;
+ out.clear();
+ string_escape::JsonDoubleQuote(in, false, &out);
+ EXPECT_EQ(expected, out);
}
diff --git a/base/string_util.cc b/base/string_util.cc
index 5923c10..c666960 100644
--- a/base/string_util.cc
+++ b/base/string_util.cc
@@ -37,35 +37,6 @@ struct EmptyStrings {
const string16 s16;
};
-// Hack to convert any char-like type to its unsigned counterpart.
-// For example, it will convert char, signed char and unsigned char to unsigned
-// char.
-template<typename T>
-struct ToUnsigned {
- typedef T Unsigned;
-};
-
-template<>
-struct ToUnsigned<char> {
- typedef unsigned char Unsigned;
-};
-template<>
-struct ToUnsigned<signed char> {
- typedef unsigned char Unsigned;
-};
-template<>
-struct ToUnsigned<wchar_t> {
-#if defined(WCHAR_T_IS_UTF16)
- typedef unsigned short Unsigned;
-#elif defined(WCHAR_T_IS_UTF32)
- typedef uint32 Unsigned;
-#endif
-};
-template<>
-struct ToUnsigned<short> {
- typedef unsigned short Unsigned;
-};
-
// Used by ReplaceStringPlaceholders to track the position in the string of
// replaced parameters.
struct ReplacementOffset {
diff --git a/base/string_util.h b/base/string_util.h
index cb71553..f40cc8d 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -595,5 +595,33 @@ bool MatchPattern(const std::string& string, const std::string& pattern);
// std::numeric_limits<size_t>::max() / 2
std::string HexEncode(const void* bytes, size_t size);
+// Hack to convert any char-like type to its unsigned counterpart.
+// For example, it will convert char, signed char and unsigned char to unsigned
+// char.
+template<typename T>
+struct ToUnsigned {
+ typedef T Unsigned;
+};
+
+template<>
+struct ToUnsigned<char> {
+ typedef unsigned char Unsigned;
+};
+template<>
+struct ToUnsigned<signed char> {
+ typedef unsigned char Unsigned;
+};
+template<>
+struct ToUnsigned<wchar_t> {
+#if defined(WCHAR_T_IS_UTF16)
+ typedef unsigned short Unsigned;
+#elif defined(WCHAR_T_IS_UTF32)
+ typedef uint32 Unsigned;
+#endif
+};
+template<>
+struct ToUnsigned<short> {
+ typedef unsigned short Unsigned;
+};
#endif // BASE_STRING_UTIL_H_