diff options
author | Patrick Scott <phanna@android.com> | 2010-02-04 10:37:17 -0500 |
---|---|---|
committer | Patrick Scott <phanna@android.com> | 2010-02-04 10:39:42 -0500 |
commit | c7f5f8508d98d5952d42ed7648c2a8f30a4da156 (patch) | |
tree | dd51dbfbf6670daa61279b3a19e7b1835b301dbf /base/json | |
parent | 139d8152182f9093f03d9089822b688e49fa7667 (diff) | |
download | external_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.zip external_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.tar.gz external_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.tar.bz2 |
Initial source checkin.
The source files were determined by building net_unittests in chromium's source
tree. Some of the obvious libraries were left out (v8, gmock, gtest).
The Android.mk file has all the sources (minus unittests and tools) that were
used during net_unittests compilation. Nothing builds yet because of STL but
that is the next task. The .cpp files will most likely not compile anyways
because of the LOCAL_CPP_EXTENSION mod. I will have to break this into multiple
projects to get around that limitation.
Diffstat (limited to 'base/json')
-rw-r--r-- | base/json/json_reader.cc | 643 | ||||
-rw-r--r-- | base/json/json_reader.h | 195 | ||||
-rw-r--r-- | base/json/json_reader_unittest.cc | 547 | ||||
-rw-r--r-- | base/json/json_writer.cc | 200 | ||||
-rw-r--r-- | base/json/json_writer.h | 63 | ||||
-rw-r--r-- | base/json/json_writer_unittest.cc | 98 | ||||
-rw-r--r-- | base/json/string_escape.cc | 91 | ||||
-rw-r--r-- | base/json/string_escape.h | 32 | ||||
-rw-r--r-- | base/json/string_escape_unittest.cc | 99 |
9 files changed, 1968 insertions, 0 deletions
diff --git a/base/json/json_reader.cc b/base/json/json_reader.cc new file mode 100644 index 0000000..bdc682b --- /dev/null +++ b/base/json/json_reader.cc @@ -0,0 +1,643 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/json/json_reader.h" + +#include "base/float_util.h" +#include "base/logging.h" +#include "base/scoped_ptr.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "base/values.h" + +namespace base { + +static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN, + 0, 0); +static const int kStackLimit = 100; + +namespace { + +inline int HexToInt(wchar_t c) { + if ('0' <= c && c <= '9') { + return c - '0'; + } else if ('A' <= c && c <= 'F') { + return c - 'A' + 10; + } else if ('a' <= c && c <= 'f') { + return c - 'a' + 10; + } + NOTREACHED(); + return 0; +} + +// A helper method for ParseNumberToken. It reads an int from the end of +// token. The method returns false if there is no valid integer at the end of +// the token. +bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) { + wchar_t first = token.NextChar(); + int len = 0; + + // Read in more digits + wchar_t c = first; + while ('\0' != c && '0' <= c && c <= '9') { + ++token.length; + ++len; + c = token.NextChar(); + } + // We need at least 1 digit. + if (len == 0) + return false; + + if (!can_have_leading_zeros && len > 1 && '0' == first) + return false; + + return true; +} + +// A helper method for ParseStringToken. It reads |digits| hex digits from the +// token. If the sequence if digits is not valid (contains other characters), +// the method returns false. 
+bool ReadHexDigits(JSONReader::Token& token, int digits) { + for (int i = 1; i <= digits; ++i) { + wchar_t c = *(token.begin + token.length + i); + if ('\0' == c) + return false; + if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F'))) { + return false; + } + } + + token.length += digits; + return true; +} + +} // anonymous namespace + +const char* JSONReader::kBadRootElementType = + "Root value must be an array or object."; +const char* JSONReader::kInvalidEscape = + "Invalid escape sequence."; +const char* JSONReader::kSyntaxError = + "Syntax error."; +const char* JSONReader::kTrailingComma = + "Trailing comma not allowed."; +const char* JSONReader::kTooMuchNesting = + "Too much nesting."; +const char* JSONReader::kUnexpectedDataAfterRoot = + "Unexpected data after root element."; +const char* JSONReader::kUnsupportedEncoding = + "Unsupported encoding. JSON must be UTF-8."; +const char* JSONReader::kUnquotedDictionaryKey = + "Dictionary keys must be quoted."; + +/* static */ +Value* JSONReader::Read(const std::string& json, + bool allow_trailing_comma) { + return ReadAndReturnError(json, allow_trailing_comma, NULL); +} + +/* static */ +Value* JSONReader::ReadAndReturnError(const std::string& json, + bool allow_trailing_comma, + std::string *error_message_out) { + JSONReader reader = JSONReader(); + Value* root = reader.JsonToValue(json, true, allow_trailing_comma); + if (root) + return root; + + if (error_message_out) + *error_message_out = reader.error_message(); + + return NULL; +} + +/* static */ +std::string JSONReader::FormatErrorMessage(int line, int column, + const char* description) { + return StringPrintf("Line: %i, column: %i, %s", + line, column, description); +} + +JSONReader::JSONReader() + : start_pos_(NULL), json_pos_(NULL), stack_depth_(0), + allow_trailing_comma_(false) {} + +Value* JSONReader::JsonToValue(const std::string& json, bool check_root, + bool allow_trailing_comma) { + // The input must be in UTF-8. 
+ if (!IsStringUTF8(json.c_str())) { + error_message_ = kUnsupportedEncoding; + return NULL; + } + + // The conversion from UTF8 to wstring removes null bytes for us + // (a good thing). + std::wstring json_wide(UTF8ToWide(json)); + start_pos_ = json_wide.c_str(); + + // When the input JSON string starts with a UTF-8 Byte-Order-Mark + // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode + // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from + // mis-treating a Unicode BOM as an invalid character and returning NULL, + // skip a converted Unicode BOM if it exists. + if (!json_wide.empty() && start_pos_[0] == 0xFEFF) { + ++start_pos_; + } + + json_pos_ = start_pos_; + allow_trailing_comma_ = allow_trailing_comma; + stack_depth_ = 0; + error_message_.clear(); + + scoped_ptr<Value> root(BuildValue(check_root)); + if (root.get()) { + if (ParseToken().type == Token::END_OF_INPUT) { + return root.release(); + } else { + SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_); + } + } + + // Default to calling errors "syntax errors". + if (error_message_.empty()) + SetErrorMessage(kSyntaxError, json_pos_); + + return NULL; +} + +Value* JSONReader::BuildValue(bool is_root) { + ++stack_depth_; + if (stack_depth_ > kStackLimit) { + SetErrorMessage(kTooMuchNesting, json_pos_); + return NULL; + } + + Token token = ParseToken(); + // The root token must be an array or an object. 
+ if (is_root && token.type != Token::OBJECT_BEGIN && + token.type != Token::ARRAY_BEGIN) { + SetErrorMessage(kBadRootElementType, json_pos_); + return NULL; + } + + scoped_ptr<Value> node; + + switch (token.type) { + case Token::END_OF_INPUT: + case Token::INVALID_TOKEN: + return NULL; + + case Token::NULL_TOKEN: + node.reset(Value::CreateNullValue()); + break; + + case Token::BOOL_TRUE: + node.reset(Value::CreateBooleanValue(true)); + break; + + case Token::BOOL_FALSE: + node.reset(Value::CreateBooleanValue(false)); + break; + + case Token::NUMBER: + node.reset(DecodeNumber(token)); + if (!node.get()) + return NULL; + break; + + case Token::STRING: + node.reset(DecodeString(token)); + if (!node.get()) + return NULL; + break; + + case Token::ARRAY_BEGIN: + { + json_pos_ += token.length; + token = ParseToken(); + + node.reset(new ListValue()); + while (token.type != Token::ARRAY_END) { + Value* array_node = BuildValue(false); + if (!array_node) + return NULL; + static_cast<ListValue*>(node.get())->Append(array_node); + + // After a list value, we expect a comma or the end of the list. + token = ParseToken(); + if (token.type == Token::LIST_SEPARATOR) { + json_pos_ += token.length; + token = ParseToken(); + // Trailing commas are invalid according to the JSON RFC, but some + // consumers need the parsing leniency, so handle accordingly. + if (token.type == Token::ARRAY_END) { + if (!allow_trailing_comma_) { + SetErrorMessage(kTrailingComma, json_pos_); + return NULL; + } + // Trailing comma OK, stop parsing the Array. + break; + } + } else if (token.type != Token::ARRAY_END) { + // Unexpected value after list value. Bail out. 
+ return NULL; + } + } + if (token.type != Token::ARRAY_END) { + return NULL; + } + break; + } + + case Token::OBJECT_BEGIN: + { + json_pos_ += token.length; + token = ParseToken(); + + node.reset(new DictionaryValue); + while (token.type != Token::OBJECT_END) { + if (token.type != Token::STRING) { + SetErrorMessage(kUnquotedDictionaryKey, json_pos_); + return NULL; + } + scoped_ptr<Value> dict_key_value(DecodeString(token)); + if (!dict_key_value.get()) + return NULL; + + // Convert the key into a wstring. + std::wstring dict_key; + bool success = dict_key_value->GetAsString(&dict_key); + DCHECK(success); + + json_pos_ += token.length; + token = ParseToken(); + if (token.type != Token::OBJECT_PAIR_SEPARATOR) + return NULL; + + json_pos_ += token.length; + token = ParseToken(); + Value* dict_value = BuildValue(false); + if (!dict_value) + return NULL; + static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion( + dict_key, dict_value); + + // After a key/value pair, we expect a comma or the end of the + // object. + token = ParseToken(); + if (token.type == Token::LIST_SEPARATOR) { + json_pos_ += token.length; + token = ParseToken(); + // Trailing commas are invalid according to the JSON RFC, but some + // consumers need the parsing leniency, so handle accordingly. + if (token.type == Token::OBJECT_END) { + if (!allow_trailing_comma_) { + SetErrorMessage(kTrailingComma, json_pos_); + return NULL; + } + // Trailing comma OK, stop parsing the Object. + break; + } + } else if (token.type != Token::OBJECT_END) { + // Unexpected value after last object value. Bail out. + return NULL; + } + } + if (token.type != Token::OBJECT_END) + return NULL; + + break; + } + + default: + // We got a token that's not a value. + return NULL; + } + json_pos_ += token.length; + + --stack_depth_; + return node.release(); +} + +JSONReader::Token JSONReader::ParseNumberToken() { + // We just grab the number here. We validate the size in DecodeNumber. 
+ // According to RFC4627, a valid number is: [minus] int [frac] [exp] + Token token(Token::NUMBER, json_pos_, 0); + wchar_t c = *json_pos_; + if ('-' == c) { + ++token.length; + c = token.NextChar(); + } + + if (!ReadInt(token, false)) + return kInvalidToken; + + // Optional fraction part + c = token.NextChar(); + if ('.' == c) { + ++token.length; + if (!ReadInt(token, true)) + return kInvalidToken; + c = token.NextChar(); + } + + // Optional exponent part + if ('e' == c || 'E' == c) { + ++token.length; + c = token.NextChar(); + if ('-' == c || '+' == c) { + ++token.length; + c = token.NextChar(); + } + if (!ReadInt(token, true)) + return kInvalidToken; + } + + return token; +} + +Value* JSONReader::DecodeNumber(const Token& token) { + const std::wstring num_string(token.begin, token.length); + + int num_int; + if (StringToInt(WideToUTF16Hack(num_string), &num_int)) + return Value::CreateIntegerValue(num_int); + + double num_double; + if (StringToDouble(WideToUTF16Hack(num_string), &num_double) && + base::IsFinite(num_double)) + return Value::CreateRealValue(num_double); + + return NULL; +} + +JSONReader::Token JSONReader::ParseStringToken() { + Token token(Token::STRING, json_pos_, 1); + wchar_t c = token.NextChar(); + while ('\0' != c) { + if ('\\' == c) { + ++token.length; + c = token.NextChar(); + // Make sure the escaped char is valid. 
+ switch (c) { + case 'x': + if (!ReadHexDigits(token, 2)) { + SetErrorMessage(kInvalidEscape, json_pos_ + token.length); + return kInvalidToken; + } + break; + case 'u': + if (!ReadHexDigits(token, 4)) { + SetErrorMessage(kInvalidEscape, json_pos_ + token.length); + return kInvalidToken; + } + break; + case '\\': + case '/': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case '"': + break; + default: + SetErrorMessage(kInvalidEscape, json_pos_ + token.length); + return kInvalidToken; + } + } else if ('"' == c) { + ++token.length; + return token; + } + ++token.length; + c = token.NextChar(); + } + return kInvalidToken; +} + +Value* JSONReader::DecodeString(const Token& token) { + std::wstring decoded_str; + decoded_str.reserve(token.length - 2); + + for (int i = 1; i < token.length - 1; ++i) { + wchar_t c = *(token.begin + i); + if ('\\' == c) { + ++i; + c = *(token.begin + i); + switch (c) { + case '"': + case '/': + case '\\': + decoded_str.push_back(c); + break; + case 'b': + decoded_str.push_back('\b'); + break; + case 'f': + decoded_str.push_back('\f'); + break; + case 'n': + decoded_str.push_back('\n'); + break; + case 'r': + decoded_str.push_back('\r'); + break; + case 't': + decoded_str.push_back('\t'); + break; + case 'v': + decoded_str.push_back('\v'); + break; + + case 'x': + decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 4) + + HexToInt(*(token.begin + i + 2))); + i += 2; + break; + case 'u': + decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 12 ) + + (HexToInt(*(token.begin + i + 2)) << 8) + + (HexToInt(*(token.begin + i + 3)) << 4) + + HexToInt(*(token.begin + i + 4))); + i += 4; + break; + + default: + // We should only have valid strings at this point. If not, + // ParseStringToken didn't do it's job. 
+ NOTREACHED(); + return NULL; + } + } else { + // Not escaped + decoded_str.push_back(c); + } + } + return Value::CreateStringValue(decoded_str); +} + +JSONReader::Token JSONReader::ParseToken() { + static const std::wstring kNullString(L"null"); + static const std::wstring kTrueString(L"true"); + static const std::wstring kFalseString(L"false"); + + EatWhitespaceAndComments(); + + Token token(Token::INVALID_TOKEN, 0, 0); + switch (*json_pos_) { + case '\0': + token.type = Token::END_OF_INPUT; + break; + + case 'n': + if (NextStringMatch(kNullString)) + token = Token(Token::NULL_TOKEN, json_pos_, 4); + break; + + case 't': + if (NextStringMatch(kTrueString)) + token = Token(Token::BOOL_TRUE, json_pos_, 4); + break; + + case 'f': + if (NextStringMatch(kFalseString)) + token = Token(Token::BOOL_FALSE, json_pos_, 5); + break; + + case '[': + token = Token(Token::ARRAY_BEGIN, json_pos_, 1); + break; + + case ']': + token = Token(Token::ARRAY_END, json_pos_, 1); + break; + + case ',': + token = Token(Token::LIST_SEPARATOR, json_pos_, 1); + break; + + case '{': + token = Token(Token::OBJECT_BEGIN, json_pos_, 1); + break; + + case '}': + token = Token(Token::OBJECT_END, json_pos_, 1); + break; + + case ':': + token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + token = ParseNumberToken(); + break; + + case '"': + token = ParseStringToken(); + break; + } + return token; +} + +bool JSONReader::NextStringMatch(const std::wstring& str) { + for (size_t i = 0; i < str.length(); ++i) { + if ('\0' == *json_pos_) + return false; + if (*(json_pos_ + i) != str[i]) + return false; + } + return true; +} + +void JSONReader::EatWhitespaceAndComments() { + while ('\0' != *json_pos_) { + switch (*json_pos_) { + case ' ': + case '\n': + case '\r': + case '\t': + ++json_pos_; + break; + case '/': + // TODO(tc): This isn't in the RFC so it 
should be a parser flag. + if (!EatComment()) + return; + break; + default: + // Not a whitespace char, just exit. + return; + } + } +} + +bool JSONReader::EatComment() { + if ('/' != *json_pos_) + return false; + + wchar_t next_char = *(json_pos_ + 1); + if ('/' == next_char) { + // Line comment, read until \n or \r + json_pos_ += 2; + while ('\0' != *json_pos_) { + switch (*json_pos_) { + case '\n': + case '\r': + ++json_pos_; + return true; + default: + ++json_pos_; + } + } + } else if ('*' == next_char) { + // Block comment, read until */ + json_pos_ += 2; + while ('\0' != *json_pos_) { + if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) { + json_pos_ += 2; + return true; + } + ++json_pos_; + } + } else { + return false; + } + return true; +} + +void JSONReader::SetErrorMessage(const char* description, + const wchar_t* error_pos) { + int line_number = 1; + int column_number = 1; + + // Figure out the line and column the error occured at. + for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) { + if (*pos == '\0') { + NOTREACHED(); + return; + } + + if (*pos == '\n') { + ++line_number; + column_number = 1; + } else { + ++column_number; + } + } + + error_message_ = FormatErrorMessage(line_number, column_number, description); +} + +} // namespace base diff --git a/base/json/json_reader.h b/base/json/json_reader.h new file mode 100644 index 0000000..47598f4 --- /dev/null +++ b/base/json/json_reader.h @@ -0,0 +1,195 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// A JSON parser. Converts strings of JSON into a Value object (see +// base/values.h). +// http://www.ietf.org/rfc/rfc4627.txt?number=4627 +// +// Known limitations/deviations from the RFC: +// - Only knows how to parse ints within the range of a signed 32 bit int and +// decimal numbers within a double. +// - Assumes input is encoded as UTF8. 
The spec says we should allow UTF-16 +// (BE or LE) and UTF-32 (BE or LE) as well. +// - We limit nesting to 100 levels to prevent stack overflow (this is allowed +// by the RFC). +// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") writes a data +// stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input +// UTF-8 string for the JSONReader::JsonToValue() function may start with a +// UTF-8 BOM (0xEF, 0xBB, 0xBF). +// To avoid the function from mis-treating a UTF-8 BOM as an invalid +// character, the function skips a Unicode BOM at the beginning of the +// Unicode string (converted from the input UTF-8 string) before parsing it. +// +// TODO(tc): Add a parsing option to to relax object keys being wrapped in +// double quotes +// TODO(tc): Add an option to disable comment stripping +// TODO(aa): Consider making the constructor public and the static Read() method +// only a convenience for the common uses with more complex configuration going +// on the instance. + +#ifndef BASE_JSON_JSON_READER_H_ +#define BASE_JSON_JSON_READER_H_ + +#include <string> + +#include "base/basictypes.h" + +// Chromium and Chromium OS check out gtest to different places, so we're +// unable to compile on both if we include gtest_prod.h here. Instead, include +// its only contents -- this will need to be updated if the macro ever changes. +#define FRIEND_TEST(test_case_name, test_name)\ +friend class test_case_name##_##test_name##_Test + +class Value; + +namespace base { + +class JSONReader { + public: + // A struct to hold a JS token. 
+ class Token { + public: + enum Type { + OBJECT_BEGIN, // { + OBJECT_END, // } + ARRAY_BEGIN, // [ + ARRAY_END, // ] + STRING, + NUMBER, + BOOL_TRUE, // true + BOOL_FALSE, // false + NULL_TOKEN, // null + LIST_SEPARATOR, // , + OBJECT_PAIR_SEPARATOR, // : + END_OF_INPUT, + INVALID_TOKEN, + }; + Token(Type t, const wchar_t* b, int len) + : type(t), begin(b), length(len) {} + + Type type; + + // A pointer into JSONReader::json_pos_ that's the beginning of this token. + const wchar_t* begin; + + // End should be one char past the end of the token. + int length; + + // Get the character that's one past the end of this token. + wchar_t NextChar() { + return *(begin + length); + } + }; + + // Error messages that can be returned. + static const char* kBadRootElementType; + static const char* kInvalidEscape; + static const char* kSyntaxError; + static const char* kTrailingComma; + static const char* kTooMuchNesting; + static const char* kUnexpectedDataAfterRoot; + static const char* kUnsupportedEncoding; + static const char* kUnquotedDictionaryKey; + + JSONReader(); + + // Reads and parses |json|, returning a Value. The caller owns the returned + // instance. If |json| is not a properly formed JSON string, returns NULL. + // If |allow_trailing_comma| is true, we will ignore trailing commas in + // objects and arrays even though this goes against the RFC. + static Value* Read(const std::string& json, bool allow_trailing_comma); + + // Reads and parses |json| like Read(). |error_message_out| is optional. If + // specified and NULL is returned, |error_message_out| will be populated with + // a string describing the error. Otherwise, |error_message_out| is + // unmodified. + static Value* ReadAndReturnError(const std::string& json, + bool allow_trailing_comma, + std::string* error_message_out); + + // Returns the error message if the last call to JsonToValue() failed. If the + // last call did not fail, returns a valid empty string. 
+ std::string error_message() { return error_message_; } + + // Reads and parses |json|, returning a Value. The caller owns the returned + // instance. If |json| is not a properly formed JSON string, returns NULL and + // a detailed error can be retrieved from |error_message()|. + // If |check_root| is true, we require that the root object be an object or + // array. Otherwise, it can be any valid JSON type. + // If |allow_trailing_comma| is true, we will ignore trailing commas in + // objects and arrays even though this goes against the RFC. + Value* JsonToValue(const std::string& json, bool check_root, + bool allow_trailing_comma); + + private: + static std::string FormatErrorMessage(int line, int column, + const char* description); + + DISALLOW_COPY_AND_ASSIGN(JSONReader); + + FRIEND_TEST(JSONReaderTest, Reading); + FRIEND_TEST(JSONReaderTest, ErrorMessages); + + // Recursively build Value. Returns NULL if we don't have a valid JSON + // string. If |is_root| is true, we verify that the root element is either + // an object or an array. + Value* BuildValue(bool is_root); + + // Parses a sequence of characters into a Token::NUMBER. If the sequence of + // characters is not a valid number, returns a Token::INVALID_TOKEN. Note + // that DecodeNumber is used to actually convert from a string to an + // int/double. + Token ParseNumberToken(); + + // Try and convert the substring that token holds into an int or a double. If + // we can (ie., no overflow), return the value, else return NULL. + Value* DecodeNumber(const Token& token); + + // Parses a sequence of characters into a Token::STRING. If the sequence of + // characters is not a valid string, returns a Token::INVALID_TOKEN. Note + // that DecodeString is used to actually decode the escaped string into an + // actual wstring. + Token ParseStringToken(); + + // Convert the substring into a value string. This should always succeed + // (otherwise ParseStringToken would have failed). 
+ Value* DecodeString(const Token& token); + + // Grabs the next token in the JSON stream. This does not increment the + // stream so it can be used to look ahead at the next token. + Token ParseToken(); + + // Increments |json_pos_| past leading whitespace and comments. + void EatWhitespaceAndComments(); + + // If |json_pos_| is at the start of a comment, eat it, otherwise, returns + // false. + bool EatComment(); + + // Checks if |json_pos_| matches str. + bool NextStringMatch(const std::wstring& str); + + // Creates the error message that will be returned to the caller. The current + // line and column are determined and added into the final message. + void SetErrorMessage(const char* description, const wchar_t* error_pos); + + // Pointer to the starting position in the input string. + const wchar_t* start_pos_; + + // Pointer to the current position in the input string. + const wchar_t* json_pos_; + + // Used to keep track of how many nested lists/dicts there are. + int stack_depth_; + + // A parser flag that allows trailing commas in objects and arrays. + bool allow_trailing_comma_; + + // Contains the error message for the last call to JsonToValue(), if any. + std::string error_message_; +}; + +} // namespace base + +#endif // BASE_JSON_JSON_READER_H_ diff --git a/base/json/json_reader_unittest.cc b/base/json/json_reader_unittest.cc new file mode 100644 index 0000000..17dea56 --- /dev/null +++ b/base/json/json_reader_unittest.cc @@ -0,0 +1,547 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "testing/gtest/include/gtest/gtest.h" +#include "base/json/json_reader.h" +#include "base/scoped_ptr.h" +#include "base/values.h" +#include "build/build_config.h" + +namespace base { + +TEST(JSONReaderTest, Reading) { + // some whitespace checking + scoped_ptr<Value> root; + root.reset(JSONReader().JsonToValue(" null ", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_NULL)); + + // Invalid JSON string + root.reset(JSONReader().JsonToValue("nu", false, false)); + ASSERT_FALSE(root.get()); + + // Simple bool + root.reset(JSONReader().JsonToValue("true ", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_BOOLEAN)); + + // Embedded comment + root.reset(JSONReader().JsonToValue("/* comment */null", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_NULL)); + root.reset(JSONReader().JsonToValue("40 /* comment */", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_INTEGER)); + root.reset(JSONReader().JsonToValue("true // comment", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_BOOLEAN)); + root.reset(JSONReader().JsonToValue("/* comment */\"sample string\"", + false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_STRING)); + std::string value; + ASSERT_TRUE(root->GetAsString(&value)); + ASSERT_EQ("sample string", value); + + // Test number formats + root.reset(JSONReader().JsonToValue("43", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_INTEGER)); + int int_val = 0; + ASSERT_TRUE(root->GetAsInteger(&int_val)); + ASSERT_EQ(43, int_val); + + // According to RFC4627, oct, hex, and leading zeros are invalid JSON. 
+ root.reset(JSONReader().JsonToValue("043", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("0x43", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("00", false, false)); + ASSERT_FALSE(root.get()); + + // Test 0 (which needs to be special cased because of the leading zero + // clause). + root.reset(JSONReader().JsonToValue("0", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_INTEGER)); + int_val = 1; + ASSERT_TRUE(root->GetAsInteger(&int_val)); + ASSERT_EQ(0, int_val); + + // Numbers that overflow ints should succeed, being internally promoted to + // storage as doubles + root.reset(JSONReader().JsonToValue("2147483648", false, false)); + ASSERT_TRUE(root.get()); + double real_val; + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(2147483648.0, real_val); + root.reset(JSONReader().JsonToValue("-2147483649", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(-2147483649.0, real_val); + + // Parse a double + root.reset(JSONReader().JsonToValue("43.1", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(43.1, real_val); + + root.reset(JSONReader().JsonToValue("4.3e-1", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(.43, real_val); + + root.reset(JSONReader().JsonToValue("2.1e0", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(2.1, real_val); + + root.reset(JSONReader().JsonToValue("2.1e+0001", false, false)); + 
ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(21.0, real_val); + + root.reset(JSONReader().JsonToValue("0.01", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(0.01, real_val); + + root.reset(JSONReader().JsonToValue("1.00", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_REAL)); + real_val = 0.0; + ASSERT_TRUE(root->GetAsReal(&real_val)); + ASSERT_DOUBLE_EQ(1.0, real_val); + + // Fractional parts must have a digit before and after the decimal point. + root.reset(JSONReader().JsonToValue("1.", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue(".1", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("1.e10", false, false)); + ASSERT_FALSE(root.get()); + + // Exponent must have a digit following the 'e'. 
+ root.reset(JSONReader().JsonToValue("1e", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("1E", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("1e1.", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("1e1.0", false, false)); + ASSERT_FALSE(root.get()); + + // INF/-INF/NaN are not valid + root.reset(JSONReader().JsonToValue("1e1000", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("-1e1000", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("NaN", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("nan", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("inf", false, false)); + ASSERT_FALSE(root.get()); + + // Invalid number formats + root.reset(JSONReader().JsonToValue("4.3.1", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("4e3.1", false, false)); + ASSERT_FALSE(root.get()); + + // Test string parser + root.reset(JSONReader().JsonToValue("\"hello world\"", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_STRING)); + std::wstring str_val; + ASSERT_TRUE(root->GetAsString(&str_val)); + ASSERT_EQ(L"hello world", str_val); + + // Empty string + root.reset(JSONReader().JsonToValue("\"\"", false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_STRING)); + str_val.clear(); + ASSERT_TRUE(root->GetAsString(&str_val)); + ASSERT_EQ(L"", str_val); + + // Test basic string escapes + root.reset(JSONReader().JsonToValue("\" \\\"\\\\\\/\\b\\f\\n\\r\\t\\v\"", + false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_STRING)); + str_val.clear(); + ASSERT_TRUE(root->GetAsString(&str_val)); + ASSERT_EQ(L" \"\\/\b\f\n\r\t\v", str_val); + + // Test hex and unicode escapes including the null character. 
+ root.reset(JSONReader().JsonToValue("\"\\x41\\x00\\u1234\"", false, + false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_STRING)); + str_val.clear(); + ASSERT_TRUE(root->GetAsString(&str_val)); + ASSERT_EQ(std::wstring(L"A\0\x1234", 3), str_val); + + // Test invalid strings + root.reset(JSONReader().JsonToValue("\"no closing quote", false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("\"\\z invalid escape char\"", false, + false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("\"\\xAQ invalid hex code\"", false, + false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("not enough hex chars\\x1\"", false, + false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("\"not enough escape chars\\u123\"", + false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("\"extra backslash at end of input\\\"", + false, false)); + ASSERT_FALSE(root.get()); + + // Basic array + root.reset(JSONReader::Read("[true, false, null]", false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_LIST)); + ListValue* list = static_cast<ListValue*>(root.get()); + ASSERT_EQ(3U, list->GetSize()); + + // Test with trailing comma. Should be parsed the same as above. + scoped_ptr<Value> root2; + root2.reset(JSONReader::Read("[true, false, null, ]", true)); + EXPECT_TRUE(root->Equals(root2.get())); + + // Empty array + root.reset(JSONReader::Read("[]", false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_LIST)); + list = static_cast<ListValue*>(root.get()); + ASSERT_EQ(0U, list->GetSize()); + + // Nested arrays + root.reset(JSONReader::Read("[[true], [], [false, [], [null]], null]", + false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_LIST)); + list = static_cast<ListValue*>(root.get()); + ASSERT_EQ(4U, list->GetSize()); + + // Lots of trailing commas. 
+ root2.reset(JSONReader::Read("[[true], [], [false, [], [null, ] , ], null,]", + true)); + EXPECT_TRUE(root->Equals(root2.get())); + + // Invalid, missing close brace. + root.reset(JSONReader::Read("[[true], [], [false, [], [null]], null", false)); + ASSERT_FALSE(root.get()); + + // Invalid, too many commas + root.reset(JSONReader::Read("[true,, null]", false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("[true,, null]", true)); + ASSERT_FALSE(root.get()); + + // Invalid, no commas + root.reset(JSONReader::Read("[true null]", false)); + ASSERT_FALSE(root.get()); + + // Invalid, trailing comma + root.reset(JSONReader::Read("[true,]", false)); + ASSERT_FALSE(root.get()); + + // Valid if we set |allow_trailing_comma| to true. + root.reset(JSONReader::Read("[true,]", true)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_LIST)); + list = static_cast<ListValue*>(root.get()); + EXPECT_EQ(1U, list->GetSize()); + Value* tmp_value = NULL; + ASSERT_TRUE(list->Get(0, &tmp_value)); + EXPECT_TRUE(tmp_value->IsType(Value::TYPE_BOOLEAN)); + bool bool_value = false; + ASSERT_TRUE(tmp_value->GetAsBoolean(&bool_value)); + EXPECT_TRUE(bool_value); + + // Don't allow empty elements, even if |allow_trailing_comma| is + // true. 
+ root.reset(JSONReader::Read("[,]", true)); + EXPECT_FALSE(root.get()); + root.reset(JSONReader::Read("[true,,]", true)); + EXPECT_FALSE(root.get()); + root.reset(JSONReader::Read("[,true,]", true)); + EXPECT_FALSE(root.get()); + root.reset(JSONReader::Read("[true,,false]", true)); + EXPECT_FALSE(root.get()); + + // Test objects + root.reset(JSONReader::Read("{}", false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_DICTIONARY)); + + root.reset(JSONReader::Read( + "{\"number\":9.87654321, \"null\":null , \"\\x53\" : \"str\" }", false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_DICTIONARY)); + DictionaryValue* dict_val = static_cast<DictionaryValue*>(root.get()); + real_val = 0.0; + ASSERT_TRUE(dict_val->GetReal(L"number", &real_val)); + ASSERT_DOUBLE_EQ(9.87654321, real_val); + Value* null_val = NULL; + ASSERT_TRUE(dict_val->Get(L"null", &null_val)); + ASSERT_TRUE(null_val->IsType(Value::TYPE_NULL)); + str_val.clear(); + ASSERT_TRUE(dict_val->GetString(L"S", &str_val)); + ASSERT_EQ(L"str", str_val); + + root2.reset(JSONReader::Read( + "{\"number\":9.87654321, \"null\":null , \"\\x53\" : \"str\", }", true)); + ASSERT_TRUE(root2.get()); + EXPECT_TRUE(root->Equals(root2.get())); + + // Test newline equivalence. 
+ root2.reset(JSONReader::Read( + "{\n" + " \"number\":9.87654321,\n" + " \"null\":null,\n" + " \"\\x53\":\"str\",\n" + "}\n", true)); + ASSERT_TRUE(root2.get()); + EXPECT_TRUE(root->Equals(root2.get())); + + root2.reset(JSONReader::Read( + "{\r\n" + " \"number\":9.87654321,\r\n" + " \"null\":null,\r\n" + " \"\\x53\":\"str\",\r\n" + "}\r\n", true)); + ASSERT_TRUE(root2.get()); + EXPECT_TRUE(root->Equals(root2.get())); + + // Test nesting + root.reset(JSONReader::Read( + "{\"inner\":{\"array\":[true]},\"false\":false,\"d\":{}}", false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_DICTIONARY)); + dict_val = static_cast<DictionaryValue*>(root.get()); + DictionaryValue* inner_dict = NULL; + ASSERT_TRUE(dict_val->GetDictionary(L"inner", &inner_dict)); + ListValue* inner_array = NULL; + ASSERT_TRUE(inner_dict->GetList(L"array", &inner_array)); + ASSERT_EQ(1U, inner_array->GetSize()); + bool_value = true; + ASSERT_TRUE(dict_val->GetBoolean(L"false", &bool_value)); + ASSERT_FALSE(bool_value); + inner_dict = NULL; + ASSERT_TRUE(dict_val->GetDictionary(L"d", &inner_dict)); + + root2.reset(JSONReader::Read( + "{\"inner\": {\"array\":[true] , },\"false\":false,\"d\":{},}", true)); + EXPECT_TRUE(root->Equals(root2.get())); + + // Test keys with periods + root.reset(JSONReader::Read( + "{\"a.b\":3,\"c\":2,\"d.e.f\":{\"g.h.i.j\":1}}", false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_DICTIONARY)); + dict_val = static_cast<DictionaryValue*>(root.get()); + int integer_value = 0; + EXPECT_TRUE(dict_val->GetIntegerWithoutPathExpansion(L"a.b", &integer_value)); + EXPECT_EQ(3, integer_value); + EXPECT_TRUE(dict_val->GetIntegerWithoutPathExpansion(L"c", &integer_value)); + EXPECT_EQ(2, integer_value); + inner_dict = NULL; + ASSERT_TRUE(dict_val->GetDictionaryWithoutPathExpansion(L"d.e.f", + &inner_dict)); + ASSERT_EQ(1U, inner_dict->size()); + EXPECT_TRUE(inner_dict->GetIntegerWithoutPathExpansion(L"g.h.i.j", + &integer_value)); + 
EXPECT_EQ(1, integer_value); + + root.reset(JSONReader::Read("{\"a\":{\"b\":2},\"a.b\":1}", false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_DICTIONARY)); + dict_val = static_cast<DictionaryValue*>(root.get()); + EXPECT_TRUE(dict_val->GetInteger(L"a.b", &integer_value)); + EXPECT_EQ(2, integer_value); + EXPECT_TRUE(dict_val->GetIntegerWithoutPathExpansion(L"a.b", &integer_value)); + EXPECT_EQ(1, integer_value); + + // Invalid, no closing brace + root.reset(JSONReader::Read("{\"a\": true", false)); + ASSERT_FALSE(root.get()); + + // Invalid, keys must be quoted + root.reset(JSONReader::Read("{foo:true}", false)); + ASSERT_FALSE(root.get()); + + // Invalid, trailing comma + root.reset(JSONReader::Read("{\"a\":true,}", false)); + ASSERT_FALSE(root.get()); + + // Invalid, too many commas + root.reset(JSONReader::Read("{\"a\":true,,\"b\":false}", false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("{\"a\":true,,\"b\":false}", true)); + ASSERT_FALSE(root.get()); + + // Invalid, no separator + root.reset(JSONReader::Read("{\"a\" \"b\"}", false)); + ASSERT_FALSE(root.get()); + + // Invalid, lone comma. + root.reset(JSONReader::Read("{,}", false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("{,}", true)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("{\"a\":true,,}", true)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("{,\"a\":true}", true)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("{\"a\":true,,\"b\":false}", true)); + ASSERT_FALSE(root.get()); + + // Test stack overflow + std::string evil(1000000, '['); + evil.append(std::string(1000000, ']')); + root.reset(JSONReader::Read(evil, false)); + ASSERT_FALSE(root.get()); + + // A few thousand adjacent lists is fine. 
+ std::string not_evil("["); + not_evil.reserve(15010); + for (int i = 0; i < 5000; ++i) { + not_evil.append("[],"); + } + not_evil.append("[]]"); + root.reset(JSONReader::Read(not_evil, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_LIST)); + list = static_cast<ListValue*>(root.get()); + ASSERT_EQ(5001U, list->GetSize()); + + // Test utf8 encoded input + root.reset(JSONReader().JsonToValue("\"\xe7\xbd\x91\xe9\xa1\xb5\"", + false, false)); + ASSERT_TRUE(root.get()); + ASSERT_TRUE(root->IsType(Value::TYPE_STRING)); + str_val.clear(); + ASSERT_TRUE(root->GetAsString(&str_val)); + ASSERT_EQ(L"\x7f51\x9875", str_val); + + // Test invalid utf8 encoded input + root.reset(JSONReader().JsonToValue("\"345\xb0\xa1\xb0\xa2\"", + false, false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader().JsonToValue("\"123\xc0\x81\"", + false, false)); + ASSERT_FALSE(root.get()); + + // Test invalid root objects. + root.reset(JSONReader::Read("null", false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("true", false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("10", false)); + ASSERT_FALSE(root.get()); + root.reset(JSONReader::Read("\"root\"", false)); + ASSERT_FALSE(root.get()); +} + +TEST(JSONReaderTest, ErrorMessages) { + // Error strings should not be modified in case of success. 
+ std::string error_message; + scoped_ptr<Value> root; + root.reset(JSONReader::ReadAndReturnError("[42]", false, &error_message)); + EXPECT_TRUE(error_message.empty()); + + // Test line and column counting + const char* big_json = "[\n0,\n1,\n2,\n3,4,5,6 7,\n8,\n9\n]"; + // error here --------------------------------^ + root.reset(JSONReader::ReadAndReturnError(big_json, false, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(5, 9, JSONReader::kSyntaxError), + error_message); + + // Test each of the error conditions + root.reset(JSONReader::ReadAndReturnError("{},{}", false, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 3, + JSONReader::kUnexpectedDataAfterRoot), error_message); + + std::string nested_json; + for (int i = 0; i < 101; ++i) { + nested_json.insert(nested_json.begin(), '['); + nested_json.append(1, ']'); + } + root.reset(JSONReader::ReadAndReturnError(nested_json, false, + &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 101, JSONReader::kTooMuchNesting), + error_message); + + root.reset(JSONReader::ReadAndReturnError("42", false, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 1, + JSONReader::kBadRootElementType), error_message); + + root.reset(JSONReader::ReadAndReturnError("[1,]", false, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 4, JSONReader::kTrailingComma), + error_message); + + root.reset(JSONReader::ReadAndReturnError("{foo:\"bar\"}", false, + &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 2, + JSONReader::kUnquotedDictionaryKey), error_message); + + root.reset(JSONReader::ReadAndReturnError("{\"foo\":\"bar\",}", false, + &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 14, JSONReader::kTrailingComma), + error_message); + + 
root.reset(JSONReader::ReadAndReturnError("[nu]", false, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 2, JSONReader::kSyntaxError), + error_message); + + root.reset(JSONReader::ReadAndReturnError("[\"xxx\\xq\"]", false, + &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), + error_message); + + root.reset(JSONReader::ReadAndReturnError("[\"xxx\\uq\"]", false, + &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), + error_message); + + root.reset(JSONReader::ReadAndReturnError("[\"xxx\\q\"]", false, + &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), + error_message); + +} + +} // namespace base diff --git a/base/json/json_writer.cc b/base/json/json_writer.cc new file mode 100644 index 0000000..ffdad76 --- /dev/null +++ b/base/json/json_writer.cc @@ -0,0 +1,200 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/json/json_writer.h" + +#include "base/json/string_escape.h" +#include "base/logging.h" +#include "base/string_util.h" +#include "base/values.h" +#include "base/utf_string_conversions.h" + +namespace base { + +#if defined(OS_WIN) +static const char kPrettyPrintLineEnding[] = "\r\n"; +#else +static const char kPrettyPrintLineEnding[] = "\n"; +#endif + +/* static */ +const char* JSONWriter::kEmptyArray = "[]"; + +/* static */ +void JSONWriter::Write(const Value* const node, + bool pretty_print, + std::string* json) { + WriteWithOptionalEscape(node, pretty_print, true, json); +} + +/* static */ +void JSONWriter::WriteWithOptionalEscape(const Value* const node, + bool pretty_print, + bool escape, + std::string* json) { + json->clear(); + // Is there a better way to estimate the size of the output? + json->reserve(1024); + JSONWriter writer(pretty_print, json); + writer.BuildJSONString(node, 0, escape); + if (pretty_print) + json->append(kPrettyPrintLineEnding); +} + +JSONWriter::JSONWriter(bool pretty_print, std::string* json) + : json_string_(json), + pretty_print_(pretty_print) { + DCHECK(json); +} + +void JSONWriter::BuildJSONString(const Value* const node, + int depth, + bool escape) { + switch (node->GetType()) { + case Value::TYPE_NULL: + json_string_->append("null"); + break; + + case Value::TYPE_BOOLEAN: + { + bool value; + bool result = node->GetAsBoolean(&value); + DCHECK(result); + json_string_->append(value ? "true" : "false"); + break; + } + + case Value::TYPE_INTEGER: + { + int value; + bool result = node->GetAsInteger(&value); + DCHECK(result); + StringAppendF(json_string_, "%d", value); + break; + } + + case Value::TYPE_REAL: + { + double value; + bool result = node->GetAsReal(&value); + DCHECK(result); + std::string real = DoubleToString(value); + // Ensure that the number has a .0 if there's no decimal or 'e'. This + // makes sure that when we read the JSON back, it's interpreted as a + // real rather than an int. 
+ if (real.find('.') == std::string::npos && + real.find('e') == std::string::npos && + real.find('E') == std::string::npos) { + real.append(".0"); + } + // The JSON spec requires that non-integer values in the range (-1,1) + // have a zero before the decimal point - ".52" is not valid, "0.52" is. + if (real[0] == '.') { + real.insert(0, "0"); + } else if (real.length() > 1 && real[0] == '-' && real[1] == '.') { + // "-.1" bad "-0.1" good + real.insert(1, "0"); + } + json_string_->append(real); + break; + } + + case Value::TYPE_STRING: + { + std::string value; + bool result = node->GetAsString(&value); + DCHECK(result); + if (escape) { + JsonDoubleQuote(UTF8ToUTF16(value), true, json_string_); + } else { + JsonDoubleQuote(value, true, json_string_); + } + break; + } + + case Value::TYPE_LIST: + { + json_string_->append("["); + if (pretty_print_) + json_string_->append(" "); + + const ListValue* list = static_cast<const ListValue*>(node); + for (size_t i = 0; i < list->GetSize(); ++i) { + if (i != 0) { + json_string_->append(","); + if (pretty_print_) + json_string_->append(" "); + } + + Value* value = NULL; + bool result = list->Get(i, &value); + DCHECK(result); + BuildJSONString(value, depth, escape); + } + + if (pretty_print_) + json_string_->append(" "); + json_string_->append("]"); + break; + } + + case Value::TYPE_DICTIONARY: + { + json_string_->append("{"); + if (pretty_print_) + json_string_->append(kPrettyPrintLineEnding); + + const DictionaryValue* dict = + static_cast<const DictionaryValue*>(node); + for (DictionaryValue::key_iterator key_itr = dict->begin_keys(); + key_itr != dict->end_keys(); + ++key_itr) { + if (key_itr != dict->begin_keys()) { + json_string_->append(","); + if (pretty_print_) + json_string_->append(kPrettyPrintLineEnding); + } + + Value* value = NULL; + bool result = dict->GetWithoutPathExpansion(*key_itr, &value); + DCHECK(result); + + if (pretty_print_) + IndentLine(depth + 1); + AppendQuotedString(*key_itr); + if (pretty_print_) { 
+ json_string_->append(": "); + } else { + json_string_->append(":"); + } + BuildJSONString(value, depth + 1, escape); + } + + if (pretty_print_) { + json_string_->append(kPrettyPrintLineEnding); + IndentLine(depth); + json_string_->append("}"); + } else { + json_string_->append("}"); + } + break; + } + + default: + // TODO(jhughes): handle TYPE_BINARY + NOTREACHED() << "unknown json type"; + } +} + +void JSONWriter::AppendQuotedString(const std::wstring& str) { + JsonDoubleQuote(WideToUTF16Hack(str), true, json_string_); +} + +void JSONWriter::IndentLine(int depth) { + // It may be faster to keep an indent string so we don't have to keep + // reallocating. + json_string_->append(std::string(depth * 3, ' ')); +} + +} // namespace base diff --git a/base/json/json_writer.h b/base/json/json_writer.h new file mode 100644 index 0000000..0ebee0a --- /dev/null +++ b/base/json/json_writer.h @@ -0,0 +1,63 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_JSON_JSON_WRITER_H_ +#define BASE_JSON_JSON_WRITER_H_ + +#include <string> + +#include "base/basictypes.h" + +class Value; + +namespace base { + +class JSONWriter { + public: + // Given a root node, generates a JSON string and puts it into |json|. + // If |pretty_print| is true, return a slightly nicer formatted json string + // (pads with whitespace to help readability). If |pretty_print| is false, + // we try to generate as compact a string as possible. + // TODO(tc): Should we generate json if it would be invalid json (e.g., + // |node| is not a DictionaryValue/ListValue or if there are inf/-inf float + // values)? + static void Write(const Value* const node, bool pretty_print, + std::string* json); + + // Same as above, but has an option to not escape the string, preserving its + // UTF8 characters. 
It is useful if you can pass resulting string to the + // JSON parser in binary form (as UTF8). + static void WriteWithOptionalEscape(const Value* const node, + bool pretty_print, + bool escape, + std::string* json); + + // A static, constant JSON string representing an empty array. Useful + // for empty JSON argument passing. + static const char* kEmptyArray; + + private: + JSONWriter(bool pretty_print, std::string* json); + + // Called recursively to build the JSON string. When completed, + // json_string_ will contain the JSON. + void BuildJSONString(const Value* const node, int depth, bool escape); + + // Appends a quoted, escaped, version of str to json_string_. + void AppendQuotedString(const std::wstring& str); + + // Adds space to json_string_ for the indent level. + void IndentLine(int depth); + + // Where we write JSON data as we generate it. + std::string* json_string_; + + bool pretty_print_; + + DISALLOW_COPY_AND_ASSIGN(JSONWriter); +}; + +} // namespace base + +#endif // BASE_JSON_JSON_WRITER_H_ diff --git a/base/json/json_writer_unittest.cc b/base/json/json_writer_unittest.cc new file mode 100644 index 0000000..e7d0f05 --- /dev/null +++ b/base/json/json_writer_unittest.cc @@ -0,0 +1,98 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/json/json_writer.h" +#include "base/values.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace base { + +TEST(JSONWriterTest, Writing) { + // Test null + Value* root = Value::CreateNullValue(); + std::string output_js; + JSONWriter::Write(root, false, &output_js); + ASSERT_EQ("null", output_js); + delete root; + + // Test empty dict + root = new DictionaryValue; + JSONWriter::Write(root, false, &output_js); + ASSERT_EQ("{}", output_js); + delete root; + + // Test empty list + root = new ListValue; + JSONWriter::Write(root, false, &output_js); + ASSERT_EQ("[]", output_js); + delete root; + + // Test Real values should always have a decimal or an 'e'. + root = Value::CreateRealValue(1.0); + JSONWriter::Write(root, false, &output_js); + ASSERT_EQ("1.0", output_js); + delete root; + + // Test Real values in the range (-1, 1) must have leading zeros + root = Value::CreateRealValue(0.2); + JSONWriter::Write(root, false, &output_js); + ASSERT_EQ("0.2", output_js); + delete root; + + // Test Real values in the range (-1, 1) must have leading zeros + root = Value::CreateRealValue(-0.8); + JSONWriter::Write(root, false, &output_js); + ASSERT_EQ("-0.8", output_js); + delete root; + + // Writer unittests like empty list/dict nesting, + // list list nesting, etc. + DictionaryValue root_dict; + ListValue* list = new ListValue; + root_dict.Set(L"list", list); + DictionaryValue* inner_dict = new DictionaryValue; + list->Append(inner_dict); + inner_dict->SetInteger(L"inner int", 10); + ListValue* inner_list = new ListValue; + list->Append(inner_list); + list->Append(Value::CreateBooleanValue(true)); + + // Test the pretty-printer. + JSONWriter::Write(&root_dict, false, &output_js); + ASSERT_EQ("{\"list\":[{\"inner int\":10},[],true]}", output_js); + JSONWriter::Write(&root_dict, true, &output_js); + // The pretty-printer uses a different newline style on Windows than on + // other platforms. 
+#if defined(OS_WIN) +#define JSON_NEWLINE "\r\n" +#else +#define JSON_NEWLINE "\n" +#endif + ASSERT_EQ("{" JSON_NEWLINE + " \"list\": [ {" JSON_NEWLINE + " \"inner int\": 10" JSON_NEWLINE + " }, [ ], true ]" JSON_NEWLINE + "}" JSON_NEWLINE, + output_js); +#undef JSON_NEWLINE + + // Test keys with periods + DictionaryValue period_dict; + period_dict.SetWithoutPathExpansion(L"a.b", Value::CreateIntegerValue(3)); + period_dict.SetWithoutPathExpansion(L"c", Value::CreateIntegerValue(2)); + DictionaryValue* period_dict2 = new DictionaryValue; + period_dict2->SetWithoutPathExpansion(L"g.h.i.j", + Value::CreateIntegerValue(1)); + period_dict.SetWithoutPathExpansion(L"d.e.f", period_dict2); + JSONWriter::Write(&period_dict, false, &output_js); + ASSERT_EQ("{\"a.b\":3,\"c\":2,\"d.e.f\":{\"g.h.i.j\":1}}", output_js); + + DictionaryValue period_dict3; + period_dict3.Set(L"a.b", Value::CreateIntegerValue(2)); + period_dict3.SetWithoutPathExpansion(L"a.b", Value::CreateIntegerValue(1)); + JSONWriter::Write(&period_dict3, false, &output_js); + ASSERT_EQ("{\"a\":{\"b\":2},\"a.b\":1}", output_js); +} + +} // namespace base diff --git a/base/json/string_escape.cc b/base/json/string_escape.cc new file mode 100644 index 0000000..4e1418c --- /dev/null +++ b/base/json/string_escape.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/json/string_escape.h" + +#include <string> + +#include "base/string_util.h" + +namespace base { + +namespace { + +// Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, +// returns true and appends the escape sequence to |dst|. This isn't required +// by the spec, but it's more readable by humans than the \uXXXX alternatives. 
+template<typename CHAR> +static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { + // WARNING: if you add a new case here, you need to update the reader as well. + // Note: \v is in the reader, but not here since the JSON spec doesn't + // allow it. + switch (c) { + case '\b': + dst->append("\\b"); + break; + case '\f': + dst->append("\\f"); + break; + case '\n': + dst->append("\\n"); + break; + case '\r': + dst->append("\\r"); + break; + case '\t': + dst->append("\\t"); + break; + case '\\': + dst->append("\\\\"); + break; + case '"': + dst->append("\\\""); + break; + default: + return false; + } + return true; +} + +template <class STR> +void JsonDoubleQuoteT(const STR& str, + bool put_in_quotes, + std::string* dst) { + if (put_in_quotes) + dst->push_back('"'); + + for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { + typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; + if (!JsonSingleEscapeChar(c, dst)) { + if (c < 32 || c > 126) { + // Technically, we could also pass through c > 126 as UTF8, but this is + // also optional. It would also be a pain to implement here. + unsigned int as_uint = static_cast<unsigned int>(c); + StringAppendF(dst, "\\u%04X", as_uint); + } else { + unsigned char ascii = static_cast<unsigned char>(*it); + dst->push_back(ascii); + } + } + } + + if (put_in_quotes) + dst->push_back('"'); +} + +} // namespace + +void JsonDoubleQuote(const std::string& str, + bool put_in_quotes, + std::string* dst) { + JsonDoubleQuoteT(str, put_in_quotes, dst); +} + +void JsonDoubleQuote(const string16& str, + bool put_in_quotes, + std::string* dst) { + JsonDoubleQuoteT(str, put_in_quotes, dst); +} + +} // namespace base diff --git a/base/json/string_escape.h b/base/json/string_escape.h new file mode 100644 index 0000000..7d74021 --- /dev/null +++ b/base/json/string_escape.h @@ -0,0 +1,32 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// This file defines utility functions for escaping strings. + +#ifndef BASE_JSON_STRING_ESCAPE_H_ +#define BASE_JSON_STRING_ESCAPE_H_ + +#include <string> + +#include "base/string16.h" + +namespace base { + +// Escape |str| appropriately for a JSON string literal, _appending_ the +// result to |dst|. This will create unicode escape sequences (\uXXXX). +// If |put_in_quotes| is true, the result will be surrounded in double quotes. +// The outputted literal, when interpreted by the browser, should result in a +// javascript string that is identical and the same length as the input |str|. +void JsonDoubleQuote(const std::string& str, + bool put_in_quotes, + std::string* dst); + +void JsonDoubleQuote(const string16& str, + bool put_in_quotes, + std::string* dst); + + +} // namespace base + +#endif // BASE_JSON_STRING_ESCAPE_H_ diff --git a/base/json/string_escape_unittest.cc b/base/json/string_escape_unittest.cc new file mode 100644 index 0000000..a3d6262 --- /dev/null +++ b/base/json/string_escape_unittest.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/json/string_escape.h" +#include "base/string_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace base { + +namespace { + +const struct json_narrow_test_data { + const char* to_escape; + const char* escaped; +} json_narrow_cases[] = { + {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, + {"a\b\f\n\r\t\v\1\\.\"z", + "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, + {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, +}; + +} + +TEST(StringEscapeTest, JsonDoubleQuoteNarrow) { + for (size_t i = 0; i < arraysize(json_narrow_cases); ++i) { + std::string in = json_narrow_cases[i].to_escape; + std::string out; + JsonDoubleQuote(in, false, &out); + EXPECT_EQ(std::string(json_narrow_cases[i].escaped), out); + } + + std::string in = json_narrow_cases[0].to_escape; + std::string out; + JsonDoubleQuote(in, false, &out); + + // test quoting + std::string out_quoted; + JsonDoubleQuote(in, true, &out_quoted); + EXPECT_EQ(out.length() + 2, out_quoted.length()); + EXPECT_EQ(out_quoted.find(out), 1U); + + // now try with a NULL in the string + std::string null_prepend = "test"; + null_prepend.push_back(0); + in = null_prepend + in; + std::string expected = "test\\u0000"; + expected += json_narrow_cases[0].escaped; + out.clear(); + JsonDoubleQuote(in, false, &out); + EXPECT_EQ(expected, out); +} + +namespace { + +const struct json_wide_test_data { + const wchar_t* to_escape; + const char* escaped; +} json_wide_cases[] = { + {L"b\uffb1\u00ff", "b\\uFFB1\\u00FF"}, + {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, + {L"a\b\f\n\r\t\v\1\\.\"z", + "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, + {L"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, +}; + +} + +TEST(StringEscapeTest, JsonDoubleQuoteWide) { + + for (size_t i = 0; i < arraysize(json_wide_cases); ++i) { + std::string out; + string16 in = WideToUTF16(json_wide_cases[i].to_escape); + JsonDoubleQuote(in, false, &out); + EXPECT_EQ(std::string(json_wide_cases[i].escaped), out); + } + + 
string16 in = WideToUTF16(json_wide_cases[0].to_escape); + std::string out; + JsonDoubleQuote(in, false, &out); + + // test quoting + std::string out_quoted; + JsonDoubleQuote(in, true, &out_quoted); + EXPECT_EQ(out.length() + 2, out_quoted.length()); + EXPECT_EQ(out_quoted.find(out), 1U); + + // now try with a NULL in the string + string16 null_prepend = WideToUTF16(L"test"); + null_prepend.push_back(0); + in = null_prepend + in; + std::string expected = "test\\u0000"; + expected += json_wide_cases[0].escaped; + out.clear(); + JsonDoubleQuote(in, false, &out); + EXPECT_EQ(expected, out); +} + +} // namespace base |