diff options
author | rsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-05-16 15:23:30 +0000 |
---|---|---|
committer | rsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-05-16 15:23:30 +0000 |
commit | 6e680cfca66d0461f2824ccb5128e4f9cbb20bb6 (patch) | |
tree | 8810c22ec7ab357f5cc15cc508565d2a9872dcf5 /base | |
parent | 97570b99a4341b1d1d6512f5d82b43c3123eb927 (diff) | |
download | chromium_src-6e680cfca66d0461f2824ccb5128e4f9cbb20bb6.zip chromium_src-6e680cfca66d0461f2824ccb5128e4f9cbb20bb6.tar.gz chromium_src-6e680cfca66d0461f2824ccb5128e4f9cbb20bb6.tar.bz2 |
Rewrite base::JSONReader to be 35-40% faster, depending on the input string.
This change does the following:
* Parses the input string and generates the object representation in O(n) time.
* Optimizes string decoding by using StringPiece where possible, which also
introduces the JSON_DETACHABLE_CHILDREN parser option.
* Makes JSONReader a simpler interface by hiding the parser details in an
internal JSONParser class.
BUG=49212,111581,121469
TEST=Hopefully covered by all test suites. New tests added for edge cases.
Review URL: https://chromiumcodereview.appspot.com/10035042
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@137430 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/base.gyp | 1 | ||||
-rw-r--r-- | base/base.gypi | 2 | ||||
-rw-r--r-- | base/base_export.h | 9 | ||||
-rw-r--r-- | base/debug/trace_event_unittest.cc | 3 | ||||
-rw-r--r-- | base/json/json_parser.cc | 973 | ||||
-rw-r--r-- | base/json/json_parser.h | 273 | ||||
-rw-r--r-- | base/json/json_parser_unittest.cc | 293 | ||||
-rw-r--r-- | base/json/json_reader.cc | 680 | ||||
-rw-r--r-- | base/json/json_reader.h | 174 | ||||
-rw-r--r-- | base/json/json_reader_unittest.cc | 334 | ||||
-rw-r--r-- | base/string_util.cc | 2 | ||||
-rw-r--r-- | base/values.cc | 10 | ||||
-rw-r--r-- | base/values.h | 15 |
13 files changed, 1808 insertions, 961 deletions
diff --git a/base/base.gyp b/base/base.gyp index 52df286..99c826c 100644 --- a/base/base.gyp +++ b/base/base.gyp @@ -169,6 +169,7 @@ 'i18n/rtl_unittest.cc', 'i18n/string_search_unittest.cc', 'i18n/time_formatting_unittest.cc', + 'json/json_parser_unittest.cc', 'json/json_reader_unittest.cc', 'json/json_value_converter_unittest.cc', 'json/json_value_serializer_unittest.cc', diff --git a/base/base.gypi b/base/base.gypi index 897cabd..78e8a764 100644 --- a/base/base.gypi +++ b/base/base.gypi @@ -143,6 +143,8 @@ 'id_map.h', 'json/json_file_value_serializer.cc', 'json/json_file_value_serializer.h', + 'json/json_parser.cc', + 'json/json_parser.h', 'json/json_reader.cc', 'json/json_reader.h', 'json/json_string_value_serializer.cc', diff --git a/base/base_export.h b/base/base_export.h index 239360e..37bbc60 100644 --- a/base/base_export.h +++ b/base/base_export.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -11,20 +11,25 @@ #if defined(BASE_IMPLEMENTATION) #define BASE_EXPORT __declspec(dllexport) +#define BASE_EXPORT_PRIVATE __declspec(dllexport) #else #define BASE_EXPORT __declspec(dllimport) +#define BASE_EXPORT_PRIVATE __declspec(dllimport) #endif // defined(BASE_IMPLEMENTATION) #else // defined(WIN32) #if defined(BASE_IMPLEMENTATION) #define BASE_EXPORT __attribute__((visibility("default"))) +#define BASE_EXPORT_PRIVATE __attribute__((visibility("default"))) #else #define BASE_EXPORT -#endif +#define BASE_EXPORT_PRIVATE +#endif // defined(BASE_IMPLEMENTATION) #endif #else // defined(COMPONENT_BUILD) #define BASE_EXPORT +#define BASE_EXPORT_PRIVATE #endif #endif // BASE_BASE_EXPORT_H_ diff --git a/base/debug/trace_event_unittest.cc b/base/debug/trace_event_unittest.cc index 66dff0d..f839d27 100644 --- a/base/debug/trace_event_unittest.cc +++ b/base/debug/trace_event_unittest.cc @@ -99,7 +99,8 @@ void TraceEventTestFixture::OnTraceDataCollected( trace_buffer_.Finish(); scoped_ptr<Value> root; - root.reset(base::JSONReader::Read(json_output_.json_output)); + root.reset(base::JSONReader::Read(json_output_.json_output, + JSON_PARSE_RFC | JSON_DETACHABLE_CHILDREN)); if (!root.get()) { LOG(ERROR) << json_output_.json_output; diff --git a/base/json/json_parser.cc b/base/json/json_parser.cc new file mode 100644 index 0000000..766c764 --- /dev/null +++ b/base/json/json_parser.cc @@ -0,0 +1,973 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/json/json_parser.h" + +#include "base/float_util.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/string_number_conversions.h" +#include "base/string_util.h" +#include "base/stringprintf.h" +#include "base/third_party/icu/icu_utf.h" +#include "base/utf_string_conversion_utils.h" +#include "base/utf_string_conversions.h" +#include "base/values.h" + +namespace base { +namespace internal { + +namespace { + +const int kStackMaxDepth = 100; + +const int32 kExtendedASCIIStart = 0x80; + +// This and the class below are used to own the JSON input string for when +// string tokens are stored as StringPiece instead of std::string. This +// optimization avoids about 2/3rds of string memory copies. The constructor +// takes the input string and swaps its data into the new instance. The real +// root value is also Swap()ed into the new instance. +class DictionaryHiddenRootValue : public base::DictionaryValue { + public: + DictionaryHiddenRootValue(std::string* json, Value* root) { + DCHECK(root->IsType(Value::TYPE_DICTIONARY)); + DictionaryValue::Swap(static_cast<DictionaryValue*>(root)); + json->swap(json_); + } + + virtual void Swap(DictionaryValue* other) OVERRIDE { + DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; + + // First deep copy to convert JSONStringValue to std::string and swap that + // copy with |other|, which contains the new contents of |this|. + scoped_ptr<base::DictionaryValue> copy(DeepCopy()); + copy->Swap(other); + + // Then erase the contents of the current dictionary and swap in the + // new contents, originally from |other|. + Clear(); + json_.clear(); + DictionaryValue::Swap(copy.get()); + } + + // Not overriding DictionaryValue::Remove because it just calls through to + // the method below. + + virtual bool RemoveWithoutPathExpansion(const std::string& key, + Value** out) OVERRIDE { + // If the caller won't take ownership of the removed value, just call up. 
+ if (!out) + return DictionaryValue::RemoveWithoutPathExpansion(key, out); + + DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; + + // Otherwise, remove the value while it's still "owned" by this and copy it + // to convert any JSONStringValues to std::string. + Value* out_owned = NULL; + if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) + return false; + + *out = out_owned->DeepCopy(); + delete out_owned; + + return true; + } + + private: + std::string json_; + + DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); +}; + +class ListHiddenRootValue : public base::ListValue { + public: + ListHiddenRootValue(std::string* json, Value* root) { + DCHECK(root->IsType(Value::TYPE_LIST)); + ListValue::Swap(static_cast<ListValue*>(root)); + json->swap(json_); + } + + virtual void Swap(ListValue* other) OVERRIDE { + DVLOG(1) << "Swap()ing a ListValue inefficiently."; + + // First deep copy to convert JSONStringValue to std::string and swap that + // copy with |other|, which contains the new contents of |this|. + scoped_ptr<base::ListValue> copy(DeepCopy()); + copy->Swap(other); + + // Then erase the contents of the current list and swap in the new contents, + // originally from |other|. + Clear(); + json_.clear(); + ListValue::Swap(copy.get()); + } + + virtual bool Remove(size_t index, Value** out) OVERRIDE { + // If the caller won't take ownership of the removed value, just call up. + if (!out) + return ListValue::Remove(index, out); + + DVLOG(1) << "Remove()ing from a ListValue inefficiently."; + + // Otherwise, remove the value while it's still "owned" by this and copy it + // to convert any JSONStringValues to std::string. 
+ Value* out_owned = NULL; + if (!ListValue::Remove(index, &out_owned)) + return false; + + *out = out_owned->DeepCopy(); + delete out_owned; + + return true; + } + + private: + std::string json_; + + DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); +}; + +// A variant on StringValue that uses StringPiece instead of copying the string +// into the Value. This can only be stored in a child of hidden root (above), +// otherwise the referenced string will not be guaranteed to outlive it. +class JSONStringValue : public base::Value { + public: + explicit JSONStringValue(const base::StringPiece& piece) + : Value(TYPE_STRING), + string_piece_(piece) { + } + + // Value: + bool GetAsString(std::string* out_value) const OVERRIDE { + string_piece_.CopyToString(out_value); + return true; + } + bool GetAsString(string16* out_value) const OVERRIDE { + *out_value = UTF8ToUTF16(string_piece_); + return true; + } + virtual Value* DeepCopy() const OVERRIDE { + return Value::CreateStringValue(string_piece_.as_string()); + } + virtual bool Equals(const Value* other) const OVERRIDE { + std::string other_string; + return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && + StringPiece(other_string) == string_piece_; + } + + private: + // The location in the original input stream. + base::StringPiece string_piece_; + + DISALLOW_COPY_AND_ASSIGN(JSONStringValue); +}; + +// Simple class that checks for maximum recursion/"stack overflow." 
+class StackMarker { + public: + explicit StackMarker(int* depth) : depth_(depth) { + ++(*depth_); + DCHECK_LE(*depth_, kStackMaxDepth); + } + ~StackMarker() { + --(*depth_); + } + + bool IsTooDeep() const { + return *depth_ >= kStackMaxDepth; + } + + private: + int* const depth_; + + DISALLOW_COPY_AND_ASSIGN(StackMarker); +}; + +} // namespace + +JSONParser::JSONParser(int options) + : options_(options), + start_pos_(NULL), + pos_(NULL), + end_pos_(NULL), + index_(0), + stack_depth_(0), + line_number_(0), + index_last_line_(0), + error_code_(JSONReader::JSON_NO_ERROR), + error_line_(0), + error_column_(0) { +} + +JSONParser::~JSONParser() { +} + +Value* JSONParser::Parse(const std::string& input) { + // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix + // <http://crbug.com/126107> when my Windows box arrives. +#if defined(OS_WIN) + options_ |= JSON_DETACHABLE_CHILDREN; +#endif + + std::string input_copy; + // If the children of a JSON root can be detached, then hidden roots cannot + // be used, so do not bother copying the input because StringPiece will not + // be used anywhere. + if (!(options_ & JSON_DETACHABLE_CHILDREN)) { + input_copy = input; + start_pos_ = input_copy.data(); + } else { + start_pos_ = input.data(); + } + pos_ = start_pos_; + end_pos_ = start_pos_ + input.length(); + index_ = 0; + line_number_ = 1; + index_last_line_ = 0; + + error_code_ = JSONReader::JSON_NO_ERROR; + error_line_ = 0; + error_column_ = 0; + + // When the input JSON string starts with a UTF-8 Byte-Order-Mark + // <0xEF 0xBB 0xBF>, advance the start position to avoid the + // ParseNextToken function mis-treating a Unicode BOM as an invalid + // character and returning NULL. + if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF && + static_cast<uint8>(*(pos_ + 1)) == 0xBB && + static_cast<uint8>(*(pos_ + 2)) == 0xBF) { + NextNChars(3); + } + + // Parse the first and any nested tokens. 
+ scoped_ptr<Value> root(ParseNextToken()); + if (!root.get()) + return NULL; + + // Make sure the input stream is at an end. + if (GetNextToken() != T_END_OF_INPUT) { + if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { + ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); + return NULL; + } + } + + // Dictionaries and lists can contain JSONStringValues, so wrap them in a + // hidden root. + if (!(options_ & JSON_DETACHABLE_CHILDREN)) { + if (root->IsType(Value::TYPE_DICTIONARY)) { + return new DictionaryHiddenRootValue(&input_copy, root.release()); + } else if (root->IsType(Value::TYPE_LIST)) { + return new ListHiddenRootValue(&input_copy, root.release()); + } else if (root->IsType(Value::TYPE_STRING)) { + // A string type could be a JSONStringValue, but because there's no + // corresponding HiddenRootValue, the memory will be lost. Deep copy to + // preserve it. + return root->DeepCopy(); + } + } + + // All other values can be returned directly. + return root.release(); +} + +JSONReader::JsonParseError JSONParser::error_code() const { + return error_code_; +} + +std::string JSONParser::GetErrorMessage() const { + return FormatErrorMessage(error_line_, error_column_, + JSONReader::ErrorCodeToString(error_code_)); +} + +// StringBuilder /////////////////////////////////////////////////////////////// + +JSONParser::StringBuilder::StringBuilder() + : pos_(NULL), + length_(0), + string_(NULL) { +} + +JSONParser::StringBuilder::StringBuilder(const char* pos) + : pos_(pos), + length_(0), + string_(NULL) { +} + +void JSONParser::StringBuilder::Swap(StringBuilder* other) { + std::swap(other->string_, string_); + std::swap(other->pos_, pos_); + std::swap(other->length_, length_); +} + +JSONParser::StringBuilder::~StringBuilder() { + delete string_; +} + +void JSONParser::StringBuilder::Append(const char& c) { + DCHECK_GE(c, 0); + DCHECK_LT(c, 128); + + if (string_) + string_->push_back(c); + else + ++length_; +} + +void 
JSONParser::StringBuilder::AppendString(const std::string& str) { + DCHECK(string_); + string_->append(str); +} + +void JSONParser::StringBuilder::Convert() { + if (string_) + return; + string_ = new std::string(pos_, length_); +} + +bool JSONParser::StringBuilder::CanBeStringPiece() const { + return !string_; +} + +StringPiece JSONParser::StringBuilder::AsStringPiece() { + if (string_) + return StringPiece(); + return StringPiece(pos_, length_); +} + +const std::string& JSONParser::StringBuilder::AsString() { + if (!string_) + Convert(); + return *string_; +} + +// JSONParser private ////////////////////////////////////////////////////////// + +inline bool JSONParser::CanConsume(int length) { + return pos_ + length <= end_pos_; +} + +const char* JSONParser::NextChar() { + DCHECK(CanConsume(1)); + ++index_; + ++pos_; + return pos_; +} + +void JSONParser::NextNChars(int n) { + DCHECK(CanConsume(n)); + index_ += n; + pos_ += n; +} + +JSONParser::Token JSONParser::GetNextToken() { + EatWhitespaceAndComments(); + if (!CanConsume(1)) + return T_END_OF_INPUT; + + switch (*pos_) { + case '{': + return T_OBJECT_BEGIN; + case '}': + return T_OBJECT_END; + case '[': + return T_ARRAY_BEGIN; + case ']': + return T_ARRAY_END; + case '"': + return T_STRING; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + return T_NUMBER; + case 't': + return T_BOOL_TRUE; + case 'f': + return T_BOOL_FALSE; + case 'n': + return T_NULL; + case ',': + return T_LIST_SEPARATOR; + case ':': + return T_OBJECT_PAIR_SEPARATOR; + default: + return T_INVALID_TOKEN; + } +} + +void JSONParser::EatWhitespaceAndComments() { + while (pos_ < end_pos_) { + switch (*pos_) { + case '\r': + case '\n': + index_last_line_ = index_; + ++line_number_; + // Fall through. 
+ case ' ': + case '\t': + NextChar(); + break; + case '/': + if (!EatComment()) + return; + break; + default: + return; + } + } +} + +bool JSONParser::EatComment() { + if (*pos_ != '/' || !CanConsume(1)) + return false; + + char next_char = *NextChar(); + if (next_char == '/') { + // Single line comment, read to newline. + while (CanConsume(1)) { + char next_char = *NextChar(); + if (next_char == '\n' || next_char == '\r') + return true; + } + } else if (next_char == '*') { + // Block comment, read until end marker. + while (CanConsume(2)) { + if (*NextChar() == '*' && *NextChar() == '/') { + // EatWhitespaceAndComments will inspect pos_, which will still be on + // the last / of the comment, so advance once more (which may also be + // end of input). + NextChar(); + return true; + } + } + + // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT. + } + + return false; +} + +Value* JSONParser::ParseNextToken() { + return ParseToken(GetNextToken()); +} + +Value* JSONParser::ParseToken(Token token) { + switch (token) { + case T_OBJECT_BEGIN: + return ConsumeDictionary(); + case T_ARRAY_BEGIN: + return ConsumeList(); + case T_STRING: + return ConsumeString(); + case T_NUMBER: + return ConsumeNumber(); + case T_BOOL_TRUE: + case T_BOOL_FALSE: + case T_NULL: + return ConsumeLiteral(); + default: + ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); + return NULL; + } +} + +Value* JSONParser::ConsumeDictionary() { + if (*pos_ != '{') { + ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); + return NULL; + } + + StackMarker depth_check(&stack_depth_); + if (depth_check.IsTooDeep()) { + ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); + return NULL; + } + + scoped_ptr<DictionaryValue> dict(new DictionaryValue); + + NextChar(); + Token token = GetNextToken(); + while (token != T_OBJECT_END) { + if (token != T_STRING) { + ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); + return NULL; + } + + // First consume the key. 
+ StringBuilder key; + if (!ConsumeStringRaw(&key)) { + return NULL; + } + + // Read the separator. + NextChar(); + token = GetNextToken(); + if (token != T_OBJECT_PAIR_SEPARATOR) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + + // The next token is the value. Ownership transfers to |dict|. + NextChar(); + Value* value = ParseNextToken(); + if (!value) { + // ReportError from deeper level. + return NULL; + } + + dict->SetWithoutPathExpansion(key.AsString(), value); + + NextChar(); + token = GetNextToken(); + if (token == T_LIST_SEPARATOR) { + NextChar(); + token = GetNextToken(); + if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { + ReportError(JSONReader::JSON_TRAILING_COMMA, 1); + return NULL; + } + } else if (token != T_OBJECT_END) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); + return NULL; + } + } + + if (token != T_OBJECT_END) + return NULL; + + return dict.release(); +} + +Value* JSONParser::ConsumeList() { + if (*pos_ != '[') { + ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); + return NULL; + } + + StackMarker depth_check(&stack_depth_); + if (depth_check.IsTooDeep()) { + ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); + return NULL; + } + + scoped_ptr<ListValue> list(new ListValue); + + NextChar(); + Token token = GetNextToken(); + while (token != T_ARRAY_END) { + Value* item = ParseToken(token); + if (!item) { + // ReportError from deeper level. 
+ return NULL; + } + + list->Append(item); + + NextChar(); + token = GetNextToken(); + if (token == T_LIST_SEPARATOR) { + NextChar(); + token = GetNextToken(); + if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { + ReportError(JSONReader::JSON_TRAILING_COMMA, 1); + return NULL; + } + } else if (token != T_ARRAY_END) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + } + + if (token != T_ARRAY_END) + return NULL; + + return list.release(); +} + +Value* JSONParser::ConsumeString() { + StringBuilder string; + if (!ConsumeStringRaw(&string)) + return NULL; + + // Create the Value representation, using a hidden root, if configured + // to do so, and if the string can be represented by StringPiece. + if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { + return new JSONStringValue(string.AsStringPiece()); + } else { + if (string.CanBeStringPiece()) + string.Convert(); + return new StringValue(string.AsString()); + } +} + +bool JSONParser::ConsumeStringRaw(StringBuilder* out) { + if (*pos_ != '"') { + ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); + return false; + } + + // StringBuilder will internally build a StringPiece unless a UTF-16 + // conversion occurs, at which point it will perform a copy into a + // std::string. + StringBuilder string(NextChar()); + + int length = end_pos_ - start_pos_; + int32 next_char = 0; + + while (CanConsume(1)) { + pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. + CBU8_NEXT(start_pos_, index_, length, next_char); + if (next_char < 0 || !IsValidCharacter(next_char)) { + ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); + return false; + } + + // If this character is an escape sequence... + if (next_char == '\\') { + // The input string will be adjusted (either by combining the two + // characters of an encoded escape sequence, or with a UTF conversion), + // so using StringPiece isn't possible -- force a conversion. 
+ string.Convert(); + + if (!CanConsume(1)) { + ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); + return false; + } + + switch (*NextChar()) { + // Allowed escape sequences: + case 'x': { // UTF-8 sequence. + // UTF-8 \x escape sequences are not allowed in the spec, but they + // are supported here for backwards-compatibility with the old parser. + if (!CanConsume(2)) { + ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); + return false; + } + + int hex_digit = 0; + if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { + ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); + return false; + } + NextChar(); + + if (hex_digit < kExtendedASCIIStart) + string.Append(hex_digit); + else + DecodeUTF8(hex_digit, &string); + break; + } + case 'u': { // UTF-16 sequence. + // UTF units are of the form \uXXXX. + if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. + ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); + return false; + } + + // Skip the 'u'. + NextChar(); + + std::string utf8_units; + if (!DecodeUTF16(&utf8_units)) { + ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); + return false; + } + + string.AppendString(utf8_units); + break; + } + case '"': + string.Append('"'); + break; + case '\\': + string.Append('\\'); + break; + case '/': + string.Append('/'); + break; + case 'b': + string.Append('\b'); + break; + case 'f': + string.Append('\f'); + break; + case 'n': + string.Append('\n'); + break; + case 'r': + string.Append('\r'); + break; + case 't': + string.Append('\t'); + break; + case 'v': // Not listed as valid escape sequence in the RFC. + string.Append('\v'); + break; + // All other escape sequences are illegal. + default: + ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); + return false; + } + } else if (next_char == '"') { + --index_; // Rewind by one because of CBU8_NEXT. 
+ out->Swap(&string); + return true; + } else { + if (next_char < kExtendedASCIIStart) + string.Append(next_char); + else + DecodeUTF8(next_char, &string); + } + } + + ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); + return false; +} + +// Entry is at the first X in \uXXXX. +bool JSONParser::DecodeUTF16(std::string* dest_string) { + if (!CanConsume(4)) + return false; + + // This is a 32-bit field because the shift operations in the + // conversion process below cause MSVC to error about "data loss." + // This only stores UTF-16 code units, though. + // Consume the UTF-16 code unit, which may be a high surrogate. + int code_unit16_high = 0; + if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) + return false; + + // Only add 3, not 4, because at the end of this iteration, the parser has + // finished working with the last digit of the UTF sequence, meaning that + // the next iteration will advance to the next byte. + NextNChars(3); + + // Used to convert the UTF-16 code units to a code point and then to a UTF-8 + // code unit sequence. + char code_unit8[8] = { 0 }; + size_t offset = 0; + + // If this is a high surrogate, consume the next code unit to get the + // low surrogate. + if (CBU16_IS_SURROGATE(code_unit16_high)) { + // Make sure this is the high surrogate. If not, it's an encoding + // error. + if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) + return false; + + // Make sure that the token has more characters to consume the + // lower surrogate. + if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. + return false; + if (*NextChar() != '\\' || *NextChar() != 'u') + return false; + + NextChar(); // Read past 'u'. 
+ int code_unit16_low = 0; + if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) + return false; + + NextNChars(3); + + if (!CBU16_IS_TRAIL(code_unit16_low)) { + return false; + } + + uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high, + code_unit16_low); + offset = 0; + CBU8_APPEND_UNSAFE(code_unit8, offset, code_point); + } else { + // Not a surrogate. + DCHECK(CBU16_IS_SINGLE(code_unit16_high)); + CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high); + } + + dest_string->append(code_unit8); + return true; +} + +void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) { + // Anything outside of the basic ASCII plane will need to be decoded from + // int32 to a multi-byte sequence. + if (point < kExtendedASCIIStart) { + dest->Append(point); + } else { + char utf8_units[4] = { 0 }; + int offset = 0; + CBU8_APPEND_UNSAFE(utf8_units, offset, point); + dest->Convert(); + dest->AppendString(utf8_units); + } +} + +Value* JSONParser::ConsumeNumber() { + const char* num_start = pos_; + const int start_index = index_; + int end_index = start_index; + + if (*pos_ == '-') + NextChar(); + + if (!ReadInt(false)) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + end_index = index_; + + // The optional fraction part. + if (*pos_ == '.') { + if (!CanConsume(1)) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + NextChar(); + if (!ReadInt(true)) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + end_index = index_; + } + + // Optional exponent part. 
+ if (*pos_ == 'e' || *pos_ == 'E') { + NextChar(); + if (*pos_ == '-' || *pos_ == '+') + NextChar(); + if (!ReadInt(true)) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + end_index = index_; + } + + // ReadInt is greedy because numbers have no easily detectable sentinel, + // so save off where the parser should be on exit (see Consume invariant at + // the top of the header), then make sure the next token is one which is + // valid. + const char* exit_pos = pos_ - 1; + int exit_index = index_ - 1; + + switch (GetNextToken()) { + case T_OBJECT_END: + case T_ARRAY_END: + case T_LIST_SEPARATOR: + case T_END_OF_INPUT: + break; + default: + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + + pos_ = exit_pos; + index_ = exit_index; + + StringPiece num_string(num_start, end_index - start_index); + + int num_int; + if (StringToInt(num_string, &num_int)) + return Value::CreateIntegerValue(num_int); + + double num_double; + if (base::StringToDouble(num_string.as_string(), &num_double) && + IsFinite(num_double)) { + return Value::CreateDoubleValue(num_double); + } + + return NULL; +} + +bool JSONParser::ReadInt(bool allow_leading_zeros) { + char first = *pos_; + int len = 0; + + char c = first; + while (CanConsume(1) && IsAsciiDigit(c)) { + c = *NextChar(); + ++len; + } + + if (len == 0) + return false; + + if (!allow_leading_zeros && len > 1 && first == '0') + return false; + + return true; +} + +Value* JSONParser::ConsumeLiteral() { + switch (*pos_) { + case 't': { + const char* kTrueLiteral = "true"; + const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); + if (!CanConsume(kTrueLen - 1) || + !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + NextNChars(kTrueLen - 1); + return Value::CreateBooleanValue(true); + } + case 'f': { + const char* kFalseLiteral = "false"; + const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); + if (!CanConsume(kFalseLen 
- 1) || + !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + NextNChars(kFalseLen - 1); + return Value::CreateBooleanValue(false); + } + case 'n': { + const char* kNullLiteral = "null"; + const int kNullLen = static_cast<int>(strlen(kNullLiteral)); + if (!CanConsume(kNullLen - 1) || + !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { + ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); + return NULL; + } + NextNChars(kNullLen - 1); + return Value::CreateNullValue(); + } + default: + ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); + return NULL; + } +} + +// static +bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { + return strncmp(one, two, len) == 0; +} + +void JSONParser::ReportError(JSONReader::JsonParseError code, + int column_adjust) { + error_code_ = code; + error_line_ = line_number_; + error_column_ = index_ - index_last_line_ + column_adjust; +} + +// static +std::string JSONParser::FormatErrorMessage(int line, int column, + const std::string& description) { + if (line || column) { + return StringPrintf("Line: %i, column: %i, %s", + line, column, description.c_str()); + } + return description; +} + +} // namespace internal +} // namespace base diff --git a/base/json/json_parser.h b/base/json/json_parser.h new file mode 100644 index 0000000..901e679 --- /dev/null +++ b/base/json/json_parser.h @@ -0,0 +1,273 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef BASE_JSON_JSON_PARSER_H_ +#define BASE_JSON_JSON_PARSER_H_ +#pragma once + +#include <string> + +#include "base/base_export.h" +#include "base/basictypes.h" +#include "base/compiler_specific.h" +#include "base/json/json_reader.h" +#include "base/string_piece.h" + +#if !defined(OS_CHROMEOS) +#include "base/gtest_prod_util.h" +#endif + +namespace base { +class Value; +} + +#if defined(OS_CHROMEOS) +// Chromium and Chromium OS check out gtest to different places, so this is +// unable to compile on both if gtest_prod.h is included here. Instead, include +// its only contents -- this will need to be updated if the macro ever changes. +#define FRIEND_TEST(test_case_name, test_name)\ +friend class test_case_name##_##test_name##_Test + +#define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ + FRIEND_TEST(test_case_name, test_name); \ + FRIEND_TEST(test_case_name, DISABLED_##test_name); \ + FRIEND_TEST(test_case_name, FLAKY_##test_name); \ + FRIEND_TEST(test_case_name, FAILS_##test_name) +#endif // OS_CHROMEOS + +namespace base { +namespace internal { + +class JSONParserTest; + +// The implementation behind the JSONReader interface. This class is not meant +// to be used directly; it encapsulates logic that need not be exposed publicly. +// +// This parser guarantees O(n) time through the input string. It also optimizes +// base::StringValue by using StringPiece where possible when returning Value +// objects by using "hidden roots," discussed in the implementation. +// +// Iteration happens on the byte level, with the functions CanConsume and +// NextChar. The conversion from byte to JSON token happens without advancing +// the parser in GetNextToken/ParseToken, that is tokenization operates on +// the current parser position without advancing. +// +// Built on top of these are a family of Consume functions that iterate +// internally. Invariant: on entry of a Consume function, the parser is wound +// to the first byte of a valid JSON token. 
On exit, it is on the last byte +// of a token, such that the next iteration of the parser will be at the byte +// immediately following the token, which would likely be the first byte of the +// next token. +class BASE_EXPORT_PRIVATE JSONParser { + public: + explicit JSONParser(int options); + ~JSONParser(); + + // Parses the input string according to the set options and returns the + // result as a Value owned by the caller. + Value* Parse(const std::string& input); + + // Returns the error code. + JSONReader::JsonParseError error_code() const; + + // Returns the human-friendly error message. + std::string GetErrorMessage() const; + + private: + enum Token { + T_OBJECT_BEGIN, // { + T_OBJECT_END, // } + T_ARRAY_BEGIN, // [ + T_ARRAY_END, // ] + T_STRING, + T_NUMBER, + T_BOOL_TRUE, // true + T_BOOL_FALSE, // false + T_NULL, // null + T_LIST_SEPARATOR, // , + T_OBJECT_PAIR_SEPARATOR, // : + T_END_OF_INPUT, + T_INVALID_TOKEN, + }; + + // A helper class used for parsing strings. One optimization performed is to + // create base::Value with a StringPiece to avoid unnecessary std::string + // copies. This is not possible if the input string needs to be decoded from + // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped. + // This class centralizes that logic. + class StringBuilder { + public: + // Empty constructor. Used for creating a builder with which to Swap(). + StringBuilder(); + + // |pos| is the beginning of an input string, excluding the |"|. + explicit StringBuilder(const char* pos); + + ~StringBuilder(); + + // Swaps the contents of |other| with this. + void Swap(StringBuilder* other); + + // Either increases the |length_| of the string or copies the character if + // the StringBuilder has been converted. |c| must be in the basic ASCII + // plane; all other characters need to be in UTF-8 units, appended with + // AppendString below. + void Append(const char& c); + + // Appends a string to the std::string. Must be Convert()ed to use. 
+ void AppendString(const std::string& str); + + // Converts the builder from its default StringPiece to a full std::string, + // performing a copy. Once a builder is converted, it cannot be made a + // StringPiece again. + void Convert(); + + // Returns whether the builder can be converted to a StringPiece. + bool CanBeStringPiece() const; + + // Returns the StringPiece representation. Returns an empty piece if it + // cannot be converted. + StringPiece AsStringPiece(); + + // Returns the builder as a std::string. + const std::string& AsString(); + + private: + // The beginning of the input string. + const char* pos_; + + // Number of bytes in |pos_| that make up the string being built. + size_t length_; + + // The copied string representation. NULL until Convert() is called. + // Strong. scoped_ptr<T> has too much of an overhead here. + std::string* string_; + }; + + // Quick check that the stream has capacity to consume |length| more bytes. + bool CanConsume(int length); + + // The basic way to consume a single character in the stream. Consumes one + // byte of the input stream and returns a pointer to the rest of it. + const char* NextChar(); + + // Performs the equivalent of NextChar N times. + void NextNChars(int n); + + // Skips over whitespace and comments to find the next token in the stream. + // This does not advance the parser for non-whitespace or comment chars. + Token GetNextToken(); + + // Consumes whitespace characters and comments until a character that is + // neither is encountered. + void EatWhitespaceAndComments(); + // Helper function that consumes a comment, assuming that the parser is + // currently wound to a '/'. + bool EatComment(); + + // Calls GetNextToken() and then ParseToken(). Caller owns the result. + Value* ParseNextToken(); + + // Takes a token that represents the start of a Value ("a structural token" + // in RFC terms) and consumes it, returning the result as an object the + // caller owns. 
+ Value* ParseToken(Token token); + + // Assuming that the parser is currently wound to '{', this parses a JSON + // object into a DictionaryValue. + Value* ConsumeDictionary(); + + // Assuming that the parser is wound to '[', this parses a JSON list into a + // ListValue. + Value* ConsumeList(); + + // Calls through ConsumeStringRaw and wraps it in a value. + Value* ConsumeString(); + + // Assuming that the parser is wound to a double quote, this parses a string, + // decoding any escape sequences and converting UTF-16 to UTF-8. Returns true on + // success and Swap()s the result into |out|. Returns false on failure with + // error information set. + bool ConsumeStringRaw(StringBuilder* out); + // Helper function for ConsumeStringRaw() that consumes the next four or 10 + // bytes (parser is wound to the first character of a HEX sequence, with the + // potential for consuming another \uXXXX for a surrogate). Returns true on + // success and places the UTF8 code units in |dest_string|, and false on + // failure. + bool DecodeUTF16(std::string* dest_string); + // Helper function for ConsumeStringRaw() that takes a single code point, + // decodes it into UTF-8 units, and appends it to the given builder. The + // point must be valid. + void DecodeUTF8(const int32& point, StringBuilder* dest); + + // Assuming that the parser is wound to the start of a valid JSON number, + // this parses and converts it to either an int or double value. + Value* ConsumeNumber(); + // Helper that reads characters that are ints. Returns true if a number was + // read and false on error. + bool ReadInt(bool allow_leading_zeros); + + // Consumes the literal values of |true|, |false|, and |null|, assuming the + // parser is wound to the first character of any of those. + Value* ConsumeLiteral(); + + // Compares two string buffers of a given length. 
+ static bool StringsAreEqual(const char* left, const char* right, size_t len); + + // Sets the error information to |code| at the current column, based on + // |index_| and |index_last_line_|, with an optional positive/negative + // adjustment by |column_adjust|. + void ReportError(JSONReader::JsonParseError code, int column_adjust); + + // Given the line and column number of an error, formats one of the error + // message constants from json_reader.h for human display. + static std::string FormatErrorMessage(int line, int column, + const std::string& description); + + // base::JSONParserOptions that control parsing. + int options_; + + // Pointer to the start of the input data. + const char* start_pos_; + + // Pointer to the current position in the input data. Equivalent to + // |start_pos_ + index_|. + const char* pos_; + + // Pointer to the last character of the input data. + const char* end_pos_; + + // The index in the input stream to which the parser is wound. + int index_; + + // The number of times the parser has recursed (current stack depth). + int stack_depth_; + + // The line number that the parser is at currently. + int line_number_; + + // The last value of |index_| on the previous line. + int index_last_line_; + + // Error information. 
+ JSONReader::JsonParseError error_code_; + int error_line_; + int error_column_; + + friend class JSONParserTest; + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar); + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary); + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList); + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString); + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals); + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers); + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages); + + DISALLOW_COPY_AND_ASSIGN(JSONParser); +}; + +} // namespace internal +} // namespace base + +#endif // BASE_JSON_JSON_PARSER_H_ diff --git a/base/json/json_parser_unittest.cc b/base/json/json_parser_unittest.cc new file mode 100644 index 0000000..206ef4c --- /dev/null +++ b/base/json/json_parser_unittest.cc @@ -0,0 +1,293 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/json/json_parser.h" + +#include "base/json/json_reader.h" +#include "base/memory/scoped_ptr.h" +#include "base/values.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace base { +namespace internal { + +class JSONParserTest : public testing::Test { + public: + JSONParser* NewTestParser(const std::string& input) { + JSONParser* parser = new JSONParser(JSON_PARSE_RFC); + parser->start_pos_ = input.data(); + parser->pos_ = parser->start_pos_; + parser->end_pos_ = parser->start_pos_ + input.length(); + return parser; + } + + void TestLastThree(JSONParser* parser) { + EXPECT_EQ(',', *parser->NextChar()); + EXPECT_EQ('|', *parser->NextChar()); + EXPECT_EQ('\0', *parser->NextChar()); + EXPECT_EQ(parser->end_pos_, parser->pos_); + } +}; + +TEST_F(JSONParserTest, NextChar) { + std::string input("Hello world"); + scoped_ptr<JSONParser> parser(NewTestParser(input)); + + EXPECT_EQ('H', *parser->pos_); + for (size_t i = 1; i < input.length(); ++i) { + EXPECT_EQ(input[i], *parser->NextChar()); + } + EXPECT_EQ(parser->end_pos_, parser->NextChar()); +} + +TEST_F(JSONParserTest, ConsumeString) { + std::string input("\"test\",|"); + scoped_ptr<JSONParser> parser(NewTestParser(input)); + scoped_ptr<Value> value(parser->ConsumeString()); + EXPECT_EQ('"', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + std::string str; + EXPECT_TRUE(value->GetAsString(&str)); + EXPECT_EQ("test", str); +} + +TEST_F(JSONParserTest, ConsumeList) { + std::string input("[true, false],|"); + scoped_ptr<JSONParser> parser(NewTestParser(input)); + scoped_ptr<Value> value(parser->ConsumeList()); + EXPECT_EQ(']', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + base::ListValue* list; + EXPECT_TRUE(value->GetAsList(&list)); + EXPECT_EQ(2u, list->GetSize()); +} + +TEST_F(JSONParserTest, ConsumeDictionary) { + std::string input("{\"abc\":\"def\"},|"); + scoped_ptr<JSONParser> parser(NewTestParser(input)); + 
scoped_ptr<Value> value(parser->ConsumeDictionary()); + EXPECT_EQ('}', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + base::DictionaryValue* dict; + EXPECT_TRUE(value->GetAsDictionary(&dict)); + std::string str; + EXPECT_TRUE(dict->GetString("abc", &str)); + EXPECT_EQ("def", str); +} + +TEST_F(JSONParserTest, ConsumeLiterals) { + // Literal |true|. + std::string input("true,|"); + scoped_ptr<JSONParser> parser(NewTestParser(input)); + scoped_ptr<Value> value(parser->ConsumeLiteral()); + EXPECT_EQ('e', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + bool bool_value = false; + EXPECT_TRUE(value->GetAsBoolean(&bool_value)); + EXPECT_TRUE(bool_value); + + // Literal |false|. + input = "false,|"; + parser.reset(NewTestParser(input)); + value.reset(parser->ConsumeLiteral()); + EXPECT_EQ('e', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + EXPECT_TRUE(value->GetAsBoolean(&bool_value)); + EXPECT_FALSE(bool_value); + + // Literal |null|. + input = "null,|"; + parser.reset(NewTestParser(input)); + value.reset(parser->ConsumeLiteral()); + EXPECT_EQ('l', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + EXPECT_TRUE(value->IsType(Value::TYPE_NULL)); +} + +TEST_F(JSONParserTest, ConsumeNumbers) { + // Integer. + std::string input("1234,|"); + scoped_ptr<JSONParser> parser(NewTestParser(input)); + scoped_ptr<Value> value(parser->ConsumeNumber()); + EXPECT_EQ('4', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + int number_i; + EXPECT_TRUE(value->GetAsInteger(&number_i)); + EXPECT_EQ(1234, number_i); + + // Negative integer. + input = "-1234,|"; + parser.reset(NewTestParser(input)); + value.reset(parser->ConsumeNumber()); + EXPECT_EQ('4', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + EXPECT_TRUE(value->GetAsInteger(&number_i)); + EXPECT_EQ(-1234, number_i); + + // Double. 
+ input = "12.34,|"; + parser.reset(NewTestParser(input)); + value.reset(parser->ConsumeNumber()); + EXPECT_EQ('4', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + double number_d; + EXPECT_TRUE(value->GetAsDouble(&number_d)); + EXPECT_EQ(12.34, number_d); + + // Scientific. + input = "42e3,|"; + parser.reset(NewTestParser(input)); + value.reset(parser->ConsumeNumber()); + EXPECT_EQ('3', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + EXPECT_TRUE(value->GetAsDouble(&number_d)); + EXPECT_EQ(42000, number_d); + + // Negative scientific. + input = "314159e-5,|"; + parser.reset(NewTestParser(input)); + value.reset(parser->ConsumeNumber()); + EXPECT_EQ('5', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + EXPECT_TRUE(value->GetAsDouble(&number_d)); + EXPECT_EQ(3.14159, number_d); + + // Positive scientific. + input = "0.42e+3,|"; + parser.reset(NewTestParser(input)); + value.reset(parser->ConsumeNumber()); + EXPECT_EQ('3', *parser->pos_); + + TestLastThree(parser.get()); + + ASSERT_TRUE(value.get()); + EXPECT_TRUE(value->GetAsDouble(&number_d)); + EXPECT_EQ(420, number_d); +} + +TEST_F(JSONParserTest, ErrorMessages) { + // Error strings should not be modified in case of success. 
+ std::string error_message; + int error_code = 0; + scoped_ptr<Value> root; + root.reset(JSONReader::ReadAndReturnError("[42]", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_TRUE(error_message.empty()); + EXPECT_EQ(0, error_code); + + // Test line and column counting + const char* big_json = "[\n0,\n1,\n2,\n3,4,5,6 7,\n8,\n9\n]"; + // error here ---------------------------------^ + root.reset(JSONReader::ReadAndReturnError(big_json, JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(5, 10, JSONReader::kSyntaxError), + error_message); + EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code); + + // Test each of the error conditions + root.reset(JSONReader::ReadAndReturnError("{},{}", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 3, + JSONReader::kUnexpectedDataAfterRoot), error_message); + EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, error_code); + + std::string nested_json; + for (int i = 0; i < 101; ++i) { + nested_json.insert(nested_json.begin(), '['); + nested_json.append(1, ']'); + } + root.reset(JSONReader::ReadAndReturnError(nested_json, JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 100, JSONReader::kTooMuchNesting), + error_message); + EXPECT_EQ(JSONReader::JSON_TOO_MUCH_NESTING, error_code); + + root.reset(JSONReader::ReadAndReturnError("[1,]", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 4, JSONReader::kTrailingComma), + error_message); + EXPECT_EQ(JSONReader::JSON_TRAILING_COMMA, error_code); + + root.reset(JSONReader::ReadAndReturnError("{foo:\"bar\"}", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 2, + JSONReader::kUnquotedDictionaryKey), error_message); + 
EXPECT_EQ(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, error_code); + + root.reset(JSONReader::ReadAndReturnError("{\"foo\":\"bar\",}", + JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 14, JSONReader::kTrailingComma), + error_message); + + root.reset(JSONReader::ReadAndReturnError("[nu]", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 2, JSONReader::kSyntaxError), + error_message); + EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code); + + root.reset(JSONReader::ReadAndReturnError("[\"xxx\\xq\"]", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), + error_message); + EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code); + + root.reset(JSONReader::ReadAndReturnError("[\"xxx\\uq\"]", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), + error_message); + EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code); + + root.reset(JSONReader::ReadAndReturnError("[\"xxx\\q\"]", JSON_PARSE_RFC, + &error_code, &error_message)); + EXPECT_FALSE(root.get()); + EXPECT_EQ(JSONParser::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), + error_message); + EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code); +} + +} // namespace internal +} // namespace base diff --git a/base/json/json_reader.cc b/base/json/json_reader.cc index 3457478..fb1459b 100644 --- a/base/json/json_reader.cc +++ b/base/json/json_reader.cc @@ -4,73 +4,17 @@ #include "base/json/json_reader.h" -#include "base/float_util.h" +#include "base/json/json_parser.h" #include "base/logging.h" -#include "base/memory/scoped_ptr.h" -#include "base/stringprintf.h" -#include "base/string_number_conversions.h" -#include "base/string_piece.h" -#include 
"base/string_util.h" -#include "base/third_party/icu/icu_utf.h" -#include "base/utf_string_conversions.h" -#include "base/values.h" - -namespace { - -const char kNullString[] = "null"; -const char kTrueString[] = "true"; -const char kFalseString[] = "false"; - -const int kStackLimit = 100; - -// A helper method for ParseNumberToken. It reads an int from the end of -// token. The method returns false if there is no valid integer at the end of -// the token. -bool ReadInt(base::JSONReader::Token& token, bool can_have_leading_zeros) { - char first = token.NextChar(); - int len = 0; - - // Read in more digits. - char c = first; - while ('\0' != c && IsAsciiDigit(c)) { - ++token.length; - ++len; - c = token.NextChar(); - } - // We need at least 1 digit. - if (len == 0) - return false; - - if (!can_have_leading_zeros && len > 1 && '0' == first) - return false; - - return true; -} - -// A helper method for ParseStringToken. It reads |digits| hex digits from the -// token. If the sequence if digits is not valid (contains other characters), -// the method returns false. 
-bool ReadHexDigits(base::JSONReader::Token& token, int digits) { - for (int i = 1; i <= digits; ++i) { - char c = *(token.begin + token.length + i); - if (c == '\0' || !IsHexDigit(c)) - return false; - } - - token.length += digits; - return true; -} - -} // namespace namespace base { -const char* JSONReader::kBadRootElementType = - "Root value must be an array or object."; const char* JSONReader::kInvalidEscape = "Invalid escape sequence."; const char* JSONReader::kSyntaxError = "Syntax error."; +const char* JSONReader::kUnexpectedToken = + "Unexpected token."; const char* JSONReader::kTrailingComma = "Trailing comma not allowed."; const char* JSONReader::kTooMuchNesting = @@ -83,24 +27,27 @@ const char* JSONReader::kUnquotedDictionaryKey = "Dictionary keys must be quoted."; JSONReader::JSONReader() - : start_pos_(NULL), - json_pos_(NULL), - end_pos_(NULL), - stack_depth_(0), - allow_trailing_comma_(false), - error_code_(JSON_NO_ERROR), - error_line_(0), - error_col_(0) {} + : parser_(new internal::JSONParser(JSON_PARSE_RFC)) { +} + +JSONReader::JSONReader(int options) + : parser_(new internal::JSONParser(options)) { +} + +JSONReader::~JSONReader() { +} // static Value* JSONReader::Read(const std::string& json) { - return Read(json, JSON_PARSE_RFC); + internal::JSONParser parser(JSON_PARSE_RFC); + return parser.Parse(json); } // static Value* JSONReader::Read(const std::string& json, int options) { - return ReadAndReturnError(json, options, NULL, NULL); + internal::JSONParser parser(options); + return parser.Parse(json); } // static @@ -108,16 +55,15 @@ Value* JSONReader::ReadAndReturnError(const std::string& json, int options, int* error_code_out, std::string* error_msg_out) { - JSONReader reader = JSONReader(); - Value* root = reader.JsonToValue(json, false, - (options & JSON_ALLOW_TRAILING_COMMAS) != 0); + internal::JSONParser parser(options); + Value* root = parser.Parse(json); if (root) return root; if (error_code_out) - *error_code_out = reader.error_code(); 
+ *error_code_out = parser.error_code(); if (error_msg_out) - *error_msg_out = reader.GetErrorMessage(); + *error_msg_out = parser.GetErrorMessage(); return NULL; } @@ -127,12 +73,12 @@ std::string JSONReader::ErrorCodeToString(JsonParseError error_code) { switch (error_code) { case JSON_NO_ERROR: return std::string(); - case JSON_BAD_ROOT_ELEMENT_TYPE: - return kBadRootElementType; case JSON_INVALID_ESCAPE: return kInvalidEscape; case JSON_SYNTAX_ERROR: return kSyntaxError; + case JSON_UNEXPECTED_TOKEN: + return kUnexpectedToken; case JSON_TRAILING_COMMA: return kTrailingComma; case JSON_TOO_MUCH_NESTING: @@ -149,586 +95,16 @@ std::string JSONReader::ErrorCodeToString(JsonParseError error_code) { } } -std::string JSONReader::GetErrorMessage() const { - return FormatErrorMessage(error_line_, error_col_, - ErrorCodeToString(error_code_)); -} - -Value* JSONReader::JsonToValue(const std::string& json, bool check_root, - bool allow_trailing_comma) { - // The input must be in UTF-8. - if (!IsStringUTF8(json.data())) { - error_code_ = JSON_UNSUPPORTED_ENCODING; - return NULL; - } - - start_pos_ = json.data(); - end_pos_ = start_pos_ + json.size(); - - // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF) - // or <0xEF 0xBB 0xBF>, advance the start position to avoid the - // JSONReader::BuildValue() function from mis-treating a Unicode BOM as an - // invalid character and returning NULL. 
- if (json.size() >= 3 && static_cast<uint8>(start_pos_[0]) == 0xEF && - static_cast<uint8>(start_pos_[1]) == 0xBB && - static_cast<uint8>(start_pos_[2]) == 0xBF) { - start_pos_ += 3; - } - - json_pos_ = start_pos_; - allow_trailing_comma_ = allow_trailing_comma; - stack_depth_ = 0; - error_code_ = JSON_NO_ERROR; - - scoped_ptr<Value> root(BuildValue(check_root)); - if (root.get()) { - if (ParseToken().type == Token::END_OF_INPUT) { - return root.release(); - } else { - SetErrorCode(JSON_UNEXPECTED_DATA_AFTER_ROOT, json_pos_); - } - } - - // Default to calling errors "syntax errors". - if (error_code_ == 0) - SetErrorCode(JSON_SYNTAX_ERROR, json_pos_); - - return NULL; -} - -// static -std::string JSONReader::FormatErrorMessage(int line, int column, - const std::string& description) { - if (line || column) { - return base::StringPrintf( - "Line: %i, column: %i, %s", line, column, description.c_str()); - } - return description; +Value* JSONReader::ReadToValue(const std::string& json) { + return parser_->Parse(json); } -Value* JSONReader::BuildValue(bool is_root) { - ++stack_depth_; - if (stack_depth_ > kStackLimit) { - SetErrorCode(JSON_TOO_MUCH_NESTING, json_pos_); - return NULL; - } - - Token token = ParseToken(); - // The root token must be an array or an object. 
- if (is_root && token.type != Token::OBJECT_BEGIN && - token.type != Token::ARRAY_BEGIN) { - SetErrorCode(JSON_BAD_ROOT_ELEMENT_TYPE, json_pos_); - return NULL; - } - - scoped_ptr<Value> node; - - switch (token.type) { - case Token::END_OF_INPUT: - case Token::INVALID_TOKEN: - return NULL; - - case Token::NULL_TOKEN: - node.reset(Value::CreateNullValue()); - break; - - case Token::BOOL_TRUE: - node.reset(Value::CreateBooleanValue(true)); - break; - - case Token::BOOL_FALSE: - node.reset(Value::CreateBooleanValue(false)); - break; - - case Token::NUMBER: - node.reset(DecodeNumber(token)); - if (!node.get()) - return NULL; - break; - - case Token::STRING: - node.reset(DecodeString(token)); - if (!node.get()) - return NULL; - break; - - case Token::ARRAY_BEGIN: - { - json_pos_ += token.length; - token = ParseToken(); - - node.reset(new ListValue()); - while (token.type != Token::ARRAY_END) { - Value* array_node = BuildValue(false); - if (!array_node) - return NULL; - static_cast<ListValue*>(node.get())->Append(array_node); - - // After a list value, we expect a comma or the end of the list. - token = ParseToken(); - if (token.type == Token::LIST_SEPARATOR) { - json_pos_ += token.length; - token = ParseToken(); - // Trailing commas are invalid according to the JSON RFC, but some - // consumers need the parsing leniency, so handle accordingly. - if (token.type == Token::ARRAY_END) { - if (!allow_trailing_comma_) { - SetErrorCode(JSON_TRAILING_COMMA, json_pos_); - return NULL; - } - // Trailing comma OK, stop parsing the Array. - break; - } - } else if (token.type != Token::ARRAY_END) { - // Unexpected value after list value. Bail out. 
- return NULL; - } - } - if (token.type != Token::ARRAY_END) { - return NULL; - } - break; - } - - case Token::OBJECT_BEGIN: - { - json_pos_ += token.length; - token = ParseToken(); - - node.reset(new DictionaryValue); - while (token.type != Token::OBJECT_END) { - if (token.type != Token::STRING) { - SetErrorCode(JSON_UNQUOTED_DICTIONARY_KEY, json_pos_); - return NULL; - } - scoped_ptr<Value> dict_key_value(DecodeString(token)); - if (!dict_key_value.get()) - return NULL; - - // Convert the key into a wstring. - std::string dict_key; - bool success = dict_key_value->GetAsString(&dict_key); - DCHECK(success); - - json_pos_ += token.length; - token = ParseToken(); - if (token.type != Token::OBJECT_PAIR_SEPARATOR) - return NULL; - - json_pos_ += token.length; - token = ParseToken(); - Value* dict_value = BuildValue(false); - if (!dict_value) - return NULL; - static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion( - dict_key, dict_value); - - // After a key/value pair, we expect a comma or the end of the - // object. - token = ParseToken(); - if (token.type == Token::LIST_SEPARATOR) { - json_pos_ += token.length; - token = ParseToken(); - // Trailing commas are invalid according to the JSON RFC, but some - // consumers need the parsing leniency, so handle accordingly. - if (token.type == Token::OBJECT_END) { - if (!allow_trailing_comma_) { - SetErrorCode(JSON_TRAILING_COMMA, json_pos_); - return NULL; - } - // Trailing comma OK, stop parsing the Object. - break; - } - } else if (token.type != Token::OBJECT_END) { - // Unexpected value after last object value. Bail out. - return NULL; - } - } - if (token.type != Token::OBJECT_END) - return NULL; - - break; - } - - default: - // We got a token that's not a value. 
- return NULL; - } - json_pos_ += token.length; - - --stack_depth_; - return node.release(); +JSONReader::JsonParseError JSONReader::error_code() const { + return parser_->error_code(); } -JSONReader::Token JSONReader::ParseNumberToken() { - // We just grab the number here. We validate the size in DecodeNumber. - // According to RFC4627, a valid number is: [minus] int [frac] [exp] - Token token(Token::NUMBER, json_pos_, 0); - char c = *json_pos_; - if ('-' == c) { - ++token.length; - c = token.NextChar(); - } - - if (!ReadInt(token, false)) - return Token::CreateInvalidToken(); - - // Optional fraction part - c = token.NextChar(); - if ('.' == c) { - ++token.length; - if (!ReadInt(token, true)) - return Token::CreateInvalidToken(); - c = token.NextChar(); - } - - // Optional exponent part - if ('e' == c || 'E' == c) { - ++token.length; - c = token.NextChar(); - if ('-' == c || '+' == c) { - ++token.length; - c = token.NextChar(); - } - if (!ReadInt(token, true)) - return Token::CreateInvalidToken(); - } - - return token; -} - -Value* JSONReader::DecodeNumber(const Token& token) { - const std::string num_string(token.begin, token.length); - - int num_int; - if (StringToInt(num_string, &num_int)) - return Value::CreateIntegerValue(num_int); - - double num_double; - if (StringToDouble(num_string, &num_double) && base::IsFinite(num_double)) - return Value::CreateDoubleValue(num_double); - - return NULL; -} - -JSONReader::Token JSONReader::ParseStringToken() { - Token token(Token::STRING, json_pos_, 1); - char c = token.NextChar(); - while (json_pos_ + token.length < end_pos_) { - if ('\\' == c) { - ++token.length; - c = token.NextChar(); - // Make sure the escaped char is valid. 
- switch (c) { - case 'x': - if (!ReadHexDigits(token, 2)) { - SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); - return Token::CreateInvalidToken(); - } - break; - case 'u': - if (!ReadHexDigits(token, 4)) { - SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); - return Token::CreateInvalidToken(); - } - break; - case '\\': - case '/': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - case '"': - break; - default: - SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); - return Token::CreateInvalidToken(); - } - } else if ('"' == c) { - ++token.length; - return token; - } - ++token.length; - c = token.NextChar(); - } - return Token::CreateInvalidToken(); -} - -Value* JSONReader::DecodeString(const Token& token) { - std::string decoded_str; - decoded_str.reserve(token.length - 2); - - for (int i = 1; i < token.length - 1; ++i) { - char c = *(token.begin + i); - if ('\\' == c) { - ++i; - c = *(token.begin + i); - switch (c) { - case '"': - case '/': - case '\\': - decoded_str.push_back(c); - break; - case 'b': - decoded_str.push_back('\b'); - break; - case 'f': - decoded_str.push_back('\f'); - break; - case 'n': - decoded_str.push_back('\n'); - break; - case 'r': - decoded_str.push_back('\r'); - break; - case 't': - decoded_str.push_back('\t'); - break; - case 'v': - decoded_str.push_back('\v'); - break; - - case 'x': { - if (i + 2 >= token.length) - return NULL; - int hex_digit = 0; - if (!HexStringToInt(StringPiece(token.begin + i + 1, 2), &hex_digit)) - return NULL; - decoded_str.push_back(hex_digit); - i += 2; - break; - } - case 'u': - if (!ConvertUTF16Units(token, &i, &decoded_str)) - return NULL; - break; - - default: - // We should only have valid strings at this point. If not, - // ParseStringToken didn't do its job. 
- NOTREACHED(); - return NULL; - } - } else { - // Not escaped - decoded_str.push_back(c); - } - } - return Value::CreateStringValue(decoded_str); -} - -bool JSONReader::ConvertUTF16Units(const Token& token, - int* i, - std::string* dest_string) { - if (*i + 4 >= token.length) - return false; - - // This is a 32-bit field because the shift operations in the - // conversion process below cause MSVC to error about "data loss." - // This only stores UTF-16 code units, though. - // Consume the UTF-16 code unit, which may be a high surrogate. - int code_unit16_high = 0; - if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_high)) - return false; - *i += 4; - - // If this is a high surrogate, consume the next code unit to get the - // low surrogate. - int code_unit16_low = 0; - if (CBU16_IS_SURROGATE(code_unit16_high)) { - // Make sure this is the high surrogate. If not, it's an encoding - // error. - if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) - return false; - - // Make sure that the token has more characters to consume the - // lower surrogate. - if (*i + 6 >= token.length) - return false; - if (*(++(*i) + token.begin) != '\\' || *(++(*i) + token.begin) != 'u') - return false; - - if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_low)) - return false; - *i += 4; - if (!CBU16_IS_SURROGATE(code_unit16_low) || - !CBU16_IS_TRAIL(code_unit16_low)) { - return false; - } - } else if (!CBU16_IS_SINGLE(code_unit16_high)) { - // If this is not a code point, it's an encoding error. - return false; - } - - // Convert the UTF-16 code units to a code point and then to a UTF-8 - // code unit sequence. 
- char code_point[8] = { 0 }; - size_t offset = 0; - if (!code_unit16_low) { - CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); - } else { - uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, - code_unit16_low); - offset = 0; - CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); - } - dest_string->append(code_point); - return true; -} - -JSONReader::Token JSONReader::ParseToken() { - EatWhitespaceAndComments(); - - Token token(Token::INVALID_TOKEN, 0, 0); - switch (*json_pos_) { - case '\0': - token.type = Token::END_OF_INPUT; - break; - - case 'n': - if (NextStringMatch(kNullString, arraysize(kNullString) - 1)) - token = Token(Token::NULL_TOKEN, json_pos_, 4); - break; - - case 't': - if (NextStringMatch(kTrueString, arraysize(kTrueString) - 1)) - token = Token(Token::BOOL_TRUE, json_pos_, 4); - break; - - case 'f': - if (NextStringMatch(kFalseString, arraysize(kFalseString) - 1)) - token = Token(Token::BOOL_FALSE, json_pos_, 5); - break; - - case '[': - token = Token(Token::ARRAY_BEGIN, json_pos_, 1); - break; - - case ']': - token = Token(Token::ARRAY_END, json_pos_, 1); - break; - - case ',': - token = Token(Token::LIST_SEPARATOR, json_pos_, 1); - break; - - case '{': - token = Token(Token::OBJECT_BEGIN, json_pos_, 1); - break; - - case '}': - token = Token(Token::OBJECT_END, json_pos_, 1); - break; - - case ':': - token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1); - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '-': - token = ParseNumberToken(); - break; - - case '"': - token = ParseStringToken(); - break; - } - return token; -} - -void JSONReader::EatWhitespaceAndComments() { - while (json_pos_ != end_pos_) { - switch (*json_pos_) { - case ' ': - case '\n': - case '\r': - case '\t': - ++json_pos_; - break; - case '/': - // TODO(tc): This isn't in the RFC so it should be a parser flag. 
- if (!EatComment()) - return; - break; - default: - // Not a whitespace char, just exit. - return; - } - } -} - -bool JSONReader::EatComment() { - if ('/' != *json_pos_) - return false; - - char next_char = *(json_pos_ + 1); - if ('/' == next_char) { - // Line comment, read until \n or \r - json_pos_ += 2; - while (json_pos_ != end_pos_) { - switch (*json_pos_) { - case '\n': - case '\r': - ++json_pos_; - return true; - default: - ++json_pos_; - } - } - } else if ('*' == next_char) { - // Block comment, read until */ - json_pos_ += 2; - while (json_pos_ != end_pos_) { - if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) { - json_pos_ += 2; - return true; - } - ++json_pos_; - } - } else { - return false; - } - return true; -} - -bool JSONReader::NextStringMatch(const char* str, size_t length) { - return strncmp(json_pos_, str, length) == 0; -} - -void JSONReader::SetErrorCode(JsonParseError error, - const char* error_pos) { - int line_number = 1; - int column_number = 1; - - // Figure out the line and column the error occured at. - for (const char* pos = start_pos_; pos != error_pos; ++pos) { - if (pos > end_pos_) { - NOTREACHED(); - return; - } - - if (*pos == '\n') { - ++line_number; - column_number = 1; - } else { - ++column_number; - } - } - - error_line_ = line_number; - error_col_ = column_number; - error_code_ = error; +std::string JSONReader::GetErrorMessage() const { + return parser_->GetErrorMessage(); } } // namespace base diff --git a/base/json/json_reader.h b/base/json/json_reader.h index 35ee7d3..e081175 100644 --- a/base/json/json_reader.h +++ b/base/json/json_reader.h @@ -33,23 +33,18 @@ #include "base/base_export.h" #include "base/basictypes.h" +#include "base/memory/scoped_ptr.h" -// Chromium and Chromium OS check out gtest to different places, so we're -// unable to compile on both if we include gtest_prod.h here. Instead, include -// its only contents -- this will need to be updated if the macro ever changes. 
-#define FRIEND_TEST(test_case_name, test_name)\ -friend class test_case_name##_##test_name##_Test +namespace base { +class Value; -#define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ - FRIEND_TEST(test_case_name, test_name); \ - FRIEND_TEST(test_case_name, DISABLED_##test_name); \ - FRIEND_TEST(test_case_name, FLAKY_##test_name); \ - FRIEND_TEST(test_case_name, FAILS_##test_name) +namespace internal { +class JSONParser; +} +} namespace base { -class Value; - enum JSONParserOptions { // Parses the input strictly according to RFC 4627, except for where noted // above. @@ -57,56 +52,22 @@ enum JSONParserOptions { // Allows commas to exist after the last element in structures. JSON_ALLOW_TRAILING_COMMAS = 1 << 0, + + // The parser can perform optimizations by placing hidden data in the root of + // the JSON object, which speeds up certain operations on children. However, + // if the child is Remove()d from root, it would result in use-after-free + // unless it is DeepCopy()ed or this option is used. + JSON_DETACHABLE_CHILDREN = 1 << 1, }; class BASE_EXPORT JSONReader { public: - // A struct to hold a JS token. - class Token { - public: - enum Type { - OBJECT_BEGIN, // { - OBJECT_END, // } - ARRAY_BEGIN, // [ - ARRAY_END, // ] - STRING, - NUMBER, - BOOL_TRUE, // true - BOOL_FALSE, // false - NULL_TOKEN, // null - LIST_SEPARATOR, // , - OBJECT_PAIR_SEPARATOR, // : - END_OF_INPUT, - INVALID_TOKEN, - }; - - Token(Type t, const char* b, int len) - : type(t), begin(b), length(len) {} - - // Get the character that's one past the end of this token. - char NextChar() { - return *(begin + length); - } - - static Token CreateInvalidToken() { - return Token(INVALID_TOKEN, 0, 0); - } - - Type type; - - // A pointer into JSONReader::json_pos_ that's the beginning of this token. - const char* begin; - - // End should be one char past the end of the token. - int length; - }; - // Error codes during parsing. 
enum JsonParseError { JSON_NO_ERROR = 0, - JSON_BAD_ROOT_ELEMENT_TYPE, JSON_INVALID_ESCAPE, JSON_SYNTAX_ERROR, + JSON_UNEXPECTED_TOKEN, JSON_TRAILING_COMMA, JSON_TOO_MUCH_NESTING, JSON_UNEXPECTED_DATA_AFTER_ROOT, @@ -115,17 +76,23 @@ class BASE_EXPORT JSONReader { }; // String versions of parse error codes. - static const char* kBadRootElementType; static const char* kInvalidEscape; static const char* kSyntaxError; + static const char* kUnexpectedToken; static const char* kTrailingComma; static const char* kTooMuchNesting; static const char* kUnexpectedDataAfterRoot; static const char* kUnsupportedEncoding; static const char* kUnquotedDictionaryKey; + // Constructs a reader with the default options, JSON_PARSE_RFC. JSONReader(); + // Constructs a reader with custom options. + explicit JSONReader(int options); + + ~JSONReader(); + // Reads and parses |json|, returning a Value. The caller owns the returned // instance. If |json| is not a properly formed JSON string, returns NULL. static Value* Read(const std::string& json); @@ -148,106 +115,19 @@ class BASE_EXPORT JSONReader { // Returns an empty string if error_code is JSON_NO_ERROR. static std::string ErrorCodeToString(JsonParseError error_code); - // Returns the error code if the last call to JsonToValue() failed. + // Parses an input string into a Value that is owned by the caller. + Value* ReadToValue(const std::string& json); + + // Returns the error code if the last call to ReadToValue() failed. // Returns JSON_NO_ERROR otherwise. - JsonParseError error_code() const { return error_code_; } + JsonParseError error_code() const; // Converts error_code_ to a human-readable string, including line and column // numbers if appropriate. std::string GetErrorMessage() const; - // Reads and parses |json|, returning a Value. The caller owns the returned - // instance. If |json| is not a properly formed JSON string, returns NULL and - // a detailed error can be retrieved from |error_message()|. 
- // If |check_root| is true, we require that the root object be an object or - // array. Otherwise, it can be any valid JSON type. - // If |allow_trailing_comma| is true, we will ignore trailing commas in - // objects and arrays even though this goes against the RFC. - Value* JsonToValue(const std::string& json, bool check_root, - bool allow_trailing_comma); - private: - FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, Reading); - FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, ErrorMessages); - - static std::string FormatErrorMessage(int line, int column, - const std::string& description); - - // Recursively build Value. Returns NULL if we don't have a valid JSON - // string. If |is_root| is true, we verify that the root element is either - // an object or an array. - Value* BuildValue(bool is_root); - - // Parses a sequence of characters into a Token::NUMBER. If the sequence of - // characters is not a valid number, returns a Token::INVALID_TOKEN. Note - // that DecodeNumber is used to actually convert from a string to an - // int/double. - Token ParseNumberToken(); - - // Try and convert the substring that token holds into an int or a double. If - // we can (ie., no overflow), return the value, else return NULL. - Value* DecodeNumber(const Token& token); - - // Parses a sequence of characters into a Token::STRING. If the sequence of - // characters is not a valid string, returns a Token::INVALID_TOKEN. Note - // that DecodeString is used to actually decode the escaped string into an - // actual wstring. - Token ParseStringToken(); - - // Convert the substring into a value string. This should always succeed - // (otherwise ParseStringToken would have failed). - Value* DecodeString(const Token& token); - - // Helper function for DecodeString that consumes UTF16 [0,2] code units and - // convers them to UTF8 code untis. 
|token| is the string token in which the - // units should be read, |i| is the position in the token at which the first - // code unit starts, immediately after the |\u|. This will be mutated if code - // units are consumed. |dest_string| is a string to which the UTF8 code unit - // should be appended. Returns true on success and false if there's an - // encoding error. - bool ConvertUTF16Units(const Token& token, - int* i, - std::string* dest_string); - - // Grabs the next token in the JSON stream. This does not increment the - // stream so it can be used to look ahead at the next token. - Token ParseToken(); - - // Increments |json_pos_| past leading whitespace and comments. - void EatWhitespaceAndComments(); - - // If |json_pos_| is at the start of a comment, eat it, otherwise, returns - // false. - bool EatComment(); - - // Checks if |json_pos_| matches str. - bool NextStringMatch(const char* str, size_t length); - - // Sets the error code that will be returned to the caller. The current - // line and column are determined and added into the final message. - void SetErrorCode(const JsonParseError error, const char* error_pos); - - // Pointer to the starting position in the input string. - const char* start_pos_; - - // Pointer to the current position in the input string. - const char* json_pos_; - - // Pointer to the last position in the input string. - const char* end_pos_; - - // Used to keep track of how many nested lists/dicts there are. - int stack_depth_; - - // A parser flag that allows trailing commas in objects and arrays. - bool allow_trailing_comma_; - - // Contains the error code for the last call to JsonToValue(), if any. 
- JsonParseError error_code_; - int error_line_; - int error_col_; - - DISALLOW_COPY_AND_ASSIGN(JSONReader); + scoped_ptr<internal::JSONParser> parser_; }; } // namespace base diff --git a/base/json/json_reader_unittest.cc b/base/json/json_reader_unittest.cc index 4d6b0c4..38bf590 100644 --- a/base/json/json_reader_unittest.cc +++ b/base/json/json_reader_unittest.cc @@ -6,6 +6,7 @@ #include "base/base_paths.h" #include "base/file_util.h" +#include "base/logging.h" #include "base/memory/scoped_ptr.h" #include "base/path_service.h" #include "base/string_piece.h" @@ -19,56 +20,67 @@ namespace base { TEST(JSONReaderTest, Reading) { // some whitespace checking scoped_ptr<Value> root; - root.reset(JSONReader().JsonToValue(" null ", false, false)); + root.reset(JSONReader().ReadToValue(" null ")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_NULL)); // Invalid JSON string - root.reset(JSONReader().JsonToValue("nu", false, false)); + root.reset(JSONReader().ReadToValue("nu")); EXPECT_FALSE(root.get()); // Simple bool - root.reset(JSONReader().JsonToValue("true ", false, false)); + root.reset(JSONReader().ReadToValue("true ")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_BOOLEAN)); // Embedded comment - root.reset(JSONReader().JsonToValue("/* comment */null", false, false)); + root.reset(JSONReader().ReadToValue("/* comment */null")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_NULL)); - root.reset(JSONReader().JsonToValue("40 /* comment */", false, false)); + root.reset(JSONReader().ReadToValue("40 /* comment */")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_INTEGER)); - root.reset(JSONReader().JsonToValue("true // comment", false, false)); + root.reset(JSONReader().ReadToValue("true // comment")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_BOOLEAN)); - root.reset(JSONReader().JsonToValue("/* comment */\"sample string\"", - false, false)); + root.reset(JSONReader().ReadToValue("/* 
comment */\"sample string\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); std::string value; EXPECT_TRUE(root->GetAsString(&value)); EXPECT_EQ("sample string", value); + root.reset(JSONReader().ReadToValue("[1, /* comment, 2 ] */ \n 3]")); + ASSERT_TRUE(root.get()); + ListValue* list = static_cast<ListValue*>(root.get()); + EXPECT_EQ(2u, list->GetSize()); + int int_val = 0; + EXPECT_TRUE(list->GetInteger(0, &int_val)); + EXPECT_EQ(1, int_val); + EXPECT_TRUE(list->GetInteger(1, &int_val)); + EXPECT_EQ(3, int_val); + root.reset(JSONReader().ReadToValue("[1, /*a*/2, 3]")); + ASSERT_TRUE(root.get()); + list = static_cast<ListValue*>(root.get()); + EXPECT_EQ(3u, list->GetSize()); // Test number formats - root.reset(JSONReader().JsonToValue("43", false, false)); + root.reset(JSONReader().ReadToValue("43")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_INTEGER)); - int int_val = 0; EXPECT_TRUE(root->GetAsInteger(&int_val)); EXPECT_EQ(43, int_val); // According to RFC4627, oct, hex, and leading zeros are invalid JSON. - root.reset(JSONReader().JsonToValue("043", false, false)); + root.reset(JSONReader().ReadToValue("043")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("0x43", false, false)); + root.reset(JSONReader().ReadToValue("0x43")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("00", false, false)); + root.reset(JSONReader().ReadToValue("00")); EXPECT_FALSE(root.get()); // Test 0 (which needs to be special cased because of the leading zero // clause). 
- root.reset(JSONReader().JsonToValue("0", false, false)); + root.reset(JSONReader().ReadToValue("0")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_INTEGER)); int_val = 1; @@ -77,14 +89,14 @@ TEST(JSONReaderTest, Reading) { // Numbers that overflow ints should succeed, being internally promoted to // storage as doubles - root.reset(JSONReader().JsonToValue("2147483648", false, false)); + root.reset(JSONReader().ReadToValue("2147483648")); ASSERT_TRUE(root.get()); double double_val; EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; EXPECT_TRUE(root->GetAsDouble(&double_val)); EXPECT_DOUBLE_EQ(2147483648.0, double_val); - root.reset(JSONReader().JsonToValue("-2147483649", false, false)); + root.reset(JSONReader().ReadToValue("-2147483649")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; @@ -92,42 +104,42 @@ TEST(JSONReaderTest, Reading) { EXPECT_DOUBLE_EQ(-2147483649.0, double_val); // Parse a double - root.reset(JSONReader().JsonToValue("43.1", false, false)); + root.reset(JSONReader().ReadToValue("43.1")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; EXPECT_TRUE(root->GetAsDouble(&double_val)); EXPECT_DOUBLE_EQ(43.1, double_val); - root.reset(JSONReader().JsonToValue("4.3e-1", false, false)); + root.reset(JSONReader().ReadToValue("4.3e-1")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; EXPECT_TRUE(root->GetAsDouble(&double_val)); EXPECT_DOUBLE_EQ(.43, double_val); - root.reset(JSONReader().JsonToValue("2.1e0", false, false)); + root.reset(JSONReader().ReadToValue("2.1e0")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; EXPECT_TRUE(root->GetAsDouble(&double_val)); EXPECT_DOUBLE_EQ(2.1, double_val); - root.reset(JSONReader().JsonToValue("2.1e+0001", false, false)); + root.reset(JSONReader().ReadToValue("2.1e+0001")); ASSERT_TRUE(root.get()); 
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; EXPECT_TRUE(root->GetAsDouble(&double_val)); EXPECT_DOUBLE_EQ(21.0, double_val); - root.reset(JSONReader().JsonToValue("0.01", false, false)); + root.reset(JSONReader().ReadToValue("0.01")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; EXPECT_TRUE(root->GetAsDouble(&double_val)); EXPECT_DOUBLE_EQ(0.01, double_val); - root.reset(JSONReader().JsonToValue("1.00", false, false)); + root.reset(JSONReader().ReadToValue("1.00")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE)); double_val = 0.0; @@ -135,43 +147,43 @@ TEST(JSONReaderTest, Reading) { EXPECT_DOUBLE_EQ(1.0, double_val); // Fractional parts must have a digit before and after the decimal point. - root.reset(JSONReader().JsonToValue("1.", false, false)); + root.reset(JSONReader().ReadToValue("1.")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue(".1", false, false)); + root.reset(JSONReader().ReadToValue(".1")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("1.e10", false, false)); + root.reset(JSONReader().ReadToValue("1.e10")); EXPECT_FALSE(root.get()); // Exponent must have a digit following the 'e'. 
- root.reset(JSONReader().JsonToValue("1e", false, false)); + root.reset(JSONReader().ReadToValue("1e")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("1E", false, false)); + root.reset(JSONReader().ReadToValue("1E")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("1e1.", false, false)); + root.reset(JSONReader().ReadToValue("1e1.")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("1e1.0", false, false)); + root.reset(JSONReader().ReadToValue("1e1.0")); EXPECT_FALSE(root.get()); // INF/-INF/NaN are not valid - root.reset(JSONReader().JsonToValue("1e1000", false, false)); + root.reset(JSONReader().ReadToValue("1e1000")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("-1e1000", false, false)); + root.reset(JSONReader().ReadToValue("-1e1000")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("NaN", false, false)); + root.reset(JSONReader().ReadToValue("NaN")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("nan", false, false)); + root.reset(JSONReader().ReadToValue("nan")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("inf", false, false)); + root.reset(JSONReader().ReadToValue("inf")); EXPECT_FALSE(root.get()); // Invalid number formats - root.reset(JSONReader().JsonToValue("4.3.1", false, false)); + root.reset(JSONReader().ReadToValue("4.3.1")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("4e3.1", false, false)); + root.reset(JSONReader().ReadToValue("4e3.1")); EXPECT_FALSE(root.get()); // Test string parser - root.reset(JSONReader().JsonToValue("\"hello world\"", false, false)); + root.reset(JSONReader().ReadToValue("\"hello world\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); std::string str_val; @@ -179,7 +191,7 @@ TEST(JSONReaderTest, Reading) { EXPECT_EQ("hello world", str_val); // Empty string - root.reset(JSONReader().JsonToValue("\"\"", false, false)); + 
root.reset(JSONReader().ReadToValue("\"\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); str_val.clear(); @@ -187,8 +199,7 @@ TEST(JSONReaderTest, Reading) { EXPECT_EQ("", str_val); // Test basic string escapes - root.reset(JSONReader().JsonToValue("\" \\\"\\\\\\/\\b\\f\\n\\r\\t\\v\"", - false, false)); + root.reset(JSONReader().ReadToValue("\" \\\"\\\\\\/\\b\\f\\n\\r\\t\\v\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); str_val.clear(); @@ -196,8 +207,7 @@ TEST(JSONReaderTest, Reading) { EXPECT_EQ(" \"\\/\b\f\n\r\t\v", str_val); // Test hex and unicode escapes including the null character. - root.reset(JSONReader().JsonToValue("\"\\x41\\x00\\u1234\"", false, - false)); + root.reset(JSONReader().ReadToValue("\"\\x41\\x00\\u1234\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); str_val.clear(); @@ -205,29 +215,24 @@ TEST(JSONReaderTest, Reading) { EXPECT_EQ(std::wstring(L"A\0\x1234", 3), UTF8ToWide(str_val)); // Test invalid strings - root.reset(JSONReader().JsonToValue("\"no closing quote", false, false)); + root.reset(JSONReader().ReadToValue("\"no closing quote")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("\"\\z invalid escape char\"", false, - false)); + root.reset(JSONReader().ReadToValue("\"\\z invalid escape char\"")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("\"\\xAQ invalid hex code\"", false, - false)); + root.reset(JSONReader().ReadToValue("\"\\xAQ invalid hex code\"")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("not enough hex chars\\x1\"", false, - false)); + root.reset(JSONReader().ReadToValue("not enough hex chars\\x1\"")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("\"not enough escape chars\\u123\"", - false, false)); + root.reset(JSONReader().ReadToValue("\"not enough escape chars\\u123\"")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("\"extra backslash 
at end of input\\\"", - false, false)); + root.reset(JSONReader().ReadToValue("\"extra backslash at end of input\\\"")); EXPECT_FALSE(root.get()); // Basic array root.reset(JSONReader::Read("[true, false, null]")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_LIST)); - ListValue* list = static_cast<ListValue*>(root.get()); + list = static_cast<ListValue*>(root.get()); EXPECT_EQ(3U, list->GetSize()); // Test with trailing comma. Should be parsed the same as above. @@ -448,32 +453,38 @@ TEST(JSONReaderTest, Reading) { EXPECT_EQ(5001U, list->GetSize()); // Test utf8 encoded input - root.reset(JSONReader().JsonToValue("\"\xe7\xbd\x91\xe9\xa1\xb5\"", - false, false)); + root.reset(JSONReader().ReadToValue("\"\xe7\xbd\x91\xe9\xa1\xb5\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); str_val.clear(); EXPECT_TRUE(root->GetAsString(&str_val)); EXPECT_EQ(L"\x7f51\x9875", UTF8ToWide(str_val)); + root.reset(JSONReader().ReadToValue( + "{\"path\": \"/tmp/\xc3\xa0\xc3\xa8\xc3\xb2.png\"}")); + ASSERT_TRUE(root.get()); + EXPECT_TRUE(root->IsType(Value::TYPE_DICTIONARY)); + EXPECT_TRUE(root->GetAsDictionary(&dict_val)); + EXPECT_TRUE(dict_val->GetString("path", &str_val)); + EXPECT_EQ("/tmp/\xC3\xA0\xC3\xA8\xC3\xB2.png", str_val); + // Test invalid utf8 encoded input - root.reset(JSONReader().JsonToValue("\"345\xb0\xa1\xb0\xa2\"", - false, false)); + root.reset(JSONReader().ReadToValue("\"345\xb0\xa1\xb0\xa2\"")); + EXPECT_FALSE(root.get()); + root.reset(JSONReader().ReadToValue("\"123\xc0\x81\"")); EXPECT_FALSE(root.get()); - root.reset(JSONReader().JsonToValue("\"123\xc0\x81\"", - false, false)); + root.reset(JSONReader().ReadToValue("\"abc\xc0\xae\"")); EXPECT_FALSE(root.get()); // Test utf16 encoded strings. 
- root.reset(JSONReader().JsonToValue("\"\\u20ac3,14\"", false, false)); + root.reset(JSONReader().ReadToValue("\"\\u20ac3,14\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); str_val.clear(); EXPECT_TRUE(root->GetAsString(&str_val)); EXPECT_EQ("\xe2\x82\xac""3,14", str_val); - root.reset(JSONReader().JsonToValue("\"\\ud83d\\udca9\\ud83d\\udc6c\"", - false, false)); + root.reset(JSONReader().ReadToValue("\"\\ud83d\\udca9\\ud83d\\udc6c\"")); ASSERT_TRUE(root.get()); EXPECT_TRUE(root->IsType(Value::TYPE_STRING)); str_val.clear(); @@ -492,9 +503,28 @@ TEST(JSONReaderTest, Reading) { "\"\\ud83\\foo\"" // No lower surrogate. }; for (size_t i = 0; i < arraysize(cases); ++i) { - root.reset(JSONReader().JsonToValue(cases[i], false, false)); + root.reset(JSONReader().ReadToValue(cases[i])); EXPECT_FALSE(root.get()) << cases[i]; } + + // Test literal root objects. + root.reset(JSONReader::Read("null")); + EXPECT_TRUE(root->IsType(Value::TYPE_NULL)); + + root.reset(JSONReader::Read("true")); + ASSERT_TRUE(root.get()); + EXPECT_TRUE(root->GetAsBoolean(&bool_value)); + EXPECT_TRUE(bool_value); + + root.reset(JSONReader::Read("10")); + ASSERT_TRUE(root.get()); + EXPECT_TRUE(root->GetAsInteger(&integer_value)); + EXPECT_EQ(10, integer_value); + + root.reset(JSONReader::Read("\"root\"")); + ASSERT_TRUE(root.get()); + EXPECT_TRUE(root->GetAsString(&str_val)); + EXPECT_EQ("root", str_val); } TEST(JSONReaderTest, ReadFromFile) { @@ -509,102 +539,110 @@ TEST(JSONReaderTest, ReadFromFile) { path.Append(FILE_PATH_LITERAL("bom_feff.json")), &input)); JSONReader reader; - std::string error_msg; - scoped_ptr<Value> root( - JSONReader::ReadAndReturnError(input, JSON_PARSE_RFC, NULL, &error_msg)); + scoped_ptr<Value> root(reader.ReadToValue(input)); ASSERT_TRUE(root.get()) << reader.GetErrorMessage(); EXPECT_TRUE(root->IsType(Value::TYPE_DICTIONARY)); } -TEST(JSONReaderTest, ErrorMessages) { - // Error strings should not be modified in case of success. 
- std::string error_message; - int error_code = 0; - scoped_ptr<Value> root; - root.reset(JSONReader::ReadAndReturnError("[42]", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_TRUE(error_message.empty()); - EXPECT_EQ(0, error_code); - - // Test line and column counting - const char* big_json = "[\n0,\n1,\n2,\n3,4,5,6 7,\n8,\n9\n]"; - // error here --------------------------------^ - root.reset(JSONReader::ReadAndReturnError(big_json, JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(5, 9, JSONReader::kSyntaxError), - error_message); - EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code); - - // Test each of the error conditions - root.reset(JSONReader::ReadAndReturnError("{},{}", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 3, - JSONReader::kUnexpectedDataAfterRoot), error_message); - EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, error_code); - - std::string nested_json; - for (int i = 0; i < 101; ++i) { - nested_json.insert(nested_json.begin(), '['); - nested_json.append(1, ']'); +// Tests that the root of a JSON object can be deleted safely while its +// children outlive it. 
+TEST(JSONReaderTest, StringOptimizations) { + Value* dict_literals[2] = {0}; + Value* dict_strings[2] = {0}; + Value* list_values[2] = {0}; + + { + scoped_ptr<Value> root(JSONReader::Read( + "{" + " \"test\": {" + " \"foo\": true," + " \"bar\": 3.14," + " \"baz\": \"bat\"," + " \"moo\": \"cow\"" + " }," + " \"list\": [" + " \"a\"," + " \"b\"" + " ]" + "}", JSON_DETACHABLE_CHILDREN)); + ASSERT_TRUE(root.get()); + + DictionaryValue* root_dict = NULL; + ASSERT_TRUE(root->GetAsDictionary(&root_dict)); + + DictionaryValue* dict = NULL; + ListValue* list = NULL; + + ASSERT_TRUE(root_dict->GetDictionary("test", &dict)); + ASSERT_TRUE(root_dict->GetList("list", &list)); + + EXPECT_TRUE(dict->Remove("foo", &dict_literals[0])); + EXPECT_TRUE(dict->Remove("bar", &dict_literals[1])); + EXPECT_TRUE(dict->Remove("baz", &dict_strings[0])); + EXPECT_TRUE(dict->Remove("moo", &dict_strings[1])); + + ASSERT_EQ(2u, list->GetSize()); + EXPECT_TRUE(list->Remove(0, &list_values[0])); + EXPECT_TRUE(list->Remove(0, &list_values[1])); } - root.reset(JSONReader::ReadAndReturnError(nested_json, JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 101, JSONReader::kTooMuchNesting), - error_message); - EXPECT_EQ(JSONReader::JSON_TOO_MUCH_NESTING, error_code); - root.reset(JSONReader::ReadAndReturnError("[1,]", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 4, JSONReader::kTrailingComma), - error_message); - EXPECT_EQ(JSONReader::JSON_TRAILING_COMMA, error_code); + bool b = false; + double d = 0; + std::string s; - root.reset(JSONReader::ReadAndReturnError("{foo:\"bar\"}", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 2, - JSONReader::kUnquotedDictionaryKey), error_message); - EXPECT_EQ(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, error_code); + 
EXPECT_TRUE(dict_literals[0]->GetAsBoolean(&b)); + EXPECT_TRUE(b); - root.reset(JSONReader::ReadAndReturnError("{\"foo\":\"bar\",}", - JSON_PARSE_RFC, - &error_code, - &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 14, JSONReader::kTrailingComma), - error_message); + EXPECT_TRUE(dict_literals[1]->GetAsDouble(&d)); + EXPECT_EQ(3.14, d); - root.reset(JSONReader::ReadAndReturnError("[nu]", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 2, JSONReader::kSyntaxError), - error_message); - EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code); + EXPECT_TRUE(dict_strings[0]->GetAsString(&s)); + EXPECT_EQ("bat", s); - root.reset(JSONReader::ReadAndReturnError("[\"xxx\\xq\"]", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), - error_message); - EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code); + EXPECT_TRUE(dict_strings[1]->GetAsString(&s)); + EXPECT_EQ("cow", s); - root.reset(JSONReader::ReadAndReturnError("[\"xxx\\uq\"]", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), - error_message); - EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code); + EXPECT_TRUE(list_values[0]->GetAsString(&s)); + EXPECT_EQ("a", s); + EXPECT_TRUE(list_values[1]->GetAsString(&s)); + EXPECT_EQ("b", s); - root.reset(JSONReader::ReadAndReturnError("[\"xxx\\q\"]", JSON_PARSE_RFC, - &error_code, &error_message)); - EXPECT_FALSE(root.get()); - EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape), - error_message); - EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code); + delete dict_literals[0]; + delete dict_literals[1]; + delete dict_strings[0]; + delete dict_strings[1]; + delete list_values[0]; + delete list_values[1]; +} + +// A smattering of 
invalid JSON designed to test specific portions of the +// parser implementation against buffer overflow. Best run with DCHECKs so +// that the one in NextChar fires. +TEST(JSONReaderTest, InvalidSanity) { + const char* invalid_json[] = { + "/* test *", + "{\"foo\"", + "{\"foo\":", + " [", + "\"\\u123g\"", + "{\n\"eh:\n}", + }; + + for (size_t i = 0; i < arraysize(invalid_json); ++i) { + JSONReader reader; + LOG(INFO) << "Sanity test " << i << ": <" << invalid_json[i] << ">"; + EXPECT_FALSE(reader.ReadToValue(invalid_json[i])); + EXPECT_NE(JSONReader::JSON_NO_ERROR, reader.error_code()); + EXPECT_NE("", reader.GetErrorMessage()); + } +} + +TEST(JSONReaderTest, IllegalTrailingNull) { + const char json[] = { '"', 'n', 'u', 'l', 'l', '"', '\0' }; + std::string json_string(json, sizeof(json)); + JSONReader reader; + EXPECT_FALSE(reader.ReadToValue(json_string)); + EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, reader.error_code()); } } // namespace base diff --git a/base/string_util.cc b/base/string_util.cc index fd6fccc..2eecb79 100644 --- a/base/string_util.cc +++ b/base/string_util.cc @@ -472,7 +472,7 @@ bool IsStringUTF8(const std::string& str) { int32 code_point; CBU8_NEXT(src, char_index, src_len, code_point); if (!base::IsValidCharacter(code_point)) - return false; + return false; } return true; } diff --git a/base/values.cc b/base/values.cc index 8d7ca35..d561d68 100644 --- a/base/values.cc +++ b/base/values.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -694,6 +694,10 @@ void DictionaryValue::MergeDictionary(const DictionaryValue* dictionary) { } } +void DictionaryValue::Swap(DictionaryValue* other) { + dictionary_.swap(other->dictionary_); +} + DictionaryValue* DictionaryValue::DeepCopy() const { DictionaryValue* result = new DictionaryValue; @@ -908,6 +912,10 @@ ListValue::const_iterator ListValue::Find(const Value& value) const { return std::find_if(list_.begin(), list_.end(), ValueEquals(&value)); } +void ListValue::Swap(ListValue* other) { + list_.swap(other->list_); +} + bool ListValue::GetAsList(ListValue** out_value) { if (out_value) *out_value = this; diff --git a/base/values.h b/base/values.h index 4bcdc75..1d35d63 100644 --- a/base/values.h +++ b/base/values.h @@ -303,11 +303,12 @@ class BASE_EXPORT DictionaryValue : public Value { // passed out via out_value. If |out_value| is NULL, the removed value will // be deleted. This method returns true if |path| is a valid path; otherwise // it will return false and the DictionaryValue object will be unchanged. - bool Remove(const std::string& path, Value** out_value); + virtual bool Remove(const std::string& path, Value** out_value); // Like Remove(), but without special treatment of '.'. This allows e.g. URLs // to be used as paths. - bool RemoveWithoutPathExpansion(const std::string& key, Value** out_value); + virtual bool RemoveWithoutPathExpansion(const std::string& key, + Value** out_value); // Makes a copy of |this| but doesn't include empty dictionaries and lists in // the copy. This never returns NULL, even if |this| itself is empty. @@ -321,9 +322,7 @@ class BASE_EXPORT DictionaryValue : public Value { void MergeDictionary(const DictionaryValue* dictionary); // Swaps contents with the |other| dictionary. - void Swap(DictionaryValue* other) { - dictionary_.swap(other->dictionary_); - } + virtual void Swap(DictionaryValue* other); // This class provides an iterator for the keys in the dictionary. // It can't be used to modify the dictionary. 
@@ -425,7 +424,7 @@ class BASE_EXPORT ListValue : public Value { // passed out via |out_value|. If |out_value| is NULL, the removed value will // be deleted. This method returns true if |index| is valid; otherwise // it will return false and the ListValue object will be unchanged. - bool Remove(size_t index, Value** out_value); + virtual bool Remove(size_t index, Value** out_value); // Removes the first instance of |value| found in the list, if any, and // deletes it. |index| is the location where |value| was found. Returns false @@ -450,9 +449,7 @@ class BASE_EXPORT ListValue : public Value { const_iterator Find(const Value& value) const; // Swaps contents with the |other| list. - void Swap(ListValue* other) { - list_.swap(other->list_); - } + virtual void Swap(ListValue* other); // Iteration. iterator begin() { return list_.begin(); } |