summaryrefslogtreecommitdiffstats
path: root/base
diff options
context:
space:
mode:
authorrsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-05-16 15:23:30 +0000
committerrsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-05-16 15:23:30 +0000
commit6e680cfca66d0461f2824ccb5128e4f9cbb20bb6 (patch)
tree8810c22ec7ab357f5cc15cc508565d2a9872dcf5 /base
parent97570b99a4341b1d1d6512f5d82b43c3123eb927 (diff)
downloadchromium_src-6e680cfca66d0461f2824ccb5128e4f9cbb20bb6.zip
chromium_src-6e680cfca66d0461f2824ccb5128e4f9cbb20bb6.tar.gz
chromium_src-6e680cfca66d0461f2824ccb5128e4f9cbb20bb6.tar.bz2
Rewrite base::JSONReader to be 35-40% faster, depending on the input string.
This change does the following: * Parses the input string and generates the object representation in O(n) time. * Optimizes string decoding by using StringPiece where possible, which also introduces the JSON_DETACHABLE_CHILDREN parser option. * Makes JSONReader a simpler interface by hiding the parser details in an internal JSONParser class. BUG=49212,111581,121469 TEST=Hopefully covered by all test suites. New tests added for edge cases. Review URL: https://chromiumcodereview.appspot.com/10035042 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@137430 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r--base/base.gyp1
-rw-r--r--base/base.gypi2
-rw-r--r--base/base_export.h9
-rw-r--r--base/debug/trace_event_unittest.cc3
-rw-r--r--base/json/json_parser.cc973
-rw-r--r--base/json/json_parser.h273
-rw-r--r--base/json/json_parser_unittest.cc293
-rw-r--r--base/json/json_reader.cc680
-rw-r--r--base/json/json_reader.h174
-rw-r--r--base/json/json_reader_unittest.cc334
-rw-r--r--base/string_util.cc2
-rw-r--r--base/values.cc10
-rw-r--r--base/values.h15
13 files changed, 1808 insertions, 961 deletions
diff --git a/base/base.gyp b/base/base.gyp
index 52df286..99c826c 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -169,6 +169,7 @@
'i18n/rtl_unittest.cc',
'i18n/string_search_unittest.cc',
'i18n/time_formatting_unittest.cc',
+ 'json/json_parser_unittest.cc',
'json/json_reader_unittest.cc',
'json/json_value_converter_unittest.cc',
'json/json_value_serializer_unittest.cc',
diff --git a/base/base.gypi b/base/base.gypi
index 897cabd..78e8a764 100644
--- a/base/base.gypi
+++ b/base/base.gypi
@@ -143,6 +143,8 @@
'id_map.h',
'json/json_file_value_serializer.cc',
'json/json_file_value_serializer.h',
+ 'json/json_parser.cc',
+ 'json/json_parser.h',
'json/json_reader.cc',
'json/json_reader.h',
'json/json_string_value_serializer.cc',
diff --git a/base/base_export.h b/base/base_export.h
index 239360e..37bbc60 100644
--- a/base/base_export.h
+++ b/base/base_export.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -11,20 +11,25 @@
#if defined(BASE_IMPLEMENTATION)
#define BASE_EXPORT __declspec(dllexport)
+#define BASE_EXPORT_PRIVATE __declspec(dllexport)
#else
#define BASE_EXPORT __declspec(dllimport)
+#define BASE_EXPORT_PRIVATE __declspec(dllimport)
#endif // defined(BASE_IMPLEMENTATION)
#else // defined(WIN32)
#if defined(BASE_IMPLEMENTATION)
#define BASE_EXPORT __attribute__((visibility("default")))
+#define BASE_EXPORT_PRIVATE __attribute__((visibility("default")))
#else
#define BASE_EXPORT
-#endif
+#define BASE_EXPORT_PRIVATE
+#endif // defined(BASE_IMPLEMENTATION)
#endif
#else // defined(COMPONENT_BUILD)
#define BASE_EXPORT
+#define BASE_EXPORT_PRIVATE
#endif
#endif // BASE_BASE_EXPORT_H_
diff --git a/base/debug/trace_event_unittest.cc b/base/debug/trace_event_unittest.cc
index 66dff0d..f839d27 100644
--- a/base/debug/trace_event_unittest.cc
+++ b/base/debug/trace_event_unittest.cc
@@ -99,7 +99,8 @@ void TraceEventTestFixture::OnTraceDataCollected(
trace_buffer_.Finish();
scoped_ptr<Value> root;
- root.reset(base::JSONReader::Read(json_output_.json_output));
+ root.reset(base::JSONReader::Read(json_output_.json_output,
+ JSON_PARSE_RFC | JSON_DETACHABLE_CHILDREN));
if (!root.get()) {
LOG(ERROR) << json_output_.json_output;
diff --git a/base/json/json_parser.cc b/base/json/json_parser.cc
new file mode 100644
index 0000000..766c764
--- /dev/null
+++ b/base/json/json_parser.cc
@@ -0,0 +1,973 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/json/json_parser.h"
+
+#include "base/float_util.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/string_number_conversions.h"
+#include "base/string_util.h"
+#include "base/stringprintf.h"
+#include "base/third_party/icu/icu_utf.h"
+#include "base/utf_string_conversion_utils.h"
+#include "base/utf_string_conversions.h"
+#include "base/values.h"
+
+namespace base {
+namespace internal {
+
+namespace {
+
+const int kStackMaxDepth = 100;
+
+const int32 kExtendedASCIIStart = 0x80;
+
+// This and the class below are used to own the JSON input string for when
+// string tokens are stored as StringPiece instead of std::string. This
+// optimization avoids about 2/3rds of string memory copies. The constructor
+// takes the input string and swaps its data into the new instance. The real
+// root value is also Swap()ed into the new instance.
+class DictionaryHiddenRootValue : public base::DictionaryValue {
+ public:
+ DictionaryHiddenRootValue(std::string* json, Value* root) {
+ DCHECK(root->IsType(Value::TYPE_DICTIONARY));
+ DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
+ json->swap(json_);
+ }
+
+ virtual void Swap(DictionaryValue* other) OVERRIDE {
+ DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
+
+ // First deep copy to convert JSONStringValue to std::string and swap that
+ // copy with |other|, which contains the new contents of |this|.
+ scoped_ptr<base::DictionaryValue> copy(DeepCopy());
+ copy->Swap(other);
+
+ // Then erase the contents of the current dictionary and swap in the
+ // new contents, originally from |other|.
+ Clear();
+ json_.clear();
+ DictionaryValue::Swap(copy.get());
+ }
+
+ // Not overriding DictionaryValue::Remove because it just calls through to
+ // the method below.
+
+ virtual bool RemoveWithoutPathExpansion(const std::string& key,
+ Value** out) OVERRIDE {
+ // If the caller won't take ownership of the removed value, just call up.
+ if (!out)
+ return DictionaryValue::RemoveWithoutPathExpansion(key, out);
+
+ DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
+
+ // Otherwise, remove the value while it's still "owned" by this and copy it
+ // to convert any JSONStringValues to std::string.
+ Value* out_owned = NULL;
+ if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
+ return false;
+
+ *out = out_owned->DeepCopy();
+ delete out_owned;
+
+ return true;
+ }
+
+ private:
+ std::string json_;
+
+ DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
+};
+
+class ListHiddenRootValue : public base::ListValue {
+ public:
+ ListHiddenRootValue(std::string* json, Value* root) {
+ DCHECK(root->IsType(Value::TYPE_LIST));
+ ListValue::Swap(static_cast<ListValue*>(root));
+ json->swap(json_);
+ }
+
+ virtual void Swap(ListValue* other) OVERRIDE {
+ DVLOG(1) << "Swap()ing a ListValue inefficiently.";
+
+ // First deep copy to convert JSONStringValue to std::string and swap that
+ // copy with |other|, which contains the new contents of |this|.
+ scoped_ptr<base::ListValue> copy(DeepCopy());
+ copy->Swap(other);
+
+ // Then erase the contents of the current list and swap in the new contents,
+ // originally from |other|.
+ Clear();
+ json_.clear();
+ ListValue::Swap(copy.get());
+ }
+
+ virtual bool Remove(size_t index, Value** out) OVERRIDE {
+ // If the caller won't take ownership of the removed value, just call up.
+ if (!out)
+ return ListValue::Remove(index, out);
+
+ DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
+
+ // Otherwise, remove the value while it's still "owned" by this and copy it
+ // to convert any JSONStringValues to std::string.
+ Value* out_owned = NULL;
+ if (!ListValue::Remove(index, &out_owned))
+ return false;
+
+ *out = out_owned->DeepCopy();
+ delete out_owned;
+
+ return true;
+ }
+
+ private:
+ std::string json_;
+
+ DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
+};
+
+// A variant on StringValue that uses StringPiece instead of copying the string
+// into the Value. This can only be stored in a child of hidden root (above),
+// otherwise the referenced string will not be guaranteed to outlive it.
+class JSONStringValue : public base::Value {
+ public:
+  // Stores |piece| as-is; the referenced bytes are not copied, so they must
+  // stay alive for as long as this value is used.
+  explicit JSONStringValue(const base::StringPiece& piece)
+      : Value(TYPE_STRING),
+        string_piece_(piece) {
+  }
+
+  // Value:
+  // Copies the referenced bytes into |out_value| and returns true.
+  virtual bool GetAsString(std::string* out_value) const OVERRIDE {
+    string_piece_.CopyToString(out_value);
+    return true;
+  }
+  // Stores the UTF8ToUTF16() conversion of the referenced bytes in
+  // |out_value| and returns true.
+  virtual bool GetAsString(string16* out_value) const OVERRIDE {
+    *out_value = UTF8ToUTF16(string_piece_);
+    return true;
+  }
+  // Returns a new string Value that holds its own copy of the bytes, so the
+  // result does not reference the original input.
+  virtual Value* DeepCopy() const OVERRIDE {
+    return Value::CreateStringValue(string_piece_.as_string());
+  }
+  // Returns true if |other| is a TYPE_STRING value whose std::string
+  // representation has the same contents as the referenced bytes.
+  virtual bool Equals(const Value* other) const OVERRIDE {
+    std::string other_string;
+    return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
+        StringPiece(other_string) == string_piece_;
+  }
+
+ private:
+  // The location in the original input stream.
+  base::StringPiece string_piece_;
+
+  DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
+};
+
+// Simple class that checks for maximum recursion/"stack overflow."
+class StackMarker {
+ public:
+ explicit StackMarker(int* depth) : depth_(depth) {
+ ++(*depth_);
+ DCHECK_LE(*depth_, kStackMaxDepth);
+ }
+ ~StackMarker() {
+ --(*depth_);
+ }
+
+ bool IsTooDeep() const {
+ return *depth_ >= kStackMaxDepth;
+ }
+
+ private:
+ int* const depth_;
+
+ DISALLOW_COPY_AND_ASSIGN(StackMarker);
+};
+
+} // namespace
+
+JSONParser::JSONParser(int options)
+ : options_(options),
+ start_pos_(NULL),
+ pos_(NULL),
+ end_pos_(NULL),
+ index_(0),
+ stack_depth_(0),
+ line_number_(0),
+ index_last_line_(0),
+ error_code_(JSONReader::JSON_NO_ERROR),
+ error_line_(0),
+ error_column_(0) {
+}
+
+JSONParser::~JSONParser() {
+}
+
+Value* JSONParser::Parse(const std::string& input) {
+ // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix
+ // <http://crbug.com/126107> when my Windows box arrives.
+#if defined(OS_WIN)
+ options_ |= JSON_DETACHABLE_CHILDREN;
+#endif
+
+ std::string input_copy;
+ // If the children of a JSON root can be detached, then hidden roots cannot
+ // be used, so do not bother copying the input because StringPiece will not
+ // be used anywhere.
+ if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
+ input_copy = input;
+ start_pos_ = input_copy.data();
+ } else {
+ start_pos_ = input.data();
+ }
+ pos_ = start_pos_;
+ end_pos_ = start_pos_ + input.length();
+ index_ = 0;
+ line_number_ = 1;
+ index_last_line_ = 0;
+
+ error_code_ = JSONReader::JSON_NO_ERROR;
+ error_line_ = 0;
+ error_column_ = 0;
+
+ // When the input JSON string starts with a UTF-8 Byte-Order-Mark
+ // <0xEF 0xBB 0xBF>, advance the start position to avoid the
+ // ParseNextToken function mis-treating a Unicode BOM as an invalid
+ // character and returning NULL.
+ if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
+ static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
+ static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
+ NextNChars(3);
+ }
+
+ // Parse the first and any nested tokens.
+ scoped_ptr<Value> root(ParseNextToken());
+ if (!root.get())
+ return NULL;
+
+ // Make sure the input stream is at an end.
+ if (GetNextToken() != T_END_OF_INPUT) {
+ if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
+ ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
+ return NULL;
+ }
+ }
+
+ // Dictionaries and lists can contain JSONStringValues, so wrap them in a
+ // hidden root.
+ if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
+ if (root->IsType(Value::TYPE_DICTIONARY)) {
+ return new DictionaryHiddenRootValue(&input_copy, root.release());
+ } else if (root->IsType(Value::TYPE_LIST)) {
+ return new ListHiddenRootValue(&input_copy, root.release());
+ } else if (root->IsType(Value::TYPE_STRING)) {
+ // A string type could be a JSONStringValue, but because there's no
+ // corresponding HiddenRootValue, the memory will be lost. Deep copy to
+ // preserve it.
+ return root->DeepCopy();
+ }
+ }
+
+ // All other values can be returned directly.
+ return root.release();
+}
+
+JSONReader::JsonParseError JSONParser::error_code() const {
+ return error_code_;
+}
+
+std::string JSONParser::GetErrorMessage() const {
+ return FormatErrorMessage(error_line_, error_column_,
+ JSONReader::ErrorCodeToString(error_code_));
+}
+
+// StringBuilder ///////////////////////////////////////////////////////////////
+
+JSONParser::StringBuilder::StringBuilder()
+ : pos_(NULL),
+ length_(0),
+ string_(NULL) {
+}
+
+JSONParser::StringBuilder::StringBuilder(const char* pos)
+ : pos_(pos),
+ length_(0),
+ string_(NULL) {
+}
+
+void JSONParser::StringBuilder::Swap(StringBuilder* other) {
+ std::swap(other->string_, string_);
+ std::swap(other->pos_, pos_);
+ std::swap(other->length_, length_);
+}
+
+JSONParser::StringBuilder::~StringBuilder() {
+ delete string_;
+}
+
+void JSONParser::StringBuilder::Append(const char& c) {
+ DCHECK_GE(c, 0);
+ DCHECK_LT(c, 128);
+
+ if (string_)
+ string_->push_back(c);
+ else
+ ++length_;
+}
+
+void JSONParser::StringBuilder::AppendString(const std::string& str) {
+ DCHECK(string_);
+ string_->append(str);
+}
+
+void JSONParser::StringBuilder::Convert() {
+ if (string_)
+ return;
+ string_ = new std::string(pos_, length_);
+}
+
+bool JSONParser::StringBuilder::CanBeStringPiece() const {
+ return !string_;
+}
+
+StringPiece JSONParser::StringBuilder::AsStringPiece() {
+ if (string_)
+ return StringPiece();
+ return StringPiece(pos_, length_);
+}
+
+const std::string& JSONParser::StringBuilder::AsString() {
+ if (!string_)
+ Convert();
+ return *string_;
+}
+
+// JSONParser private //////////////////////////////////////////////////////////
+
+inline bool JSONParser::CanConsume(int length) {
+ return pos_ + length <= end_pos_;
+}
+
+const char* JSONParser::NextChar() {
+ DCHECK(CanConsume(1));
+ ++index_;
+ ++pos_;
+ return pos_;
+}
+
+void JSONParser::NextNChars(int n) {
+ DCHECK(CanConsume(n));
+ index_ += n;
+ pos_ += n;
+}
+
+JSONParser::Token JSONParser::GetNextToken() {
+ EatWhitespaceAndComments();
+ if (!CanConsume(1))
+ return T_END_OF_INPUT;
+
+ switch (*pos_) {
+ case '{':
+ return T_OBJECT_BEGIN;
+ case '}':
+ return T_OBJECT_END;
+ case '[':
+ return T_ARRAY_BEGIN;
+ case ']':
+ return T_ARRAY_END;
+ case '"':
+ return T_STRING;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case '-':
+ return T_NUMBER;
+ case 't':
+ return T_BOOL_TRUE;
+ case 'f':
+ return T_BOOL_FALSE;
+ case 'n':
+ return T_NULL;
+ case ',':
+ return T_LIST_SEPARATOR;
+ case ':':
+ return T_OBJECT_PAIR_SEPARATOR;
+ default:
+ return T_INVALID_TOKEN;
+ }
+}
+
+void JSONParser::EatWhitespaceAndComments() {
+ while (pos_ < end_pos_) {
+ switch (*pos_) {
+ case '\r':
+ case '\n':
+ index_last_line_ = index_;
+ ++line_number_;
+ // Fall through.
+ case ' ':
+ case '\t':
+ NextChar();
+ break;
+ case '/':
+ if (!EatComment())
+ return;
+ break;
+ default:
+ return;
+ }
+ }
+}
+
+// Consumes one comment that begins at |pos_|. Returns true if a complete
+// comment was consumed: a "//" comment that reached a '\n' or '\r' (|pos_| is
+// left on that newline character), or a "/* */" comment whose end marker was
+// found (|pos_| is left just past the closing '/'). Returns false otherwise;
+// in that case |pos_| may have been advanced.
+bool JSONParser::EatComment() {
+  // Check the bounds before looking at the current character.
+  if (!CanConsume(1) || *pos_ != '/')
+    return false;
+
+  // Step past the leading '/'. If that was the last character of the input
+  // there is nothing left to inspect.
+  NextChar();
+  if (!CanConsume(1))
+    return false;
+
+  const char comment_kind = *pos_;
+  if (comment_kind == '/') {
+    // Single line comment, read to newline. The loop only dereferences
+    // |pos_| while it is inside the input.
+    for (NextChar(); CanConsume(1); NextChar()) {
+      if (*pos_ == '\n' || *pos_ == '\r')
+        return true;
+    }
+  } else if (comment_kind == '*') {
+    // Block comment, read until end marker. Track the previous character
+    // rather than consuming two characters per test, so that a run of
+    // asterisks before the slash (e.g. "**/") still terminates the comment.
+    // |previous_char| deliberately does not start as the opening '*', so
+    // "/*/" is not treated as a complete comment.
+    char previous_char = '\0';
+    for (NextChar(); CanConsume(1); NextChar()) {
+      const char current_char = *pos_;
+      if (previous_char == '*' && current_char == '/') {
+        // EatWhitespaceAndComments will inspect pos_, which will still be on
+        // the last / of the comment, so advance once more (which may also be
+        // end of input).
+        NextChar();
+        return true;
+      }
+      previous_char = current_char;
+    }
+
+    // If the comment is unterminated, |pos_| is now at the end of the input,
+    // so GetNextToken will report T_END_OF_INPUT.
+  }
+
+  return false;
+}
+
+Value* JSONParser::ParseNextToken() {
+ return ParseToken(GetNextToken());
+}
+
+Value* JSONParser::ParseToken(Token token) {
+ switch (token) {
+ case T_OBJECT_BEGIN:
+ return ConsumeDictionary();
+ case T_ARRAY_BEGIN:
+ return ConsumeList();
+ case T_STRING:
+ return ConsumeString();
+ case T_NUMBER:
+ return ConsumeNumber();
+ case T_BOOL_TRUE:
+ case T_BOOL_FALSE:
+ case T_NULL:
+ return ConsumeLiteral();
+ default:
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return NULL;
+ }
+}
+
+Value* JSONParser::ConsumeDictionary() {
+ if (*pos_ != '{') {
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return NULL;
+ }
+
+ StackMarker depth_check(&stack_depth_);
+ if (depth_check.IsTooDeep()) {
+ ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
+ return NULL;
+ }
+
+ scoped_ptr<DictionaryValue> dict(new DictionaryValue);
+
+ NextChar();
+ Token token = GetNextToken();
+ while (token != T_OBJECT_END) {
+ if (token != T_STRING) {
+ ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
+ return NULL;
+ }
+
+ // First consume the key.
+ StringBuilder key;
+ if (!ConsumeStringRaw(&key)) {
+ return NULL;
+ }
+
+ // Read the separator.
+ NextChar();
+ token = GetNextToken();
+ if (token != T_OBJECT_PAIR_SEPARATOR) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+
+ // The next token is the value. Ownership transfers to |dict|.
+ NextChar();
+ Value* value = ParseNextToken();
+ if (!value) {
+ // ReportError from deeper level.
+ return NULL;
+ }
+
+ dict->SetWithoutPathExpansion(key.AsString(), value);
+
+ NextChar();
+ token = GetNextToken();
+ if (token == T_LIST_SEPARATOR) {
+ NextChar();
+ token = GetNextToken();
+ if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
+ ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
+ return NULL;
+ }
+ } else if (token != T_OBJECT_END) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
+ return NULL;
+ }
+ }
+
+ if (token != T_OBJECT_END)
+ return NULL;
+
+ return dict.release();
+}
+
+Value* JSONParser::ConsumeList() {
+ if (*pos_ != '[') {
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return NULL;
+ }
+
+ StackMarker depth_check(&stack_depth_);
+ if (depth_check.IsTooDeep()) {
+ ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
+ return NULL;
+ }
+
+ scoped_ptr<ListValue> list(new ListValue);
+
+ NextChar();
+ Token token = GetNextToken();
+ while (token != T_ARRAY_END) {
+ Value* item = ParseToken(token);
+ if (!item) {
+ // ReportError from deeper level.
+ return NULL;
+ }
+
+ list->Append(item);
+
+ NextChar();
+ token = GetNextToken();
+ if (token == T_LIST_SEPARATOR) {
+ NextChar();
+ token = GetNextToken();
+ if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
+ ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
+ return NULL;
+ }
+ } else if (token != T_ARRAY_END) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ }
+
+ if (token != T_ARRAY_END)
+ return NULL;
+
+ return list.release();
+}
+
+Value* JSONParser::ConsumeString() {
+ StringBuilder string;
+ if (!ConsumeStringRaw(&string))
+ return NULL;
+
+ // Create the Value representation, using a hidden root, if configured
+ // to do so, and if the string can be represented by StringPiece.
+ if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
+ return new JSONStringValue(string.AsStringPiece());
+ } else {
+ if (string.CanBeStringPiece())
+ string.Convert();
+ return new StringValue(string.AsString());
+ }
+}
+
+bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
+ if (*pos_ != '"') {
+ ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+ return false;
+ }
+
+ // StringBuilder will internally build a StringPiece unless a UTF-16
+ // conversion occurs, at which point it will perform a copy into a
+ // std::string.
+ StringBuilder string(NextChar());
+
+ int length = end_pos_ - start_pos_;
+ int32 next_char = 0;
+
+ while (CanConsume(1)) {
+ pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
+ CBU8_NEXT(start_pos_, index_, length, next_char);
+ if (next_char < 0 || !IsValidCharacter(next_char)) {
+ ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
+ return false;
+ }
+
+ // If this character is an escape sequence...
+ if (next_char == '\\') {
+ // The input string will be adjusted (either by combining the two
+ // characters of an encoded escape sequence, or with a UTF conversion),
+ // so using StringPiece isn't possible -- force a conversion.
+ string.Convert();
+
+ if (!CanConsume(1)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
+ return false;
+ }
+
+ switch (*NextChar()) {
+ // Allowed escape sequences:
+ case 'x': { // UTF-8 sequence.
+ // UTF-8 \x escape sequences are not allowed in the spec, but they
+ // are supported here for backwards-compatibility with the old parser.
+ if (!CanConsume(2)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
+ return false;
+ }
+
+ int hex_digit = 0;
+ if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
+ return false;
+ }
+ NextChar();
+
+ if (hex_digit < kExtendedASCIIStart)
+ string.Append(hex_digit);
+ else
+ DecodeUTF8(hex_digit, &string);
+ break;
+ }
+ case 'u': { // UTF-16 sequence.
+ // UTF units are of the form \uXXXX.
+ if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
+ return false;
+ }
+
+ // Skip the 'u'.
+ NextChar();
+
+ std::string utf8_units;
+ if (!DecodeUTF16(&utf8_units)) {
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
+ return false;
+ }
+
+ string.AppendString(utf8_units);
+ break;
+ }
+ case '"':
+ string.Append('"');
+ break;
+ case '\\':
+ string.Append('\\');
+ break;
+ case '/':
+ string.Append('/');
+ break;
+ case 'b':
+ string.Append('\b');
+ break;
+ case 'f':
+ string.Append('\f');
+ break;
+ case 'n':
+ string.Append('\n');
+ break;
+ case 'r':
+ string.Append('\r');
+ break;
+ case 't':
+ string.Append('\t');
+ break;
+ case 'v': // Not listed as valid escape sequence in the RFC.
+ string.Append('\v');
+ break;
+ // All other escape sequences are illegal.
+ default:
+ ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
+ return false;
+ }
+ } else if (next_char == '"') {
+ --index_; // Rewind by one because of CBU8_NEXT.
+ out->Swap(&string);
+ return true;
+ } else {
+ if (next_char < kExtendedASCIIStart)
+ string.Append(next_char);
+ else
+ DecodeUTF8(next_char, &string);
+ }
+ }
+
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
+ return false;
+}
+
+// Entry is at the first X in \uXXXX.
+bool JSONParser::DecodeUTF16(std::string* dest_string) {
+ if (!CanConsume(4))
+ return false;
+
+ // This is a 32-bit field because the shift operations in the
+ // conversion process below cause MSVC to error about "data loss."
+ // This only stores UTF-16 code units, though.
+ // Consume the UTF-16 code unit, which may be a high surrogate.
+ int code_unit16_high = 0;
+ if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
+ return false;
+
+ // Only add 3, not 4, because at the end of this iteration, the parser has
+ // finished working with the last digit of the UTF sequence, meaning that
+ // the next iteration will advance to the next byte.
+ NextNChars(3);
+
+ // Used to convert the UTF-16 code units to a code point and then to a UTF-8
+ // code unit sequence.
+ char code_unit8[8] = { 0 };
+ size_t offset = 0;
+
+ // If this is a high surrogate, consume the next code unit to get the
+ // low surrogate.
+ if (CBU16_IS_SURROGATE(code_unit16_high)) {
+ // Make sure this is the high surrogate. If not, it's an encoding
+ // error.
+ if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
+ return false;
+
+ // Make sure that the token has more characters to consume the
+ // lower surrogate.
+ if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
+ return false;
+ if (*NextChar() != '\\' || *NextChar() != 'u')
+ return false;
+
+ NextChar(); // Read past 'u'.
+ int code_unit16_low = 0;
+ if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
+ return false;
+
+ NextNChars(3);
+
+ if (!CBU16_IS_TRAIL(code_unit16_low)) {
+ return false;
+ }
+
+ uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
+ code_unit16_low);
+ offset = 0;
+ CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
+ } else {
+ // Not a surrogate.
+ DCHECK(CBU16_IS_SINGLE(code_unit16_high));
+ CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
+ }
+
+ dest_string->append(code_unit8);
+ return true;
+}
+
+// Appends the code point |point| to |dest|. Values below
+// kExtendedASCIIStart are appended as a single character; anything else is
+// encoded as a multi-byte UTF-8 sequence, which forces |dest| to be converted
+// to an owned std::string first.
+void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
+  // Anything outside of the basic ASCII plane will need to be decoded from
+  // int32 to a multi-byte sequence.
+  if (point < kExtendedASCIIStart) {
+    dest->Append(point);
+  } else {
+    char utf8_units[4] = { 0 };
+    int offset = 0;
+    CBU8_APPEND_UNSAFE(utf8_units, offset, point);
+    dest->Convert();
+    // CBU8_APPEND_UNSAFE can fill all 4 bytes, so |utf8_units| may not be
+    // NUL-terminated. Append exactly the |offset| bytes that were written
+    // instead of treating the buffer as a C string.
+    dest->AppendString(std::string(utf8_units, offset));
+  }
+}
+
+Value* JSONParser::ConsumeNumber() {
+ const char* num_start = pos_;
+ const int start_index = index_;
+ int end_index = start_index;
+
+ if (*pos_ == '-')
+ NextChar();
+
+ if (!ReadInt(false)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ end_index = index_;
+
+ // The optional fraction part.
+ if (*pos_ == '.') {
+ if (!CanConsume(1)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ NextChar();
+ if (!ReadInt(true)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ end_index = index_;
+ }
+
+ // Optional exponent part.
+ if (*pos_ == 'e' || *pos_ == 'E') {
+ NextChar();
+ if (*pos_ == '-' || *pos_ == '+')
+ NextChar();
+ if (!ReadInt(true)) {
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+ end_index = index_;
+ }
+
+ // ReadInt is greedy because numbers have no easily detectable sentinel,
+ // so save off where the parser should be on exit (see Consume invariant at
+ // the top of the header), then make sure the next token is one which is
+ // valid.
+ const char* exit_pos = pos_ - 1;
+ int exit_index = index_ - 1;
+
+ switch (GetNextToken()) {
+ case T_OBJECT_END:
+ case T_ARRAY_END:
+ case T_LIST_SEPARATOR:
+ case T_END_OF_INPUT:
+ break;
+ default:
+ ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+ return NULL;
+ }
+
+ pos_ = exit_pos;
+ index_ = exit_index;
+
+ StringPiece num_string(num_start, end_index - start_index);
+
+ int num_int;
+ if (StringToInt(num_string, &num_int))
+ return Value::CreateIntegerValue(num_int);
+
+ double num_double;
+ if (base::StringToDouble(num_string.as_string(), &num_double) &&
+ IsFinite(num_double)) {
+ return Value::CreateDoubleValue(num_double);
+ }
+
+ return NULL;
+}
+
+// Greedily consumes a run of ASCII digits starting at |pos_|, leaving |pos_|
+// on the first character after the run (which may be the end of the input).
+// Returns false if no digit was consumed, or if |allow_leading_zeros| is
+// false and a run of more than one digit starts with '0'.
+bool JSONParser::ReadInt(bool allow_leading_zeros) {
+  // Nothing to read at the end of the input; do not dereference |pos_|.
+  if (!CanConsume(1))
+    return false;
+
+  const char first = *pos_;
+  int len = 0;
+
+  // Only look at the current character while it is inside the input.
+  while (CanConsume(1) && IsAsciiDigit(*pos_)) {
+    NextChar();
+    ++len;
+  }
+
+  if (len == 0)
+    return false;
+
+  if (!allow_leading_zeros && len > 1 && first == '0')
+    return false;
+
+  return true;
+}
+
+// Consumes one of the literals "true", "false" or "null" starting at |pos_|
+// and returns the corresponding new Value. On success |pos_| is left on the
+// last character of the literal. On failure an error is reported and NULL is
+// returned.
+Value* JSONParser::ConsumeLiteral() {
+  switch (*pos_) {
+    case 't': {
+      const char* kTrueLiteral = "true";
+      const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
+      // All |kTrueLen| bytes are compared below, so all of them must be
+      // inside the input.
+      if (!CanConsume(kTrueLen) ||
+          !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
+        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+        return NULL;
+      }
+      // Advance by one less than the length to stay on the last character.
+      NextNChars(kTrueLen - 1);
+      return Value::CreateBooleanValue(true);
+    }
+    case 'f': {
+      const char* kFalseLiteral = "false";
+      const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
+      if (!CanConsume(kFalseLen) ||
+          !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
+        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+        return NULL;
+      }
+      NextNChars(kFalseLen - 1);
+      return Value::CreateBooleanValue(false);
+    }
+    case 'n': {
+      const char* kNullLiteral = "null";
+      const int kNullLen = static_cast<int>(strlen(kNullLiteral));
+      if (!CanConsume(kNullLen) ||
+          !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
+        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
+        return NULL;
+      }
+      NextNChars(kNullLen - 1);
+      return Value::CreateNullValue();
+    }
+    default:
+      ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
+      return NULL;
+  }
+}
+
+// static
+bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
+ return strncmp(one, two, len) == 0;
+}
+
+void JSONParser::ReportError(JSONReader::JsonParseError code,
+ int column_adjust) {
+ error_code_ = code;
+ error_line_ = line_number_;
+ error_column_ = index_ - index_last_line_ + column_adjust;
+}
+
+// static
+std::string JSONParser::FormatErrorMessage(int line, int column,
+ const std::string& description) {
+ if (line || column) {
+ return StringPrintf("Line: %i, column: %i, %s",
+ line, column, description.c_str());
+ }
+ return description;
+}
+
+} // namespace internal
+} // namespace base
diff --git a/base/json/json_parser.h b/base/json/json_parser.h
new file mode 100644
index 0000000..901e679
--- /dev/null
+++ b/base/json/json_parser.h
@@ -0,0 +1,273 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_JSON_JSON_PARSER_H_
+#define BASE_JSON_JSON_PARSER_H_
+#pragma once
+
+#include <string>
+
+#include "base/base_export.h"
+#include "base/basictypes.h"
+#include "base/compiler_specific.h"
+#include "base/json/json_reader.h"
+#include "base/string_piece.h"
+
+#if !defined(OS_CHROMEOS)
+#include "base/gtest_prod_util.h"
+#endif
+
+namespace base {
+class Value;
+}
+
+#if defined(OS_CHROMEOS)
+// Chromium and Chromium OS check out gtest to different places, so this is
+// unable to compile on both if gtest_prod.h is included here. Instead, include
+// its only contents -- this will need to be updated if the macro ever changes.
+#define FRIEND_TEST(test_case_name, test_name)\
+friend class test_case_name##_##test_name##_Test
+
+#define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \
+ FRIEND_TEST(test_case_name, test_name); \
+ FRIEND_TEST(test_case_name, DISABLED_##test_name); \
+ FRIEND_TEST(test_case_name, FLAKY_##test_name); \
+ FRIEND_TEST(test_case_name, FAILS_##test_name)
+#endif // OS_CHROMEOS
+
+namespace base {
+namespace internal {
+
+class JSONParserTest;
+
+// The implementation behind the JSONReader interface. This class is not meant
+// to be used directly; it encapsulates logic that need not be exposed publicly.
+//
+// This parser guarantees O(n) time through the input string. It also optimizes
+// base::StringValue by using StringPiece where possible when returning Value
+// objects by using "hidden roots," discussed in the implementation.
+//
+// Iteration happens on the byte level, with the functions CanConsume and
+// NextChar. The conversion from byte to JSON token happens without advancing
+// the parser in GetNextToken/ParseToken; that is, tokenization operates on
+// the current parser position without advancing.
+//
+// Built on top of these are a family of Consume functions that iterate
+// internally. Invariant: on entry of a Consume function, the parser is wound
+// to the first byte of a valid JSON token. On exit, it is on the last byte
+// of a token, such that the next iteration of the parser will be at the byte
+// immediately following the token, which would likely be the first byte of the
+// next token.
+class BASE_EXPORT_PRIVATE JSONParser {
+ public:
+ explicit JSONParser(int options);
+ ~JSONParser();
+
+ // Parses the input string according to the set options and returns the
+ // result as a Value owned by the caller.
+ Value* Parse(const std::string& input);
+
+ // Returns the error code.
+ JSONReader::JsonParseError error_code() const;
+
+ // Returns the human-friendly error message.
+ std::string GetErrorMessage() const;
+
+ private:
+ enum Token {
+ T_OBJECT_BEGIN, // {
+ T_OBJECT_END, // }
+ T_ARRAY_BEGIN, // [
+ T_ARRAY_END, // ]
+ T_STRING,
+ T_NUMBER,
+ T_BOOL_TRUE, // true
+ T_BOOL_FALSE, // false
+ T_NULL, // null
+ T_LIST_SEPARATOR, // ,
+ T_OBJECT_PAIR_SEPARATOR, // :
+ T_END_OF_INPUT,
+ T_INVALID_TOKEN,
+ };
+
+ // A helper class used for parsing strings. One optimization performed is to
+ // create base::Value with a StringPiece to avoid unnecessary std::string
+ // copies. This is not possible if the input string needs to be decoded from
+ // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
+ // This class centralizes that logic.
+ class StringBuilder {
+ public:
+ // Empty constructor. Used for creating a builder with which to Swap().
+ StringBuilder();
+
+ // |pos| is the beginning of an input string, excluding the |"|.
+ explicit StringBuilder(const char* pos);
+
+ ~StringBuilder();
+
+ // Swaps the contents of |other| with this.
+ void Swap(StringBuilder* other);
+
+ // Either increases the |length_| of the string or copies the character if
+ // the StringBuilder has been converted. |c| must be in the basic ASCII
+ // plane; all other characters need to be in UTF-8 units, appended with
+ // AppendString below.
+ void Append(const char& c);
+
+ // Appends a string to the std::string. Must be Convert()ed to use.
+ void AppendString(const std::string& str);
+
+ // Converts the builder from its default StringPiece to a full std::string,
+ // performing a copy. Once a builder is converted, it cannot be made a
+ // StringPiece again.
+ void Convert();
+
+ // Returns whether the builder can be converted to a StringPiece.
+ bool CanBeStringPiece() const;
+
+ // Returns the StringPiece representation. Returns an empty piece if it
+ // cannot be converted.
+ StringPiece AsStringPiece();
+
+ // Returns the builder as a std::string.
+ const std::string& AsString();
+
+ private:
+ // The beginning of the input string.
+ const char* pos_;
+
+ // Number of bytes in |pos_| that make up the string being built.
+ size_t length_;
+
+ // The copied string representation. NULL until Convert() is called.
+ // Strong. scoped_ptr<T> has too much of an overhead here.
+ std::string* string_;
+ };
+
+ // Quick check that the stream has capacity to consume |length| more bytes.
+ bool CanConsume(int length);
+
+ // The basic way to consume a single character in the stream. Consumes one
+ // byte of the input stream and returns a pointer to the rest of it.
+ const char* NextChar();
+
+ // Performs the equivalent of NextChar N times.
+ void NextNChars(int n);
+
+ // Skips over whitespace and comments to find the next token in the stream.
+ // This does not advance the parser for non-whitespace or comment chars.
+ Token GetNextToken();
+
+ // Consumes whitespace characters and comments until a character that is
+ // neither whitespace nor part of a comment is encountered.
+ void EatWhitespaceAndComments();
+ // Helper function that consumes a comment, assuming that the parser is
+ // currently wound to a '/'.
+ bool EatComment();
+
+ // Calls GetNextToken() and then ParseToken(). Caller owns the result.
+ Value* ParseNextToken();
+
+ // Takes a token that represents the start of a Value ("a structural token"
+ // in RFC terms) and consumes it, returning the result as an object the
+ // caller owns.
+ Value* ParseToken(Token token);
+
+ // Assuming that the parser is currently wound to '{', this parses a JSON
+ // object into a DictionaryValue.
+ Value* ConsumeDictionary();
+
+ // Assuming that the parser is wound to '[', this parses a JSON list into a
+ // ListValue.
+ Value* ConsumeList();
+
+ // Calls through ConsumeStringRaw and wraps it in a value.
+ Value* ConsumeString();
+
+ // Assuming that the parser is wound to a double quote, this parses a string,
+ // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
+ // success and Swap()s the result into |out|. Returns false on failure with
+ // error information set.
+ bool ConsumeStringRaw(StringBuilder* out);
+ // Helper function for ConsumeStringRaw() that consumes the next four or 10
+ // bytes (parser is wound to the first character of a HEX sequence, with the
+ // potential for consuming another \uXXXX for a surrogate). Returns true on
+ // success and places the UTF8 code units in |dest_string|, and false on
+ // failure.
+ bool DecodeUTF16(std::string* dest_string);
+ // Helper function for ConsumeStringRaw() that takes a single code point,
+ // decodes it into UTF-8 units, and appends it to the given builder. The
+ // point must be valid.
+ void DecodeUTF8(const int32& point, StringBuilder* dest);
+
+ // Assuming that the parser is wound to the start of a valid JSON number,
+ // this parses and converts it to either an int or double value.
+ Value* ConsumeNumber();
+ // Helper that reads characters that are ints. Returns true if a number was
+ // read and false on error.
+ bool ReadInt(bool allow_leading_zeros);
+
+ // Consumes the literal values of |true|, |false|, and |null|, assuming the
+ // parser is wound to the first character of any of those.
+ Value* ConsumeLiteral();
+
+ // Compares two string buffers of a given length.
+ static bool StringsAreEqual(const char* left, const char* right, size_t len);
+
+ // Sets the error information to |code| at the current column, based on
+ // |index_| and |index_last_line_|, with an optional positive/negative
+ // adjustment by |column_adjust|.
+ void ReportError(JSONReader::JsonParseError code, int column_adjust);
+
+ // Given the line and column number of an error, formats one of the error
+ // message constants from json_reader.h for human display.
+ static std::string FormatErrorMessage(int line, int column,
+ const std::string& description);
+
+ // base::JSONParserOptions that control parsing.
+ int options_;
+
+ // Pointer to the start of the input data.
+ const char* start_pos_;
+
+ // Pointer to the current position in the input data. Equivalent to
+ // |start_pos_ + index_|.
+ const char* pos_;
+
+ // End of the input data; presumably one past the last byte -- TODO confirm.
+ const char* end_pos_;
+
+ // The index in the input stream to which the parser is wound.
+ int index_;
+
+ // The number of times the parser has recursed (current stack depth).
+ int stack_depth_;
+
+ // The line number that the parser is at currently.
+ int line_number_;
+
+ // The last value of |index_| on the previous line.
+ int index_last_line_;
+
+ // Error information.
+ JSONReader::JsonParseError error_code_;
+ int error_line_;
+ int error_column_;
+
+ friend class JSONParserTest;
+ FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
+ FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
+ FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
+ FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
+ FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
+ FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
+ FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
+
+ DISALLOW_COPY_AND_ASSIGN(JSONParser);
+};
+
+} // namespace internal
+} // namespace base
+
+#endif // BASE_JSON_JSON_PARSER_H_
diff --git a/base/json/json_parser_unittest.cc b/base/json/json_parser_unittest.cc
new file mode 100644
index 0000000..206ef4c
--- /dev/null
+++ b/base/json/json_parser_unittest.cc
@@ -0,0 +1,293 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/json/json_parser.h"
+
+#include "base/json/json_reader.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/values.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace base {
+namespace internal {
+
+class JSONParserTest : public testing::Test {
+ public:
+  // Builds an RFC-mode parser whose start, current and end pointers are aimed
+  // directly at |input|'s buffer. The caller owns the returned parser and must
+  // keep |input| alive for as long as the parser is used.
+  JSONParser* NewTestParser(const std::string& input) {
+    JSONParser* result = new JSONParser(JSON_PARSE_RFC);
+    const char* const begin = input.data();
+    result->start_pos_ = begin;
+    result->pos_ = begin;
+    result->end_pos_ = begin + input.length();
+    return result;
+  }
+
+  // Checks that the next three NextChar() calls yield ',', '|' and the
+  // terminating NUL, and that the parser has then reached |end_pos_|.
+  void TestLastThree(JSONParser* parser) {
+    const char kExpectedTail[] = { ',', '|', '\0' };
+    for (size_t i = 0; i < sizeof(kExpectedTail); ++i) {
+      EXPECT_EQ(kExpectedTail[i], *parser->NextChar());
+    }
+    EXPECT_EQ(parser->end_pos_, parser->pos_);
+  }
+};
+
+TEST_F(JSONParserTest, NextChar) {
+  const std::string input("Hello world");
+  scoped_ptr<JSONParser> parser(NewTestParser(input));
+
+  // The parser starts on the first byte; every NextChar() call must then
+  // yield the following byte of the input, in order.
+  EXPECT_EQ('H', *parser->pos_);
+  std::string::const_iterator expected = input.begin() + 1;
+  while (expected != input.end()) {
+    EXPECT_EQ(*expected, *parser->NextChar());
+    ++expected;
+  }
+
+  // One further step must land exactly on the end pointer.
+  const char* const after_last = parser->NextChar();
+  EXPECT_EQ(parser->end_pos_, after_last);
+}
+
+TEST_F(JSONParserTest, ConsumeString) {
+  const std::string json("\"test\",|");
+  scoped_ptr<JSONParser> parser(NewTestParser(json));
+  scoped_ptr<Value> parsed(parser->ConsumeString());
+
+  // The parser must be left on the closing quote, with ",|" still unread.
+  EXPECT_EQ('"', *parser->pos_);
+  TestLastThree(parser.get());
+
+  // The decoded contents exclude the surrounding quotes.
+  ASSERT_TRUE(parsed.get());
+  std::string contents;
+  EXPECT_TRUE(parsed->GetAsString(&contents));
+  EXPECT_EQ("test", contents);
+}
+
+TEST_F(JSONParserTest, ConsumeList) {
+  std::string input("[true, false],|");
+  scoped_ptr<JSONParser> parser(NewTestParser(input));
+  scoped_ptr<Value> value(parser->ConsumeList());
+  // The parser must be left on the closing bracket of the list.
+  EXPECT_EQ(']', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  // |list| is initialized and the conversion is checked with ASSERT so that a
+  // failed GetAsList() stops the test instead of dereferencing an
+  // indeterminate pointer on the next line.
+  base::ListValue* list = NULL;
+  ASSERT_TRUE(value->GetAsList(&list));
+  EXPECT_EQ(2u, list->GetSize());
+}
+
+TEST_F(JSONParserTest, ConsumeDictionary) {
+  std::string input("{\"abc\":\"def\"},|");
+  scoped_ptr<JSONParser> parser(NewTestParser(input));
+  scoped_ptr<Value> value(parser->ConsumeDictionary());
+  // The parser must be left on the closing brace of the object.
+  EXPECT_EQ('}', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  // |dict| is initialized and the conversion is checked with ASSERT so that a
+  // failed GetAsDictionary() stops the test instead of dereferencing an
+  // indeterminate pointer below.
+  base::DictionaryValue* dict = NULL;
+  ASSERT_TRUE(value->GetAsDictionary(&dict));
+  std::string str;
+  EXPECT_TRUE(dict->GetString("abc", &str));
+  EXPECT_EQ("def", str);
+}
+
+TEST_F(JSONParserTest, ConsumeLiterals) {
+  bool parsed_bool = false;
+
+  // |true|: the parser must stop on the final 'e' and produce boolean true.
+  std::string json("true,|");
+  scoped_ptr<JSONParser> parser(NewTestParser(json));
+  scoped_ptr<Value> literal(parser->ConsumeLiteral());
+  EXPECT_EQ('e', *parser->pos_);
+  TestLastThree(parser.get());
+  ASSERT_TRUE(literal.get());
+  EXPECT_TRUE(literal->GetAsBoolean(&parsed_bool));
+  EXPECT_TRUE(parsed_bool);
+
+  // |false|: the parser must stop on the final 'e' and produce boolean false.
+  json = "false,|";
+  parser.reset(NewTestParser(json));
+  literal.reset(parser->ConsumeLiteral());
+  EXPECT_EQ('e', *parser->pos_);
+  TestLastThree(parser.get());
+  ASSERT_TRUE(literal.get());
+  EXPECT_TRUE(literal->GetAsBoolean(&parsed_bool));
+  EXPECT_FALSE(parsed_bool);
+
+  // |null|: the parser must stop on the final 'l' and produce a null Value.
+  json = "null,|";
+  parser.reset(NewTestParser(json));
+  literal.reset(parser->ConsumeLiteral());
+  EXPECT_EQ('l', *parser->pos_);
+  TestLastThree(parser.get());
+  ASSERT_TRUE(literal.get());
+  EXPECT_TRUE(literal->IsType(Value::TYPE_NULL));
+}
+
+TEST_F(JSONParserTest, ConsumeNumbers) {
+  // Each case checks that the parser stops on the last byte of the number,
+  // that ",|" is still unread, and that the decoded value is as expected.
+
+  // Integer.
+  std::string input("1234,|");
+  scoped_ptr<JSONParser> parser(NewTestParser(input));
+  scoped_ptr<Value> value(parser->ConsumeNumber());
+  EXPECT_EQ('4', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  // Initialized so that a failed GetAsInteger() (a non-fatal EXPECT) does not
+  // lead to an uninitialized read in the comparison that follows.
+  int number_i = 0;
+  EXPECT_TRUE(value->GetAsInteger(&number_i));
+  EXPECT_EQ(1234, number_i);
+
+  // Negative integer.
+  input = "-1234,|";
+  parser.reset(NewTestParser(input));
+  value.reset(parser->ConsumeNumber());
+  EXPECT_EQ('4', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  EXPECT_TRUE(value->GetAsInteger(&number_i));
+  EXPECT_EQ(-1234, number_i);
+
+  // Double.
+  input = "12.34,|";
+  parser.reset(NewTestParser(input));
+  value.reset(parser->ConsumeNumber());
+  EXPECT_EQ('4', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  // Initialized for the same reason as |number_i| above.
+  double number_d = 0.0;
+  EXPECT_TRUE(value->GetAsDouble(&number_d));
+  EXPECT_EQ(12.34, number_d);
+
+  // Scientific.
+  input = "42e3,|";
+  parser.reset(NewTestParser(input));
+  value.reset(parser->ConsumeNumber());
+  EXPECT_EQ('3', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  EXPECT_TRUE(value->GetAsDouble(&number_d));
+  EXPECT_EQ(42000, number_d);
+
+  // Negative scientific.
+  input = "314159e-5,|";
+  parser.reset(NewTestParser(input));
+  value.reset(parser->ConsumeNumber());
+  EXPECT_EQ('5', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  EXPECT_TRUE(value->GetAsDouble(&number_d));
+  EXPECT_EQ(3.14159, number_d);
+
+  // Positive scientific.
+  input = "0.42e+3,|";
+  parser.reset(NewTestParser(input));
+  value.reset(parser->ConsumeNumber());
+  EXPECT_EQ('3', *parser->pos_);
+
+  TestLastThree(parser.get());
+
+  ASSERT_TRUE(value.get());
+  EXPECT_TRUE(value->GetAsDouble(&number_d));
+  EXPECT_EQ(420, number_d);
+}
+
+TEST_F(JSONParserTest, ErrorMessages) {
+  // Error strings should not be modified in case of success.
+  std::string error_message;
+  int error_code = 0;
+  scoped_ptr<Value> root;
+  root.reset(JSONReader::ReadAndReturnError("[42]", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_TRUE(error_message.empty());
+  EXPECT_EQ(0, error_code);
+
+  // Test line and column counting. The error is the missing comma between
+  // "6" and "7" on the fifth line of the input.
+  const char* big_json = "[\n0,\n1,\n2,\n3,4,5,6 7,\n8,\n9\n]";
+  root.reset(JSONReader::ReadAndReturnError(big_json, JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(5, 10, JSONReader::kSyntaxError),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code);
+
+  // Test each of the error conditions. Every case checks both the formatted
+  // message and the numeric error code, since |error_code| would otherwise
+  // silently retain the value from the previous case.
+  root.reset(JSONReader::ReadAndReturnError("{},{}", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 3,
+      JSONReader::kUnexpectedDataAfterRoot), error_message);
+  EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, error_code);
+
+  // 101 levels of nested lists.
+  std::string nested_json;
+  for (int i = 0; i < 101; ++i) {
+    nested_json.insert(nested_json.begin(), '[');
+    nested_json.append(1, ']');
+  }
+  root.reset(JSONReader::ReadAndReturnError(nested_json, JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 100, JSONReader::kTooMuchNesting),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_TOO_MUCH_NESTING, error_code);
+
+  // Trailing comma in a list.
+  root.reset(JSONReader::ReadAndReturnError("[1,]", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 4, JSONReader::kTrailingComma),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_TRAILING_COMMA, error_code);
+
+  // Unquoted dictionary key.
+  root.reset(JSONReader::ReadAndReturnError("{foo:\"bar\"}", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 2,
+      JSONReader::kUnquotedDictionaryKey), error_message);
+  EXPECT_EQ(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, error_code);
+
+  // Trailing comma in a dictionary.
+  root.reset(JSONReader::ReadAndReturnError("{\"foo\":\"bar\",}",
+                                            JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 14, JSONReader::kTrailingComma),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_TRAILING_COMMA, error_code);
+
+  // Truncated literal.
+  root.reset(JSONReader::ReadAndReturnError("[nu]", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 2, JSONReader::kSyntaxError),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code);
+
+  // Invalid \x escape.
+  root.reset(JSONReader::ReadAndReturnError("[\"xxx\\xq\"]", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code);
+
+  // Invalid \u escape.
+  root.reset(JSONReader::ReadAndReturnError("[\"xxx\\uq\"]", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code);
+
+  // Unknown escape character.
+  root.reset(JSONReader::ReadAndReturnError("[\"xxx\\q\"]", JSON_PARSE_RFC,
+                                            &error_code, &error_message));
+  EXPECT_FALSE(root.get());
+  EXPECT_EQ(JSONParser::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape),
+            error_message);
+  EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code);
+}
+
+} // namespace internal
+} // namespace base
diff --git a/base/json/json_reader.cc b/base/json/json_reader.cc
index 3457478..fb1459b 100644
--- a/base/json/json_reader.cc
+++ b/base/json/json_reader.cc
@@ -4,73 +4,17 @@
#include "base/json/json_reader.h"
-#include "base/float_util.h"
+#include "base/json/json_parser.h"
#include "base/logging.h"
-#include "base/memory/scoped_ptr.h"
-#include "base/stringprintf.h"
-#include "base/string_number_conversions.h"
-#include "base/string_piece.h"
-#include "base/string_util.h"
-#include "base/third_party/icu/icu_utf.h"
-#include "base/utf_string_conversions.h"
-#include "base/values.h"
-
-namespace {
-
-const char kNullString[] = "null";
-const char kTrueString[] = "true";
-const char kFalseString[] = "false";
-
-const int kStackLimit = 100;
-
-// A helper method for ParseNumberToken. It reads an int from the end of
-// token. The method returns false if there is no valid integer at the end of
-// the token.
-bool ReadInt(base::JSONReader::Token& token, bool can_have_leading_zeros) {
- char first = token.NextChar();
- int len = 0;
-
- // Read in more digits.
- char c = first;
- while ('\0' != c && IsAsciiDigit(c)) {
- ++token.length;
- ++len;
- c = token.NextChar();
- }
- // We need at least 1 digit.
- if (len == 0)
- return false;
-
- if (!can_have_leading_zeros && len > 1 && '0' == first)
- return false;
-
- return true;
-}
-
-// A helper method for ParseStringToken. It reads |digits| hex digits from the
-// token. If the sequence if digits is not valid (contains other characters),
-// the method returns false.
-bool ReadHexDigits(base::JSONReader::Token& token, int digits) {
- for (int i = 1; i <= digits; ++i) {
- char c = *(token.begin + token.length + i);
- if (c == '\0' || !IsHexDigit(c))
- return false;
- }
-
- token.length += digits;
- return true;
-}
-
-} // namespace
namespace base {
-const char* JSONReader::kBadRootElementType =
- "Root value must be an array or object.";
const char* JSONReader::kInvalidEscape =
"Invalid escape sequence.";
const char* JSONReader::kSyntaxError =
"Syntax error.";
+const char* JSONReader::kUnexpectedToken =
+ "Unexpected token.";
const char* JSONReader::kTrailingComma =
"Trailing comma not allowed.";
const char* JSONReader::kTooMuchNesting =
@@ -83,24 +27,27 @@ const char* JSONReader::kUnquotedDictionaryKey =
"Dictionary keys must be quoted.";
JSONReader::JSONReader()
- : start_pos_(NULL),
- json_pos_(NULL),
- end_pos_(NULL),
- stack_depth_(0),
- allow_trailing_comma_(false),
- error_code_(JSON_NO_ERROR),
- error_line_(0),
- error_col_(0) {}
+ : parser_(new internal::JSONParser(JSON_PARSE_RFC)) {
+}
+
+JSONReader::JSONReader(int options)
+ : parser_(new internal::JSONParser(options)) {
+}
+
+JSONReader::~JSONReader() {
+}
// static
Value* JSONReader::Read(const std::string& json) {
- return Read(json, JSON_PARSE_RFC);
+ internal::JSONParser parser(JSON_PARSE_RFC);
+ return parser.Parse(json);
}
// static
Value* JSONReader::Read(const std::string& json,
int options) {
- return ReadAndReturnError(json, options, NULL, NULL);
+ internal::JSONParser parser(options);
+ return parser.Parse(json);
}
// static
@@ -108,16 +55,15 @@ Value* JSONReader::ReadAndReturnError(const std::string& json,
int options,
int* error_code_out,
std::string* error_msg_out) {
- JSONReader reader = JSONReader();
- Value* root = reader.JsonToValue(json, false,
- (options & JSON_ALLOW_TRAILING_COMMAS) != 0);
+ internal::JSONParser parser(options);
+ Value* root = parser.Parse(json);
if (root)
return root;
if (error_code_out)
- *error_code_out = reader.error_code();
+ *error_code_out = parser.error_code();
if (error_msg_out)
- *error_msg_out = reader.GetErrorMessage();
+ *error_msg_out = parser.GetErrorMessage();
return NULL;
}
@@ -127,12 +73,12 @@ std::string JSONReader::ErrorCodeToString(JsonParseError error_code) {
switch (error_code) {
case JSON_NO_ERROR:
return std::string();
- case JSON_BAD_ROOT_ELEMENT_TYPE:
- return kBadRootElementType;
case JSON_INVALID_ESCAPE:
return kInvalidEscape;
case JSON_SYNTAX_ERROR:
return kSyntaxError;
+ case JSON_UNEXPECTED_TOKEN:
+ return kUnexpectedToken;
case JSON_TRAILING_COMMA:
return kTrailingComma;
case JSON_TOO_MUCH_NESTING:
@@ -149,586 +95,16 @@ std::string JSONReader::ErrorCodeToString(JsonParseError error_code) {
}
}
-std::string JSONReader::GetErrorMessage() const {
- return FormatErrorMessage(error_line_, error_col_,
- ErrorCodeToString(error_code_));
-}
-
-Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
- bool allow_trailing_comma) {
- // The input must be in UTF-8.
- if (!IsStringUTF8(json.data())) {
- error_code_ = JSON_UNSUPPORTED_ENCODING;
- return NULL;
- }
-
- start_pos_ = json.data();
- end_pos_ = start_pos_ + json.size();
-
- // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF)
- // or <0xEF 0xBB 0xBF>, advance the start position to avoid the
- // JSONReader::BuildValue() function from mis-treating a Unicode BOM as an
- // invalid character and returning NULL.
- if (json.size() >= 3 && static_cast<uint8>(start_pos_[0]) == 0xEF &&
- static_cast<uint8>(start_pos_[1]) == 0xBB &&
- static_cast<uint8>(start_pos_[2]) == 0xBF) {
- start_pos_ += 3;
- }
-
- json_pos_ = start_pos_;
- allow_trailing_comma_ = allow_trailing_comma;
- stack_depth_ = 0;
- error_code_ = JSON_NO_ERROR;
-
- scoped_ptr<Value> root(BuildValue(check_root));
- if (root.get()) {
- if (ParseToken().type == Token::END_OF_INPUT) {
- return root.release();
- } else {
- SetErrorCode(JSON_UNEXPECTED_DATA_AFTER_ROOT, json_pos_);
- }
- }
-
- // Default to calling errors "syntax errors".
- if (error_code_ == 0)
- SetErrorCode(JSON_SYNTAX_ERROR, json_pos_);
-
- return NULL;
-}
-
-// static
-std::string JSONReader::FormatErrorMessage(int line, int column,
- const std::string& description) {
- if (line || column) {
- return base::StringPrintf(
- "Line: %i, column: %i, %s", line, column, description.c_str());
- }
- return description;
+Value* JSONReader::ReadToValue(const std::string& json) {
+ return parser_->Parse(json);
}
-Value* JSONReader::BuildValue(bool is_root) {
- ++stack_depth_;
- if (stack_depth_ > kStackLimit) {
- SetErrorCode(JSON_TOO_MUCH_NESTING, json_pos_);
- return NULL;
- }
-
- Token token = ParseToken();
- // The root token must be an array or an object.
- if (is_root && token.type != Token::OBJECT_BEGIN &&
- token.type != Token::ARRAY_BEGIN) {
- SetErrorCode(JSON_BAD_ROOT_ELEMENT_TYPE, json_pos_);
- return NULL;
- }
-
- scoped_ptr<Value> node;
-
- switch (token.type) {
- case Token::END_OF_INPUT:
- case Token::INVALID_TOKEN:
- return NULL;
-
- case Token::NULL_TOKEN:
- node.reset(Value::CreateNullValue());
- break;
-
- case Token::BOOL_TRUE:
- node.reset(Value::CreateBooleanValue(true));
- break;
-
- case Token::BOOL_FALSE:
- node.reset(Value::CreateBooleanValue(false));
- break;
-
- case Token::NUMBER:
- node.reset(DecodeNumber(token));
- if (!node.get())
- return NULL;
- break;
-
- case Token::STRING:
- node.reset(DecodeString(token));
- if (!node.get())
- return NULL;
- break;
-
- case Token::ARRAY_BEGIN:
- {
- json_pos_ += token.length;
- token = ParseToken();
-
- node.reset(new ListValue());
- while (token.type != Token::ARRAY_END) {
- Value* array_node = BuildValue(false);
- if (!array_node)
- return NULL;
- static_cast<ListValue*>(node.get())->Append(array_node);
-
- // After a list value, we expect a comma or the end of the list.
- token = ParseToken();
- if (token.type == Token::LIST_SEPARATOR) {
- json_pos_ += token.length;
- token = ParseToken();
- // Trailing commas are invalid according to the JSON RFC, but some
- // consumers need the parsing leniency, so handle accordingly.
- if (token.type == Token::ARRAY_END) {
- if (!allow_trailing_comma_) {
- SetErrorCode(JSON_TRAILING_COMMA, json_pos_);
- return NULL;
- }
- // Trailing comma OK, stop parsing the Array.
- break;
- }
- } else if (token.type != Token::ARRAY_END) {
- // Unexpected value after list value. Bail out.
- return NULL;
- }
- }
- if (token.type != Token::ARRAY_END) {
- return NULL;
- }
- break;
- }
-
- case Token::OBJECT_BEGIN:
- {
- json_pos_ += token.length;
- token = ParseToken();
-
- node.reset(new DictionaryValue);
- while (token.type != Token::OBJECT_END) {
- if (token.type != Token::STRING) {
- SetErrorCode(JSON_UNQUOTED_DICTIONARY_KEY, json_pos_);
- return NULL;
- }
- scoped_ptr<Value> dict_key_value(DecodeString(token));
- if (!dict_key_value.get())
- return NULL;
-
- // Convert the key into a wstring.
- std::string dict_key;
- bool success = dict_key_value->GetAsString(&dict_key);
- DCHECK(success);
-
- json_pos_ += token.length;
- token = ParseToken();
- if (token.type != Token::OBJECT_PAIR_SEPARATOR)
- return NULL;
-
- json_pos_ += token.length;
- token = ParseToken();
- Value* dict_value = BuildValue(false);
- if (!dict_value)
- return NULL;
- static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion(
- dict_key, dict_value);
-
- // After a key/value pair, we expect a comma or the end of the
- // object.
- token = ParseToken();
- if (token.type == Token::LIST_SEPARATOR) {
- json_pos_ += token.length;
- token = ParseToken();
- // Trailing commas are invalid according to the JSON RFC, but some
- // consumers need the parsing leniency, so handle accordingly.
- if (token.type == Token::OBJECT_END) {
- if (!allow_trailing_comma_) {
- SetErrorCode(JSON_TRAILING_COMMA, json_pos_);
- return NULL;
- }
- // Trailing comma OK, stop parsing the Object.
- break;
- }
- } else if (token.type != Token::OBJECT_END) {
- // Unexpected value after last object value. Bail out.
- return NULL;
- }
- }
- if (token.type != Token::OBJECT_END)
- return NULL;
-
- break;
- }
-
- default:
- // We got a token that's not a value.
- return NULL;
- }
- json_pos_ += token.length;
-
- --stack_depth_;
- return node.release();
+JSONReader::JsonParseError JSONReader::error_code() const {
+ return parser_->error_code();
}
-JSONReader::Token JSONReader::ParseNumberToken() {
- // We just grab the number here. We validate the size in DecodeNumber.
- // According to RFC4627, a valid number is: [minus] int [frac] [exp]
- Token token(Token::NUMBER, json_pos_, 0);
- char c = *json_pos_;
- if ('-' == c) {
- ++token.length;
- c = token.NextChar();
- }
-
- if (!ReadInt(token, false))
- return Token::CreateInvalidToken();
-
- // Optional fraction part
- c = token.NextChar();
- if ('.' == c) {
- ++token.length;
- if (!ReadInt(token, true))
- return Token::CreateInvalidToken();
- c = token.NextChar();
- }
-
- // Optional exponent part
- if ('e' == c || 'E' == c) {
- ++token.length;
- c = token.NextChar();
- if ('-' == c || '+' == c) {
- ++token.length;
- c = token.NextChar();
- }
- if (!ReadInt(token, true))
- return Token::CreateInvalidToken();
- }
-
- return token;
-}
-
-Value* JSONReader::DecodeNumber(const Token& token) {
- const std::string num_string(token.begin, token.length);
-
- int num_int;
- if (StringToInt(num_string, &num_int))
- return Value::CreateIntegerValue(num_int);
-
- double num_double;
- if (StringToDouble(num_string, &num_double) && base::IsFinite(num_double))
- return Value::CreateDoubleValue(num_double);
-
- return NULL;
-}
-
-JSONReader::Token JSONReader::ParseStringToken() {
- Token token(Token::STRING, json_pos_, 1);
- char c = token.NextChar();
- while (json_pos_ + token.length < end_pos_) {
- if ('\\' == c) {
- ++token.length;
- c = token.NextChar();
- // Make sure the escaped char is valid.
- switch (c) {
- case 'x':
- if (!ReadHexDigits(token, 2)) {
- SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
- return Token::CreateInvalidToken();
- }
- break;
- case 'u':
- if (!ReadHexDigits(token, 4)) {
- SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
- return Token::CreateInvalidToken();
- }
- break;
- case '\\':
- case '/':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- case '"':
- break;
- default:
- SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
- return Token::CreateInvalidToken();
- }
- } else if ('"' == c) {
- ++token.length;
- return token;
- }
- ++token.length;
- c = token.NextChar();
- }
- return Token::CreateInvalidToken();
-}
-
-Value* JSONReader::DecodeString(const Token& token) {
- std::string decoded_str;
- decoded_str.reserve(token.length - 2);
-
- for (int i = 1; i < token.length - 1; ++i) {
- char c = *(token.begin + i);
- if ('\\' == c) {
- ++i;
- c = *(token.begin + i);
- switch (c) {
- case '"':
- case '/':
- case '\\':
- decoded_str.push_back(c);
- break;
- case 'b':
- decoded_str.push_back('\b');
- break;
- case 'f':
- decoded_str.push_back('\f');
- break;
- case 'n':
- decoded_str.push_back('\n');
- break;
- case 'r':
- decoded_str.push_back('\r');
- break;
- case 't':
- decoded_str.push_back('\t');
- break;
- case 'v':
- decoded_str.push_back('\v');
- break;
-
- case 'x': {
- if (i + 2 >= token.length)
- return NULL;
- int hex_digit = 0;
- if (!HexStringToInt(StringPiece(token.begin + i + 1, 2), &hex_digit))
- return NULL;
- decoded_str.push_back(hex_digit);
- i += 2;
- break;
- }
- case 'u':
- if (!ConvertUTF16Units(token, &i, &decoded_str))
- return NULL;
- break;
-
- default:
- // We should only have valid strings at this point. If not,
- // ParseStringToken didn't do its job.
- NOTREACHED();
- return NULL;
- }
- } else {
- // Not escaped
- decoded_str.push_back(c);
- }
- }
- return Value::CreateStringValue(decoded_str);
-}
-
-bool JSONReader::ConvertUTF16Units(const Token& token,
- int* i,
- std::string* dest_string) {
- if (*i + 4 >= token.length)
- return false;
-
- // This is a 32-bit field because the shift operations in the
- // conversion process below cause MSVC to error about "data loss."
- // This only stores UTF-16 code units, though.
- // Consume the UTF-16 code unit, which may be a high surrogate.
- int code_unit16_high = 0;
- if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_high))
- return false;
- *i += 4;
-
- // If this is a high surrogate, consume the next code unit to get the
- // low surrogate.
- int code_unit16_low = 0;
- if (CBU16_IS_SURROGATE(code_unit16_high)) {
- // Make sure this is the high surrogate. If not, it's an encoding
- // error.
- if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
- return false;
-
- // Make sure that the token has more characters to consume the
- // lower surrogate.
- if (*i + 6 >= token.length)
- return false;
- if (*(++(*i) + token.begin) != '\\' || *(++(*i) + token.begin) != 'u')
- return false;
-
- if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_low))
- return false;
- *i += 4;
- if (!CBU16_IS_SURROGATE(code_unit16_low) ||
- !CBU16_IS_TRAIL(code_unit16_low)) {
- return false;
- }
- } else if (!CBU16_IS_SINGLE(code_unit16_high)) {
- // If this is not a code point, it's an encoding error.
- return false;
- }
-
- // Convert the UTF-16 code units to a code point and then to a UTF-8
- // code unit sequence.
- char code_point[8] = { 0 };
- size_t offset = 0;
- if (!code_unit16_low) {
- CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);
- } else {
- uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
- code_unit16_low);
- offset = 0;
- CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);
- }
- dest_string->append(code_point);
- return true;
-}
-
-JSONReader::Token JSONReader::ParseToken() {
- EatWhitespaceAndComments();
-
- Token token(Token::INVALID_TOKEN, 0, 0);
- switch (*json_pos_) {
- case '\0':
- token.type = Token::END_OF_INPUT;
- break;
-
- case 'n':
- if (NextStringMatch(kNullString, arraysize(kNullString) - 1))
- token = Token(Token::NULL_TOKEN, json_pos_, 4);
- break;
-
- case 't':
- if (NextStringMatch(kTrueString, arraysize(kTrueString) - 1))
- token = Token(Token::BOOL_TRUE, json_pos_, 4);
- break;
-
- case 'f':
- if (NextStringMatch(kFalseString, arraysize(kFalseString) - 1))
- token = Token(Token::BOOL_FALSE, json_pos_, 5);
- break;
-
- case '[':
- token = Token(Token::ARRAY_BEGIN, json_pos_, 1);
- break;
-
- case ']':
- token = Token(Token::ARRAY_END, json_pos_, 1);
- break;
-
- case ',':
- token = Token(Token::LIST_SEPARATOR, json_pos_, 1);
- break;
-
- case '{':
- token = Token(Token::OBJECT_BEGIN, json_pos_, 1);
- break;
-
- case '}':
- token = Token(Token::OBJECT_END, json_pos_, 1);
- break;
-
- case ':':
- token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- case '-':
- token = ParseNumberToken();
- break;
-
- case '"':
- token = ParseStringToken();
- break;
- }
- return token;
-}
-
-void JSONReader::EatWhitespaceAndComments() {
- while (json_pos_ != end_pos_) {
- switch (*json_pos_) {
- case ' ':
- case '\n':
- case '\r':
- case '\t':
- ++json_pos_;
- break;
- case '/':
- // TODO(tc): This isn't in the RFC so it should be a parser flag.
- if (!EatComment())
- return;
- break;
- default:
- // Not a whitespace char, just exit.
- return;
- }
- }
-}
-
-bool JSONReader::EatComment() {
- if ('/' != *json_pos_)
- return false;
-
- char next_char = *(json_pos_ + 1);
- if ('/' == next_char) {
- // Line comment, read until \n or \r
- json_pos_ += 2;
- while (json_pos_ != end_pos_) {
- switch (*json_pos_) {
- case '\n':
- case '\r':
- ++json_pos_;
- return true;
- default:
- ++json_pos_;
- }
- }
- } else if ('*' == next_char) {
- // Block comment, read until */
- json_pos_ += 2;
- while (json_pos_ != end_pos_) {
- if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) {
- json_pos_ += 2;
- return true;
- }
- ++json_pos_;
- }
- } else {
- return false;
- }
- return true;
-}
-
-bool JSONReader::NextStringMatch(const char* str, size_t length) {
- return strncmp(json_pos_, str, length) == 0;
-}
-
-void JSONReader::SetErrorCode(JsonParseError error,
- const char* error_pos) {
- int line_number = 1;
- int column_number = 1;
-
- // Figure out the line and column the error occured at.
- for (const char* pos = start_pos_; pos != error_pos; ++pos) {
- if (pos > end_pos_) {
- NOTREACHED();
- return;
- }
-
- if (*pos == '\n') {
- ++line_number;
- column_number = 1;
- } else {
- ++column_number;
- }
- }
-
- error_line_ = line_number;
- error_col_ = column_number;
- error_code_ = error;
+std::string JSONReader::GetErrorMessage() const {
+ return parser_->GetErrorMessage();
}
} // namespace base
diff --git a/base/json/json_reader.h b/base/json/json_reader.h
index 35ee7d3..e081175 100644
--- a/base/json/json_reader.h
+++ b/base/json/json_reader.h
@@ -33,23 +33,18 @@
#include "base/base_export.h"
#include "base/basictypes.h"
+#include "base/memory/scoped_ptr.h"
-// Chromium and Chromium OS check out gtest to different places, so we're
-// unable to compile on both if we include gtest_prod.h here. Instead, include
-// its only contents -- this will need to be updated if the macro ever changes.
-#define FRIEND_TEST(test_case_name, test_name)\
-friend class test_case_name##_##test_name##_Test
+namespace base {
+class Value;
-#define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \
- FRIEND_TEST(test_case_name, test_name); \
- FRIEND_TEST(test_case_name, DISABLED_##test_name); \
- FRIEND_TEST(test_case_name, FLAKY_##test_name); \
- FRIEND_TEST(test_case_name, FAILS_##test_name)
+namespace internal {
+class JSONParser;
+}
+}
namespace base {
-class Value;
-
enum JSONParserOptions {
// Parses the input strictly according to RFC 4627, except for where noted
// above.
@@ -57,56 +52,22 @@ enum JSONParserOptions {
// Allows commas to exist after the last element in structures.
JSON_ALLOW_TRAILING_COMMAS = 1 << 0,
+
+ // The parser can perform optimizations by placing hidden data in the root of
+ // the JSON object, which speeds up certain operations on children. However,
+ // if the child is Remove()d from root, it would result in use-after-free
+ // unless it is DeepCopy()ed or this option is used.
+ JSON_DETACHABLE_CHILDREN = 1 << 1,
};
class BASE_EXPORT JSONReader {
public:
- // A struct to hold a JS token.
- class Token {
- public:
- enum Type {
- OBJECT_BEGIN, // {
- OBJECT_END, // }
- ARRAY_BEGIN, // [
- ARRAY_END, // ]
- STRING,
- NUMBER,
- BOOL_TRUE, // true
- BOOL_FALSE, // false
- NULL_TOKEN, // null
- LIST_SEPARATOR, // ,
- OBJECT_PAIR_SEPARATOR, // :
- END_OF_INPUT,
- INVALID_TOKEN,
- };
-
- Token(Type t, const char* b, int len)
- : type(t), begin(b), length(len) {}
-
- // Get the character that's one past the end of this token.
- char NextChar() {
- return *(begin + length);
- }
-
- static Token CreateInvalidToken() {
- return Token(INVALID_TOKEN, 0, 0);
- }
-
- Type type;
-
- // A pointer into JSONReader::json_pos_ that's the beginning of this token.
- const char* begin;
-
- // End should be one char past the end of the token.
- int length;
- };
-
// Error codes during parsing.
enum JsonParseError {
JSON_NO_ERROR = 0,
- JSON_BAD_ROOT_ELEMENT_TYPE,
JSON_INVALID_ESCAPE,
JSON_SYNTAX_ERROR,
+ JSON_UNEXPECTED_TOKEN,
JSON_TRAILING_COMMA,
JSON_TOO_MUCH_NESTING,
JSON_UNEXPECTED_DATA_AFTER_ROOT,
@@ -115,17 +76,23 @@ class BASE_EXPORT JSONReader {
};
// String versions of parse error codes.
- static const char* kBadRootElementType;
static const char* kInvalidEscape;
static const char* kSyntaxError;
+ static const char* kUnexpectedToken;
static const char* kTrailingComma;
static const char* kTooMuchNesting;
static const char* kUnexpectedDataAfterRoot;
static const char* kUnsupportedEncoding;
static const char* kUnquotedDictionaryKey;
+ // Constructs a reader with the default options, JSON_PARSE_RFC.
JSONReader();
+ // Constructs a reader with custom options.
+ explicit JSONReader(int options);
+
+ ~JSONReader();
+
// Reads and parses |json|, returning a Value. The caller owns the returned
// instance. If |json| is not a properly formed JSON string, returns NULL.
static Value* Read(const std::string& json);
@@ -148,106 +115,19 @@ class BASE_EXPORT JSONReader {
// Returns an empty string if error_code is JSON_NO_ERROR.
static std::string ErrorCodeToString(JsonParseError error_code);
- // Returns the error code if the last call to JsonToValue() failed.
+ // Parses an input string into a Value that is owned by the caller.
+ Value* ReadToValue(const std::string& json);
+
+ // Returns the error code if the last call to ReadToValue() failed.
// Returns JSON_NO_ERROR otherwise.
- JsonParseError error_code() const { return error_code_; }
+ JsonParseError error_code() const;
// Converts error_code_ to a human-readable string, including line and column
// numbers if appropriate.
std::string GetErrorMessage() const;
- // Reads and parses |json|, returning a Value. The caller owns the returned
- // instance. If |json| is not a properly formed JSON string, returns NULL and
- // a detailed error can be retrieved from |error_message()|.
- // If |check_root| is true, we require that the root object be an object or
- // array. Otherwise, it can be any valid JSON type.
- // If |allow_trailing_comma| is true, we will ignore trailing commas in
- // objects and arrays even though this goes against the RFC.
- Value* JsonToValue(const std::string& json, bool check_root,
- bool allow_trailing_comma);
-
private:
- FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, Reading);
- FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, ErrorMessages);
-
- static std::string FormatErrorMessage(int line, int column,
- const std::string& description);
-
- // Recursively build Value. Returns NULL if we don't have a valid JSON
- // string. If |is_root| is true, we verify that the root element is either
- // an object or an array.
- Value* BuildValue(bool is_root);
-
- // Parses a sequence of characters into a Token::NUMBER. If the sequence of
- // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
- // that DecodeNumber is used to actually convert from a string to an
- // int/double.
- Token ParseNumberToken();
-
- // Try and convert the substring that token holds into an int or a double. If
- // we can (ie., no overflow), return the value, else return NULL.
- Value* DecodeNumber(const Token& token);
-
- // Parses a sequence of characters into a Token::STRING. If the sequence of
- // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
- // that DecodeString is used to actually decode the escaped string into an
- // actual wstring.
- Token ParseStringToken();
-
- // Convert the substring into a value string. This should always succeed
- // (otherwise ParseStringToken would have failed).
- Value* DecodeString(const Token& token);
-
- // Helper function for DecodeString that consumes UTF16 [0,2] code units and
- // convers them to UTF8 code untis. |token| is the string token in which the
- // units should be read, |i| is the position in the token at which the first
- // code unit starts, immediately after the |\u|. This will be mutated if code
- // units are consumed. |dest_string| is a string to which the UTF8 code unit
- // should be appended. Returns true on success and false if there's an
- // encoding error.
- bool ConvertUTF16Units(const Token& token,
- int* i,
- std::string* dest_string);
-
- // Grabs the next token in the JSON stream. This does not increment the
- // stream so it can be used to look ahead at the next token.
- Token ParseToken();
-
- // Increments |json_pos_| past leading whitespace and comments.
- void EatWhitespaceAndComments();
-
- // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
- // false.
- bool EatComment();
-
- // Checks if |json_pos_| matches str.
- bool NextStringMatch(const char* str, size_t length);
-
- // Sets the error code that will be returned to the caller. The current
- // line and column are determined and added into the final message.
- void SetErrorCode(const JsonParseError error, const char* error_pos);
-
- // Pointer to the starting position in the input string.
- const char* start_pos_;
-
- // Pointer to the current position in the input string.
- const char* json_pos_;
-
- // Pointer to the last position in the input string.
- const char* end_pos_;
-
- // Used to keep track of how many nested lists/dicts there are.
- int stack_depth_;
-
- // A parser flag that allows trailing commas in objects and arrays.
- bool allow_trailing_comma_;
-
- // Contains the error code for the last call to JsonToValue(), if any.
- JsonParseError error_code_;
- int error_line_;
- int error_col_;
-
- DISALLOW_COPY_AND_ASSIGN(JSONReader);
+ scoped_ptr<internal::JSONParser> parser_;
};
} // namespace base
diff --git a/base/json/json_reader_unittest.cc b/base/json/json_reader_unittest.cc
index 4d6b0c4..38bf590 100644
--- a/base/json/json_reader_unittest.cc
+++ b/base/json/json_reader_unittest.cc
@@ -6,6 +6,7 @@
#include "base/base_paths.h"
#include "base/file_util.h"
+#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/path_service.h"
#include "base/string_piece.h"
@@ -19,56 +20,67 @@ namespace base {
TEST(JSONReaderTest, Reading) {
// some whitespace checking
scoped_ptr<Value> root;
- root.reset(JSONReader().JsonToValue(" null ", false, false));
+ root.reset(JSONReader().ReadToValue(" null "));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_NULL));
// Invalid JSON string
- root.reset(JSONReader().JsonToValue("nu", false, false));
+ root.reset(JSONReader().ReadToValue("nu"));
EXPECT_FALSE(root.get());
// Simple bool
- root.reset(JSONReader().JsonToValue("true ", false, false));
+ root.reset(JSONReader().ReadToValue("true "));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_BOOLEAN));
// Embedded comment
- root.reset(JSONReader().JsonToValue("/* comment */null", false, false));
+ root.reset(JSONReader().ReadToValue("/* comment */null"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_NULL));
- root.reset(JSONReader().JsonToValue("40 /* comment */", false, false));
+ root.reset(JSONReader().ReadToValue("40 /* comment */"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_INTEGER));
- root.reset(JSONReader().JsonToValue("true // comment", false, false));
+ root.reset(JSONReader().ReadToValue("true // comment"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_BOOLEAN));
- root.reset(JSONReader().JsonToValue("/* comment */\"sample string\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("/* comment */\"sample string\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
std::string value;
EXPECT_TRUE(root->GetAsString(&value));
EXPECT_EQ("sample string", value);
+ root.reset(JSONReader().ReadToValue("[1, /* comment, 2 ] */ \n 3]"));
+ ASSERT_TRUE(root.get());
+ ListValue* list = static_cast<ListValue*>(root.get());
+ EXPECT_EQ(2u, list->GetSize());
+ int int_val = 0;
+ EXPECT_TRUE(list->GetInteger(0, &int_val));
+ EXPECT_EQ(1, int_val);
+ EXPECT_TRUE(list->GetInteger(1, &int_val));
+ EXPECT_EQ(3, int_val);
+ root.reset(JSONReader().ReadToValue("[1, /*a*/2, 3]"));
+ ASSERT_TRUE(root.get());
+ list = static_cast<ListValue*>(root.get());
+ EXPECT_EQ(3u, list->GetSize());
// Test number formats
- root.reset(JSONReader().JsonToValue("43", false, false));
+ root.reset(JSONReader().ReadToValue("43"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_INTEGER));
- int int_val = 0;
EXPECT_TRUE(root->GetAsInteger(&int_val));
EXPECT_EQ(43, int_val);
// According to RFC4627, oct, hex, and leading zeros are invalid JSON.
- root.reset(JSONReader().JsonToValue("043", false, false));
+ root.reset(JSONReader().ReadToValue("043"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("0x43", false, false));
+ root.reset(JSONReader().ReadToValue("0x43"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("00", false, false));
+ root.reset(JSONReader().ReadToValue("00"));
EXPECT_FALSE(root.get());
// Test 0 (which needs to be special cased because of the leading zero
// clause).
- root.reset(JSONReader().JsonToValue("0", false, false));
+ root.reset(JSONReader().ReadToValue("0"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_INTEGER));
int_val = 1;
@@ -77,14 +89,14 @@ TEST(JSONReaderTest, Reading) {
// Numbers that overflow ints should succeed, being internally promoted to
// storage as doubles
- root.reset(JSONReader().JsonToValue("2147483648", false, false));
+ root.reset(JSONReader().ReadToValue("2147483648"));
ASSERT_TRUE(root.get());
double double_val;
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(2147483648.0, double_val);
- root.reset(JSONReader().JsonToValue("-2147483649", false, false));
+ root.reset(JSONReader().ReadToValue("-2147483649"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
@@ -92,42 +104,42 @@ TEST(JSONReaderTest, Reading) {
EXPECT_DOUBLE_EQ(-2147483649.0, double_val);
// Parse a double
- root.reset(JSONReader().JsonToValue("43.1", false, false));
+ root.reset(JSONReader().ReadToValue("43.1"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(43.1, double_val);
- root.reset(JSONReader().JsonToValue("4.3e-1", false, false));
+ root.reset(JSONReader().ReadToValue("4.3e-1"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(.43, double_val);
- root.reset(JSONReader().JsonToValue("2.1e0", false, false));
+ root.reset(JSONReader().ReadToValue("2.1e0"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(2.1, double_val);
- root.reset(JSONReader().JsonToValue("2.1e+0001", false, false));
+ root.reset(JSONReader().ReadToValue("2.1e+0001"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(21.0, double_val);
- root.reset(JSONReader().JsonToValue("0.01", false, false));
+ root.reset(JSONReader().ReadToValue("0.01"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(0.01, double_val);
- root.reset(JSONReader().JsonToValue("1.00", false, false));
+ root.reset(JSONReader().ReadToValue("1.00"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_DOUBLE));
double_val = 0.0;
@@ -135,43 +147,43 @@ TEST(JSONReaderTest, Reading) {
EXPECT_DOUBLE_EQ(1.0, double_val);
// Fractional parts must have a digit before and after the decimal point.
- root.reset(JSONReader().JsonToValue("1.", false, false));
+ root.reset(JSONReader().ReadToValue("1."));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue(".1", false, false));
+ root.reset(JSONReader().ReadToValue(".1"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("1.e10", false, false));
+ root.reset(JSONReader().ReadToValue("1.e10"));
EXPECT_FALSE(root.get());
// Exponent must have a digit following the 'e'.
- root.reset(JSONReader().JsonToValue("1e", false, false));
+ root.reset(JSONReader().ReadToValue("1e"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("1E", false, false));
+ root.reset(JSONReader().ReadToValue("1E"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("1e1.", false, false));
+ root.reset(JSONReader().ReadToValue("1e1."));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("1e1.0", false, false));
+ root.reset(JSONReader().ReadToValue("1e1.0"));
EXPECT_FALSE(root.get());
// INF/-INF/NaN are not valid
- root.reset(JSONReader().JsonToValue("1e1000", false, false));
+ root.reset(JSONReader().ReadToValue("1e1000"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("-1e1000", false, false));
+ root.reset(JSONReader().ReadToValue("-1e1000"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("NaN", false, false));
+ root.reset(JSONReader().ReadToValue("NaN"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("nan", false, false));
+ root.reset(JSONReader().ReadToValue("nan"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("inf", false, false));
+ root.reset(JSONReader().ReadToValue("inf"));
EXPECT_FALSE(root.get());
// Invalid number formats
- root.reset(JSONReader().JsonToValue("4.3.1", false, false));
+ root.reset(JSONReader().ReadToValue("4.3.1"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("4e3.1", false, false));
+ root.reset(JSONReader().ReadToValue("4e3.1"));
EXPECT_FALSE(root.get());
// Test string parser
- root.reset(JSONReader().JsonToValue("\"hello world\"", false, false));
+ root.reset(JSONReader().ReadToValue("\"hello world\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
std::string str_val;
@@ -179,7 +191,7 @@ TEST(JSONReaderTest, Reading) {
EXPECT_EQ("hello world", str_val);
// Empty string
- root.reset(JSONReader().JsonToValue("\"\"", false, false));
+ root.reset(JSONReader().ReadToValue("\"\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
str_val.clear();
@@ -187,8 +199,7 @@ TEST(JSONReaderTest, Reading) {
EXPECT_EQ("", str_val);
// Test basic string escapes
- root.reset(JSONReader().JsonToValue("\" \\\"\\\\\\/\\b\\f\\n\\r\\t\\v\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("\" \\\"\\\\\\/\\b\\f\\n\\r\\t\\v\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
str_val.clear();
@@ -196,8 +207,7 @@ TEST(JSONReaderTest, Reading) {
EXPECT_EQ(" \"\\/\b\f\n\r\t\v", str_val);
// Test hex and unicode escapes including the null character.
- root.reset(JSONReader().JsonToValue("\"\\x41\\x00\\u1234\"", false,
- false));
+ root.reset(JSONReader().ReadToValue("\"\\x41\\x00\\u1234\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
str_val.clear();
@@ -205,29 +215,24 @@ TEST(JSONReaderTest, Reading) {
EXPECT_EQ(std::wstring(L"A\0\x1234", 3), UTF8ToWide(str_val));
// Test invalid strings
- root.reset(JSONReader().JsonToValue("\"no closing quote", false, false));
+ root.reset(JSONReader().ReadToValue("\"no closing quote"));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("\"\\z invalid escape char\"", false,
- false));
+ root.reset(JSONReader().ReadToValue("\"\\z invalid escape char\""));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("\"\\xAQ invalid hex code\"", false,
- false));
+ root.reset(JSONReader().ReadToValue("\"\\xAQ invalid hex code\""));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("not enough hex chars\\x1\"", false,
- false));
+ root.reset(JSONReader().ReadToValue("not enough hex chars\\x1\""));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("\"not enough escape chars\\u123\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("\"not enough escape chars\\u123\""));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("\"extra backslash at end of input\\\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("\"extra backslash at end of input\\\""));
EXPECT_FALSE(root.get());
// Basic array
root.reset(JSONReader::Read("[true, false, null]"));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_LIST));
- ListValue* list = static_cast<ListValue*>(root.get());
+ list = static_cast<ListValue*>(root.get());
EXPECT_EQ(3U, list->GetSize());
// Test with trailing comma. Should be parsed the same as above.
@@ -448,32 +453,38 @@ TEST(JSONReaderTest, Reading) {
EXPECT_EQ(5001U, list->GetSize());
// Test utf8 encoded input
- root.reset(JSONReader().JsonToValue("\"\xe7\xbd\x91\xe9\xa1\xb5\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("\"\xe7\xbd\x91\xe9\xa1\xb5\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
str_val.clear();
EXPECT_TRUE(root->GetAsString(&str_val));
EXPECT_EQ(L"\x7f51\x9875", UTF8ToWide(str_val));
+ root.reset(JSONReader().ReadToValue(
+ "{\"path\": \"/tmp/\xc3\xa0\xc3\xa8\xc3\xb2.png\"}"));
+ ASSERT_TRUE(root.get());
+ EXPECT_TRUE(root->IsType(Value::TYPE_DICTIONARY));
+ EXPECT_TRUE(root->GetAsDictionary(&dict_val));
+ EXPECT_TRUE(dict_val->GetString("path", &str_val));
+ EXPECT_EQ("/tmp/\xC3\xA0\xC3\xA8\xC3\xB2.png", str_val);
+
// Test invalid utf8 encoded input
- root.reset(JSONReader().JsonToValue("\"345\xb0\xa1\xb0\xa2\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("\"345\xb0\xa1\xb0\xa2\""));
+ EXPECT_FALSE(root.get());
+ root.reset(JSONReader().ReadToValue("\"123\xc0\x81\""));
EXPECT_FALSE(root.get());
- root.reset(JSONReader().JsonToValue("\"123\xc0\x81\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("\"abc\xc0\xae\""));
EXPECT_FALSE(root.get());
// Test utf16 encoded strings.
- root.reset(JSONReader().JsonToValue("\"\\u20ac3,14\"", false, false));
+ root.reset(JSONReader().ReadToValue("\"\\u20ac3,14\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
str_val.clear();
EXPECT_TRUE(root->GetAsString(&str_val));
EXPECT_EQ("\xe2\x82\xac""3,14", str_val);
- root.reset(JSONReader().JsonToValue("\"\\ud83d\\udca9\\ud83d\\udc6c\"",
- false, false));
+ root.reset(JSONReader().ReadToValue("\"\\ud83d\\udca9\\ud83d\\udc6c\""));
ASSERT_TRUE(root.get());
EXPECT_TRUE(root->IsType(Value::TYPE_STRING));
str_val.clear();
@@ -492,9 +503,28 @@ TEST(JSONReaderTest, Reading) {
"\"\\ud83\\foo\"" // No lower surrogate.
};
for (size_t i = 0; i < arraysize(cases); ++i) {
- root.reset(JSONReader().JsonToValue(cases[i], false, false));
+ root.reset(JSONReader().ReadToValue(cases[i]));
EXPECT_FALSE(root.get()) << cases[i];
}
+
+ // Test literal root objects.
+ root.reset(JSONReader::Read("null"));
+ EXPECT_TRUE(root->IsType(Value::TYPE_NULL));
+
+ root.reset(JSONReader::Read("true"));
+ ASSERT_TRUE(root.get());
+ EXPECT_TRUE(root->GetAsBoolean(&bool_value));
+ EXPECT_TRUE(bool_value);
+
+ root.reset(JSONReader::Read("10"));
+ ASSERT_TRUE(root.get());
+ EXPECT_TRUE(root->GetAsInteger(&integer_value));
+ EXPECT_EQ(10, integer_value);
+
+ root.reset(JSONReader::Read("\"root\""));
+ ASSERT_TRUE(root.get());
+ EXPECT_TRUE(root->GetAsString(&str_val));
+ EXPECT_EQ("root", str_val);
}
TEST(JSONReaderTest, ReadFromFile) {
@@ -509,102 +539,110 @@ TEST(JSONReaderTest, ReadFromFile) {
path.Append(FILE_PATH_LITERAL("bom_feff.json")), &input));
JSONReader reader;
- std::string error_msg;
- scoped_ptr<Value> root(
- JSONReader::ReadAndReturnError(input, JSON_PARSE_RFC, NULL, &error_msg));
+ scoped_ptr<Value> root(reader.ReadToValue(input));
ASSERT_TRUE(root.get()) << reader.GetErrorMessage();
EXPECT_TRUE(root->IsType(Value::TYPE_DICTIONARY));
}
-TEST(JSONReaderTest, ErrorMessages) {
- // Error strings should not be modified in case of success.
- std::string error_message;
- int error_code = 0;
- scoped_ptr<Value> root;
- root.reset(JSONReader::ReadAndReturnError("[42]", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_TRUE(error_message.empty());
- EXPECT_EQ(0, error_code);
-
- // Test line and column counting
- const char* big_json = "[\n0,\n1,\n2,\n3,4,5,6 7,\n8,\n9\n]";
- // error here --------------------------------^
- root.reset(JSONReader::ReadAndReturnError(big_json, JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(5, 9, JSONReader::kSyntaxError),
- error_message);
- EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code);
-
- // Test each of the error conditions
- root.reset(JSONReader::ReadAndReturnError("{},{}", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 3,
- JSONReader::kUnexpectedDataAfterRoot), error_message);
- EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, error_code);
-
- std::string nested_json;
- for (int i = 0; i < 101; ++i) {
- nested_json.insert(nested_json.begin(), '[');
- nested_json.append(1, ']');
+// Tests that the root of a JSON object can be deleted safely while its
+// children outlive it.
+TEST(JSONReaderTest, StringOptimizations) {
+ Value* dict_literals[2] = {0};
+ Value* dict_strings[2] = {0};
+ Value* list_values[2] = {0};
+
+ {
+ scoped_ptr<Value> root(JSONReader::Read(
+ "{"
+ " \"test\": {"
+ " \"foo\": true,"
+ " \"bar\": 3.14,"
+ " \"baz\": \"bat\","
+ " \"moo\": \"cow\""
+ " },"
+ " \"list\": ["
+ " \"a\","
+ " \"b\""
+ " ]"
+ "}", JSON_DETACHABLE_CHILDREN));
+ ASSERT_TRUE(root.get());
+
+ DictionaryValue* root_dict = NULL;
+ ASSERT_TRUE(root->GetAsDictionary(&root_dict));
+
+ DictionaryValue* dict = NULL;
+ ListValue* list = NULL;
+
+ ASSERT_TRUE(root_dict->GetDictionary("test", &dict));
+ ASSERT_TRUE(root_dict->GetList("list", &list));
+
+ EXPECT_TRUE(dict->Remove("foo", &dict_literals[0]));
+ EXPECT_TRUE(dict->Remove("bar", &dict_literals[1]));
+ EXPECT_TRUE(dict->Remove("baz", &dict_strings[0]));
+ EXPECT_TRUE(dict->Remove("moo", &dict_strings[1]));
+
+ ASSERT_EQ(2u, list->GetSize());
+ EXPECT_TRUE(list->Remove(0, &list_values[0]));
+ EXPECT_TRUE(list->Remove(0, &list_values[1]));
}
- root.reset(JSONReader::ReadAndReturnError(nested_json, JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 101, JSONReader::kTooMuchNesting),
- error_message);
- EXPECT_EQ(JSONReader::JSON_TOO_MUCH_NESTING, error_code);
- root.reset(JSONReader::ReadAndReturnError("[1,]", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 4, JSONReader::kTrailingComma),
- error_message);
- EXPECT_EQ(JSONReader::JSON_TRAILING_COMMA, error_code);
+ bool b = false;
+ double d = 0;
+ std::string s;
- root.reset(JSONReader::ReadAndReturnError("{foo:\"bar\"}", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 2,
- JSONReader::kUnquotedDictionaryKey), error_message);
- EXPECT_EQ(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, error_code);
+ EXPECT_TRUE(dict_literals[0]->GetAsBoolean(&b));
+ EXPECT_TRUE(b);
- root.reset(JSONReader::ReadAndReturnError("{\"foo\":\"bar\",}",
- JSON_PARSE_RFC,
- &error_code,
- &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 14, JSONReader::kTrailingComma),
- error_message);
+ EXPECT_TRUE(dict_literals[1]->GetAsDouble(&d));
+ EXPECT_EQ(3.14, d);
- root.reset(JSONReader::ReadAndReturnError("[nu]", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 2, JSONReader::kSyntaxError),
- error_message);
- EXPECT_EQ(JSONReader::JSON_SYNTAX_ERROR, error_code);
+ EXPECT_TRUE(dict_strings[0]->GetAsString(&s));
+ EXPECT_EQ("bat", s);
- root.reset(JSONReader::ReadAndReturnError("[\"xxx\\xq\"]", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape),
- error_message);
- EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code);
+ EXPECT_TRUE(dict_strings[1]->GetAsString(&s));
+ EXPECT_EQ("cow", s);
- root.reset(JSONReader::ReadAndReturnError("[\"xxx\\uq\"]", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape),
- error_message);
- EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code);
+ EXPECT_TRUE(list_values[0]->GetAsString(&s));
+ EXPECT_EQ("a", s);
+ EXPECT_TRUE(list_values[1]->GetAsString(&s));
+ EXPECT_EQ("b", s);
- root.reset(JSONReader::ReadAndReturnError("[\"xxx\\q\"]", JSON_PARSE_RFC,
- &error_code, &error_message));
- EXPECT_FALSE(root.get());
- EXPECT_EQ(JSONReader::FormatErrorMessage(1, 7, JSONReader::kInvalidEscape),
- error_message);
- EXPECT_EQ(JSONReader::JSON_INVALID_ESCAPE, error_code);
+ delete dict_literals[0];
+ delete dict_literals[1];
+ delete dict_strings[0];
+ delete dict_strings[1];
+ delete list_values[0];
+ delete list_values[1];
+}
+
+// A smattering of invalid JSON designed to test specific portions of the
+// parser implementation against buffer overflow. Best run with DCHECKs so
+// that the one in NextChar fires.
+TEST(JSONReaderTest, InvalidSanity) {
+ const char* invalid_json[] = {
+ "/* test *",
+ "{\"foo\"",
+ "{\"foo\":",
+ " [",
+ "\"\\u123g\"",
+ "{\n\"eh:\n}",
+ };
+
+ for (size_t i = 0; i < arraysize(invalid_json); ++i) {
+ JSONReader reader;
+ LOG(INFO) << "Sanity test " << i << ": <" << invalid_json[i] << ">";
+ EXPECT_FALSE(reader.ReadToValue(invalid_json[i]));
+ EXPECT_NE(JSONReader::JSON_NO_ERROR, reader.error_code());
+ EXPECT_NE("", reader.GetErrorMessage());
+ }
+}
+
+TEST(JSONReaderTest, IllegalTrailingNull) {
+ const char json[] = { '"', 'n', 'u', 'l', 'l', '"', '\0' };
+ std::string json_string(json, sizeof(json));
+ JSONReader reader;
+ EXPECT_FALSE(reader.ReadToValue(json_string));
+ EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, reader.error_code());
}
} // namespace base
diff --git a/base/string_util.cc b/base/string_util.cc
index fd6fccc..2eecb79 100644
--- a/base/string_util.cc
+++ b/base/string_util.cc
@@ -472,7 +472,7 @@ bool IsStringUTF8(const std::string& str) {
int32 code_point;
CBU8_NEXT(src, char_index, src_len, code_point);
if (!base::IsValidCharacter(code_point))
- return false;
+ return false;
}
return true;
}
diff --git a/base/values.cc b/base/values.cc
index 8d7ca35..d561d68 100644
--- a/base/values.cc
+++ b/base/values.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -694,6 +694,10 @@ void DictionaryValue::MergeDictionary(const DictionaryValue* dictionary) {
}
}
+void DictionaryValue::Swap(DictionaryValue* other) {
+  other->dictionary_.swap(dictionary_);  // Exchange |dictionary_| with |other|.
+}
+
DictionaryValue* DictionaryValue::DeepCopy() const {
DictionaryValue* result = new DictionaryValue;
@@ -908,6 +912,10 @@ ListValue::const_iterator ListValue::Find(const Value& value) const {
return std::find_if(list_.begin(), list_.end(), ValueEquals(&value));
}
+void ListValue::Swap(ListValue* other) {
+  other->list_.swap(list_);  // Exchange |list_| with |other|.
+}
+
bool ListValue::GetAsList(ListValue** out_value) {
if (out_value)
*out_value = this;
diff --git a/base/values.h b/base/values.h
index 4bcdc75..1d35d63 100644
--- a/base/values.h
+++ b/base/values.h
@@ -303,11 +303,12 @@ class BASE_EXPORT DictionaryValue : public Value {
// passed out via out_value. If |out_value| is NULL, the removed value will
// be deleted. This method returns true if |path| is a valid path; otherwise
// it will return false and the DictionaryValue object will be unchanged.
- bool Remove(const std::string& path, Value** out_value);
+ virtual bool Remove(const std::string& path, Value** out_value);
// Like Remove(), but without special treatment of '.'. This allows e.g. URLs
// to be used as paths.
- bool RemoveWithoutPathExpansion(const std::string& key, Value** out_value);
+ virtual bool RemoveWithoutPathExpansion(const std::string& key,
+ Value** out_value);
// Makes a copy of |this| but doesn't include empty dictionaries and lists in
// the copy. This never returns NULL, even if |this| itself is empty.
@@ -321,9 +322,7 @@ class BASE_EXPORT DictionaryValue : public Value {
void MergeDictionary(const DictionaryValue* dictionary);
// Swaps contents with the |other| dictionary.
- void Swap(DictionaryValue* other) {
- dictionary_.swap(other->dictionary_);
- }
+ virtual void Swap(DictionaryValue* other);
// This class provides an iterator for the keys in the dictionary.
// It can't be used to modify the dictionary.
@@ -425,7 +424,7 @@ class BASE_EXPORT ListValue : public Value {
// passed out via |out_value|. If |out_value| is NULL, the removed value will
// be deleted. This method returns true if |index| is valid; otherwise
// it will return false and the ListValue object will be unchanged.
- bool Remove(size_t index, Value** out_value);
+ virtual bool Remove(size_t index, Value** out_value);
// Removes the first instance of |value| found in the list, if any, and
// deletes it. |index| is the location where |value| was found. Returns false
@@ -450,9 +449,7 @@ class BASE_EXPORT ListValue : public Value {
const_iterator Find(const Value& value) const;
// Swaps contents with the |other| list.
- void Swap(ListValue* other) {
- list_.swap(other->list_);
- }
+ virtual void Swap(ListValue* other);
// Iteration.
iterator begin() { return list_.begin(); }