summaryrefslogtreecommitdiffstats
path: root/base/json/json_reader.cc
diff options
context:
space:
mode:
Diffstat (limited to 'base/json/json_reader.cc')
-rw-r--r--base/json/json_reader.cc642
1 files changed, 642 insertions, 0 deletions
diff --git a/base/json/json_reader.cc b/base/json/json_reader.cc
new file mode 100644
index 0000000..06d790c
--- /dev/null
+++ b/base/json/json_reader.cc
@@ -0,0 +1,642 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/json/json_reader.h"
+
+#include "base/float_util.h"
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "base/values.h"
+
+namespace base {
+
+// Sentinel returned by the tokenizer routines in this file whenever the
+// input at the current position cannot be turned into a token.
+static const JSONReader::Token kInvalidToken(
+    JSONReader::Token::INVALID_TOKEN, 0, 0);
+
+// Largest value of stack_depth_ that BuildValue() accepts before it gives up
+// with kTooMuchNesting.
+static const int kStackLimit = 100;
+
+namespace {
+
+// Returns the numeric value (0-15) of the hexadecimal digit |c|, accepting
+// both upper and lower case letters. Passing anything other than a hex digit
+// is a programming error: it trips NOTREACHED() and yields 0.
+inline int HexToInt(wchar_t c) {
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  if (c >= 'a' && c <= 'f')
+    return 10 + (c - 'a');
+  if (c >= 'A' && c <= 'F')
+    return 10 + (c - 'A');
+
+  NOTREACHED();
+  return 0;
+}
+
+// Helper for ParseNumberToken. Starting at token.NextChar(), consumes a run
+// of decimal digits and grows token.length by one per digit consumed.
+// Returns false when no digit is present, or when |can_have_leading_zeros|
+// is false and a run of two or more digits starts with '0'.
+bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) {
+  const wchar_t first_char = token.NextChar();
+  int digit_count = 0;
+
+  // Extend the token over every consecutive digit.
+  for (wchar_t cur = first_char; cur >= '0' && cur <= '9';
+       cur = token.NextChar()) {
+    ++token.length;
+    ++digit_count;
+  }
+
+  // At least one digit is mandatory.
+  if (digit_count == 0)
+    return false;
+
+  const bool has_leading_zero = digit_count > 1 && '0' == first_char;
+  return can_have_leading_zeros || !has_leading_zero;
+}
+
+// Helper for ParseStringToken. Checks that the |digits| characters located
+// after the character at offset token.length are all hexadecimal digits.
+// On success the token is extended by |digits| characters and true is
+// returned. If any of them is not a hex digit (including the end of the
+// input), the token is left untouched and false is returned.
+bool ReadHexDigits(JSONReader::Token& token, int digits) {
+  for (int offset = 1; offset <= digits; ++offset) {
+    const wchar_t c = *(token.begin + token.length + offset);
+    const bool is_decimal = c >= '0' && c <= '9';
+    const bool is_lower_hex = c >= 'a' && c <= 'f';
+    const bool is_upper_hex = c >= 'A' && c <= 'F';
+    // A '\0' terminator falls in none of the ranges, so it is rejected here
+    // too and we never look past the end of the input.
+    if (!is_decimal && !is_lower_hex && !is_upper_hex)
+      return false;
+  }
+
+  token.length += digits;
+  return true;
+}
+
+} // anonymous namespace
+
+const char* JSONReader::kBadRootElementType =
+ "Root value must be an array or object.";
+const char* JSONReader::kInvalidEscape =
+ "Invalid escape sequence.";
+const char* JSONReader::kSyntaxError =
+ "Syntax error.";
+const char* JSONReader::kTrailingComma =
+ "Trailing comma not allowed.";
+const char* JSONReader::kTooMuchNesting =
+ "Too much nesting.";
+const char* JSONReader::kUnexpectedDataAfterRoot =
+ "Unexpected data after root element.";
+const char* JSONReader::kUnsupportedEncoding =
+ "Unsupported encoding. JSON must be UTF-8.";
+const char* JSONReader::kUnquotedDictionaryKey =
+ "Dictionary keys must be quoted.";
+
+/* static */
+// Convenience wrapper around ReadAndReturnError() for callers that are not
+// interested in the error text. Returns whatever ReadAndReturnError() does.
+Value* JSONReader::Read(const std::string& json,
+                        bool allow_trailing_comma) {
+  std::string* const no_error_message = NULL;
+  return ReadAndReturnError(json, allow_trailing_comma, no_error_message);
+}
+
+/* static */
+// Parses |json| with a temporary reader, requiring the root value to be an
+// array or an object. Returns the parsed value on success. On failure
+// returns NULL and, if |error_message_out| is non-NULL, stores the reader's
+// error message in it. |error_message_out| is not touched on success.
+Value* JSONReader::ReadAndReturnError(const std::string& json,
+                                      bool allow_trailing_comma,
+                                      std::string* error_message_out) {
+  JSONReader reader;
+  Value* const root = reader.JsonToValue(json, true, allow_trailing_comma);
+
+  if (!root && error_message_out)
+    *error_message_out = reader.error_message();
+
+  // |root| is NULL exactly when parsing failed.
+  return root;
+}
+
+/* static */
+// Builds the error string "Line: <line>, column: <column>, <description>".
+std::string JSONReader::FormatErrorMessage(int line, int column,
+                                           const char* description) {
+  std::string message =
+      StringPrintf("Line: %i, column: %i, %s", line, column, description);
+  return message;
+}
+
+// Creates a reader with no input attached; JsonToValue() sets up all of the
+// parsing state before it is used.
+JSONReader::JSONReader()
+    : start_pos_(NULL),
+      json_pos_(NULL),
+      stack_depth_(0),
+      allow_trailing_comma_(false) {
+}
+
+// Parses |json| and returns the resulting value, or NULL on failure, in
+// which case error_message_ describes the problem. When |check_root| is true
+// the root value must be an array or an object. |allow_trailing_comma|
+// selects whether a ',' directly before ']' or '}' is tolerated.
+Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
+                               bool allow_trailing_comma) {
+  // Only UTF-8 input is accepted.
+  if (!IsStringUTF8(json.c_str())) {
+    error_message_ = kUnsupportedEncoding;
+    return NULL;
+  }
+
+  // The conversion from UTF8 to wstring removes null bytes for us
+  // (a good thing).
+  // start_pos_ and json_pos_ point into this local buffer, so they are only
+  // meaningful until this function returns.
+  std::wstring json_wide(UTF8ToWide(json));
+  start_pos_ = json_wide.c_str();
+
+  // A UTF-8 Byte-Order-Mark (0xEF, 0xBB, 0xBF) at the start of the input is
+  // converted by UTF8ToWide() into a Unicode BOM (U+FEFF). Step over it so
+  // that BuildValue() does not reject it as an invalid character.
+  const bool starts_with_bom = !json_wide.empty() && start_pos_[0] == 0xFEFF;
+  if (starts_with_bom)
+    ++start_pos_;
+
+  // Reset all per-parse state.
+  json_pos_ = start_pos_;
+  allow_trailing_comma_ = allow_trailing_comma;
+  stack_depth_ = 0;
+  error_message_.clear();
+
+  scoped_ptr<Value> root(BuildValue(check_root));
+  if (root.get()) {
+    // Nothing but whitespace and comments may follow the root value.
+    if (ParseToken().type == Token::END_OF_INPUT)
+      return root.release();
+    SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_);
+  }
+
+  // Default to calling errors "syntax errors".
+  if (error_message_.empty())
+    SetErrorMessage(kSyntaxError, json_pos_);
+
+  return NULL;
+}
+
+// Recursively parses one JSON value starting at json_pos_ and returns it, or
+// returns NULL if the input is malformed. On success json_pos_ is left just
+// past the value. When |is_root| is true the value must be an array or an
+// object. Nesting beyond kStackLimit is rejected with kTooMuchNesting.
+// The caller owns the returned value.
+Value* JSONReader::BuildValue(bool is_root) {
+  ++stack_depth_;
+  if (stack_depth_ > kStackLimit) {
+    SetErrorMessage(kTooMuchNesting, json_pos_);
+    return NULL;
+  }
+
+  Token token = ParseToken();
+
+  // The root token must be an array or an object.
+  const bool is_container = token.type == Token::OBJECT_BEGIN ||
+                            token.type == Token::ARRAY_BEGIN;
+  if (is_root && !is_container) {
+    SetErrorMessage(kBadRootElementType, json_pos_);
+    return NULL;
+  }
+
+  scoped_ptr<Value> node;
+
+  switch (token.type) {
+    case Token::END_OF_INPUT:
+    case Token::INVALID_TOKEN:
+      return NULL;
+
+    case Token::NULL_TOKEN:
+      node.reset(Value::CreateNullValue());
+      break;
+
+    case Token::BOOL_TRUE:
+      node.reset(Value::CreateBooleanValue(true));
+      break;
+
+    case Token::BOOL_FALSE:
+      node.reset(Value::CreateBooleanValue(false));
+      break;
+
+    case Token::NUMBER:
+      node.reset(DecodeNumber(token));
+      if (!node.get())
+        return NULL;
+      break;
+
+    case Token::STRING:
+      node.reset(DecodeString(token));
+      if (!node.get())
+        return NULL;
+      break;
+
+    case Token::ARRAY_BEGIN: {
+      // Step over '[' and peek at what follows.
+      json_pos_ += token.length;
+      token = ParseToken();
+
+      ListValue* list = new ListValue();
+      node.reset(list);  // |node| owns the list from here on.
+      while (token.type != Token::ARRAY_END) {
+        Value* element = BuildValue(false);
+        if (!element)
+          return NULL;
+        list->Append(element);
+
+        // After a list value, we expect a comma or the end of the list.
+        token = ParseToken();
+        if (token.type == Token::ARRAY_END)
+          break;
+        if (token.type != Token::LIST_SEPARATOR) {
+          // Unexpected value after list value. Bail out.
+          return NULL;
+        }
+
+        json_pos_ += token.length;
+        token = ParseToken();
+        // Trailing commas are invalid according to the JSON RFC, but some
+        // consumers need the parsing leniency, so handle accordingly.
+        if (token.type == Token::ARRAY_END) {
+          if (!allow_trailing_comma_) {
+            SetErrorMessage(kTrailingComma, json_pos_);
+            return NULL;
+          }
+          // Trailing comma OK, stop parsing the Array.
+          break;
+        }
+      }
+      // Every way out of the loop leaves |token| on the closing ']'; it is
+      // consumed after the switch.
+      break;
+    }
+
+    case Token::OBJECT_BEGIN: {
+      // Step over '{' and peek at what follows.
+      json_pos_ += token.length;
+      token = ParseToken();
+
+      DictionaryValue* dict = new DictionaryValue;
+      node.reset(dict);  // |node| owns the dictionary from here on.
+      while (token.type != Token::OBJECT_END) {
+        if (token.type != Token::STRING) {
+          SetErrorMessage(kUnquotedDictionaryKey, json_pos_);
+          return NULL;
+        }
+        scoped_ptr<Value> key_value(DecodeString(token));
+        if (!key_value.get())
+          return NULL;
+
+        // Convert the key into a wstring.
+        std::wstring key;
+        bool success = key_value->GetAsString(&key);
+        DCHECK(success);
+
+        // The key must be followed by ':'.
+        json_pos_ += token.length;
+        token = ParseToken();
+        if (token.type != Token::OBJECT_PAIR_SEPARATOR)
+          return NULL;
+
+        // Step over ':'. The token parsed here is not inspected;
+        // BuildValue() tokenizes the member value again itself.
+        json_pos_ += token.length;
+        token = ParseToken();
+        Value* member = BuildValue(false);
+        if (!member)
+          return NULL;
+        dict->Set(key, member);
+
+        // After a key/value pair, we expect a comma or the end of the
+        // object.
+        token = ParseToken();
+        if (token.type == Token::OBJECT_END)
+          break;
+        if (token.type != Token::LIST_SEPARATOR) {
+          // Unexpected value after last object value. Bail out.
+          return NULL;
+        }
+
+        json_pos_ += token.length;
+        token = ParseToken();
+        // Trailing commas are invalid according to the JSON RFC, but some
+        // consumers need the parsing leniency, so handle accordingly.
+        if (token.type == Token::OBJECT_END) {
+          if (!allow_trailing_comma_) {
+            SetErrorMessage(kTrailingComma, json_pos_);
+            return NULL;
+          }
+          // Trailing comma OK, stop parsing the Object.
+          break;
+        }
+      }
+      // Every way out of the loop leaves |token| on the closing '}'; it is
+      // consumed after the switch.
+      break;
+    }
+
+    default:
+      // We got a token that's not a value.
+      return NULL;
+  }
+
+  // Consume the last token of the value: the scalar itself, or the closing
+  // bracket of a container.
+  json_pos_ += token.length;
+
+  --stack_depth_;
+  return node.release();
+}
+
+// Scans the number starting at json_pos_ and returns a NUMBER token covering
+// it, or kInvalidToken if the text is not a well-formed number. Only the
+// syntax is checked here; the magnitude is validated in DecodeNumber.
+// According to RFC4627, a valid number is: [minus] int [frac] [exp]
+JSONReader::Token JSONReader::ParseNumberToken() {
+  Token token(Token::NUMBER, json_pos_, 0);
+
+  // Optional minus sign.
+  if ('-' == *json_pos_)
+    ++token.length;
+
+  // Mandatory integer part; leading zeros are not allowed.
+  if (!ReadInt(token, false))
+    return kInvalidToken;
+
+  // Optional fraction part: '.' followed by at least one digit.
+  if ('.' == token.NextChar()) {
+    ++token.length;
+    if (!ReadInt(token, true))
+      return kInvalidToken;
+  }
+
+  // Optional exponent part: 'e' or 'E', an optional sign, then at least one
+  // digit.
+  const wchar_t exponent_marker = token.NextChar();
+  if ('e' == exponent_marker || 'E' == exponent_marker) {
+    ++token.length;
+    const wchar_t sign = token.NextChar();
+    if ('-' == sign || '+' == sign)
+      ++token.length;
+    if (!ReadInt(token, true))
+      return kInvalidToken;
+  }
+
+  return token;
+}
+
+// Converts the text covered by the NUMBER |token| into a value. An integer
+// value is produced when the text parses as an int; otherwise a real value
+// is produced when it parses as a finite double. Returns NULL if neither
+// conversion succeeds.
+Value* JSONReader::DecodeNumber(const Token& token) {
+  const std::wstring num_string(token.begin, token.length);
+
+  // Prefer the integer representation.
+  int as_int;
+  const bool is_int = StringToInt(WideToUTF16Hack(num_string), &as_int);
+  if (is_int)
+    return Value::CreateIntegerValue(as_int);
+
+  double as_double;
+  if (!StringToDouble(WideToUTF16Hack(num_string), &as_double))
+    return NULL;
+  if (!base::IsFinite(as_double))
+    return NULL;
+
+  return Value::CreateRealValue(as_double);
+}
+
+// Scans the quoted string starting at json_pos_ (which is on the opening
+// quote) and returns a STRING token that includes both quotes. Returns
+// kInvalidToken if the input ends before the closing quote, or if a
+// backslash is followed by an unsupported or malformed escape; in the latter
+// case kInvalidEscape is recorded as the error.
+JSONReader::Token JSONReader::ParseStringToken() {
+  // The initial length of 1 accounts for the opening quote.
+  Token token(Token::STRING, json_pos_, 1);
+
+  for (wchar_t c = token.NextChar(); '\0' != c; c = token.NextChar()) {
+    if ('"' == c) {
+      // Closing quote: include it and finish.
+      ++token.length;
+      return token;
+    }
+
+    if ('\\' == c) {
+      // Step over the backslash and make sure the escaped char is valid.
+      ++token.length;
+      int hex_digits = 0;
+      switch (token.NextChar()) {
+        case 'x':
+          hex_digits = 2;
+          break;
+        case 'u':
+          hex_digits = 4;
+          break;
+        case '\\':
+        case '/':
+        case 'b':
+        case 'f':
+        case 'n':
+        case 'r':
+        case 't':
+        case 'v':
+        case '"':
+          break;
+        default:
+          SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
+          return kInvalidToken;
+      }
+      if (hex_digits > 0 && !ReadHexDigits(token, hex_digits)) {
+        SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
+        return kInvalidToken;
+      }
+    }
+
+    // Consume the current character (for escapes, the last character of the
+    // escape sequence).
+    ++token.length;
+  }
+
+  // Ran off the end of the input without seeing a closing quote.
+  return kInvalidToken;
+}
+
+// Converts the STRING |token| (which includes its surrounding quotes) into a
+// string value, replacing every escape sequence with the character it
+// stands for. The token is expected to have been validated by
+// ParseStringToken; an unknown escape trips NOTREACHED() and returns NULL.
+Value* JSONReader::DecodeString(const Token& token) {
+  std::wstring decoded_str;
+  decoded_str.reserve(token.length - 2);
+
+  // Walk the characters between the two quotes.
+  for (int i = 1; i < token.length - 1; ++i) {
+    const wchar_t c = token.begin[i];
+    if ('\\' != c) {
+      // Not escaped
+      decoded_str.push_back(c);
+      continue;
+    }
+
+    // Move on to the character that selects the escape; any hex digits
+    // follow directly after it.
+    const wchar_t escaped = token.begin[++i];
+    const wchar_t* hex = token.begin + i + 1;
+    switch (escaped) {
+      case '"':
+      case '/':
+      case '\\':
+        decoded_str.push_back(escaped);
+        break;
+      case 'b':
+        decoded_str.push_back('\b');
+        break;
+      case 'f':
+        decoded_str.push_back('\f');
+        break;
+      case 'n':
+        decoded_str.push_back('\n');
+        break;
+      case 'r':
+        decoded_str.push_back('\r');
+        break;
+      case 't':
+        decoded_str.push_back('\t');
+        break;
+      case 'v':
+        decoded_str.push_back('\v');
+        break;
+
+      case 'x':
+        // Two hex digits.
+        decoded_str.push_back((HexToInt(hex[0]) << 4) +
+                              HexToInt(hex[1]));
+        i += 2;
+        break;
+      case 'u':
+        // Four hex digits.
+        decoded_str.push_back((HexToInt(hex[0]) << 12) +
+                              (HexToInt(hex[1]) << 8) +
+                              (HexToInt(hex[2]) << 4) +
+                              HexToInt(hex[3]));
+        i += 4;
+        break;
+
+      default:
+        // We should only have valid strings at this point. If not,
+        // ParseStringToken didn't do its job.
+        NOTREACHED();
+        return NULL;
+    }
+  }
+  return Value::CreateStringValue(decoded_str);
+}
+
+// Skips whitespace and comments, then identifies the token that starts at
+// json_pos_. json_pos_ is left on the first character of the token; callers
+// advance past it themselves. Returns an INVALID_TOKEN token when the input
+// at json_pos_ does not start any known token.
+JSONReader::Token JSONReader::ParseToken() {
+  static const std::wstring kNullString(L"null");
+  static const std::wstring kTrueString(L"true");
+  static const std::wstring kFalseString(L"false");
+
+  EatWhitespaceAndComments();
+
+  switch (*json_pos_) {
+    case '\0': {
+      Token end_token(Token::INVALID_TOKEN, 0, 0);
+      end_token.type = Token::END_OF_INPUT;
+      return end_token;
+    }
+
+    // Keyword literals.
+    case 'n':
+      if (NextStringMatch(kNullString))
+        return Token(Token::NULL_TOKEN, json_pos_, 4);
+      break;
+    case 't':
+      if (NextStringMatch(kTrueString))
+        return Token(Token::BOOL_TRUE, json_pos_, 4);
+      break;
+    case 'f':
+      if (NextStringMatch(kFalseString))
+        return Token(Token::BOOL_FALSE, json_pos_, 5);
+      break;
+
+    // Single character structural tokens.
+    case '[':
+      return Token(Token::ARRAY_BEGIN, json_pos_, 1);
+    case ']':
+      return Token(Token::ARRAY_END, json_pos_, 1);
+    case ',':
+      return Token(Token::LIST_SEPARATOR, json_pos_, 1);
+    case '{':
+      return Token(Token::OBJECT_BEGIN, json_pos_, 1);
+    case '}':
+      return Token(Token::OBJECT_END, json_pos_, 1);
+    case ':':
+      return Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
+
+    // Numbers start with a digit or a minus sign.
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+    case '-':
+      return ParseNumberToken();
+
+    case '"':
+      return ParseStringToken();
+  }
+
+  // Unknown character, or a keyword that did not match completely.
+  return Token(Token::INVALID_TOKEN, 0, 0);
+}
+
+// Returns true if the input starting at json_pos_ begins with |str|.
+// json_pos_ is not moved.
+bool JSONReader::NextStringMatch(const std::wstring& str) {
+  for (size_t i = 0; i < str.length(); ++i) {
+    const wchar_t c = *(json_pos_ + i);
+    // Check the character at the current offset (not just the first one) for
+    // the terminator, so the scan can never run past the end of the input.
+    if ('\0' == c || c != str[i])
+      return false;
+  }
+  return true;
+}
+
+// Advances json_pos_ past any run of spaces, tabs, line breaks and comments.
+// Stops at the end of the input, at the first other character, or at a '/'
+// that does not start a comment.
+void JSONReader::EatWhitespaceAndComments() {
+  for (;;) {
+    const wchar_t c = *json_pos_;
+    if (' ' == c || '\n' == c || '\r' == c || '\t' == c) {
+      ++json_pos_;
+      continue;
+    }
+    // TODO(tc): This isn't in the RFC so it should be a parser flag.
+    if ('/' == c && EatComment())
+      continue;
+
+    // End of input or not a whitespace char, just exit.
+    return;
+  }
+}
+
+// If json_pos_ is at the start of a "//" line comment or a "/* */" block
+// comment, advances json_pos_ past the comment and returns true. A comment
+// that is still open at the end of the input is consumed up to the end and
+// also counts as a comment. Returns false, leaving json_pos_ untouched, when
+// no comment starts here.
+bool JSONReader::EatComment() {
+  if ('/' != *json_pos_)
+    return false;
+
+  const wchar_t marker = *(json_pos_ + 1);
+
+  if ('/' == marker) {
+    // Line comment, read until \n or \r (which is consumed as well).
+    json_pos_ += 2;
+    while ('\0' != *json_pos_) {
+      const wchar_t c = *json_pos_++;
+      if ('\n' == c || '\r' == c)
+        return true;
+    }
+    return true;
+  }
+
+  if ('*' == marker) {
+    // Block comment, read until */
+    json_pos_ += 2;
+    for (; '\0' != *json_pos_; ++json_pos_) {
+      if ('*' == json_pos_[0] && '/' == json_pos_[1]) {
+        json_pos_ += 2;
+        return true;
+      }
+    }
+    return true;
+  }
+
+  return false;
+}
+
+// Records |description| in error_message_, prefixed with the 1-based line
+// and column of |error_pos|. |error_pos| must point into the input at or
+// after start_pos_; if the end of the input is reached first, NOTREACHED()
+// is hit and error_message_ is left unchanged.
+void JSONReader::SetErrorMessage(const char* description,
+                                 const wchar_t* error_pos) {
+  int line_number = 1;
+  int column_number = 1;
+
+  // Figure out the line and column the error occurred at.
+  const wchar_t* pos = start_pos_;
+  while (pos != error_pos) {
+    const wchar_t c = *pos++;
+    if (c == '\0') {
+      NOTREACHED();
+      return;
+    }
+
+    if (c == '\n') {
+      ++line_number;
+      column_number = 1;
+    } else {
+      ++column_number;
+    }
+  }
+
+  error_message_ = FormatErrorMessage(line_number, column_number, description);
+}
+
+} // namespace base