// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // A JSON parser. Converts strings of JSON into a Value object (see // base/values.h). // http://www.ietf.org/rfc/rfc4627.txt?number=4627 // // Known limitations/deviations from the RFC: // - Only knows how to parse ints within the range of a signed 32 bit int and // decimal numbers within a double. // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 // (BE or LE) and UTF-32 (BE or LE) as well. // - We limit nesting to 100 levels to prevent stack overflow (this is allowed // by the RFC). // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") writes a data // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input // UTF-8 string for the JSONReader::JsonToValue() function may start with a // UTF-8 BOM (0xEF, 0xBB, 0xBF). // To avoid the function from mis-treating a UTF-8 BOM as an invalid // character, the function skips a Unicode BOM at the beginning of the // Unicode string (converted from the input UTF-8 string) before parsing it. // // TODO(tc): Add a parsing option to to relax object keys being wrapped in // double quotes // TODO(tc): Add an option to disable comment stripping #ifndef BASE_JSON_JSON_READER_H_ #define BASE_JSON_JSON_READER_H_ #pragma once #include #include "base/base_export.h" #include "base/basictypes.h" // Chromium and Chromium OS check out gtest to different places, so we're // unable to compile on both if we include gtest_prod.h here. Instead, include // its only contents -- this will need to be updated if the macro ever changes. #define FRIEND_TEST(test_case_name, test_name)\ friend class test_case_name##_##test_name##_Test #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ FRIEND_TEST(test_case_name, test_name); \ FRIEND_TEST(test_case_name, DISABLED_##test_name); \ FRIEND_TEST(test_case_name, FLAKY_##test_name); \ FRIEND_TEST(test_case_name, FAILS_##test_name) namespace base { class Value; enum JSONParserOptions { // Parses the input strictly according to RFC 4627, except for where noted // above. JSON_PARSE_RFC = 0, // Allows commas to exist after the last element in structures. JSON_ALLOW_TRAILING_COMMAS = 1 << 0, }; class BASE_EXPORT JSONReader { public: // A struct to hold a JS token. class Token { public: enum Type { OBJECT_BEGIN, // { OBJECT_END, // } ARRAY_BEGIN, // [ ARRAY_END, // ] STRING, NUMBER, BOOL_TRUE, // true BOOL_FALSE, // false NULL_TOKEN, // null LIST_SEPARATOR, // , OBJECT_PAIR_SEPARATOR, // : END_OF_INPUT, INVALID_TOKEN, }; Token(Type t, const char* b, int len) : type(t), begin(b), length(len) {} // Get the character that's one past the end of this token. char NextChar() { return *(begin + length); } static Token CreateInvalidToken() { return Token(INVALID_TOKEN, 0, 0); } Type type; // A pointer into JSONReader::json_pos_ that's the beginning of this token. const char* begin; // End should be one char past the end of the token. int length; }; // Error codes during parsing. enum JsonParseError { JSON_NO_ERROR = 0, JSON_BAD_ROOT_ELEMENT_TYPE, JSON_INVALID_ESCAPE, JSON_SYNTAX_ERROR, JSON_TRAILING_COMMA, JSON_TOO_MUCH_NESTING, JSON_UNEXPECTED_DATA_AFTER_ROOT, JSON_UNSUPPORTED_ENCODING, JSON_UNQUOTED_DICTIONARY_KEY, }; // String versions of parse error codes. static const char* kBadRootElementType; static const char* kInvalidEscape; static const char* kSyntaxError; static const char* kTrailingComma; static const char* kTooMuchNesting; static const char* kUnexpectedDataAfterRoot; static const char* kUnsupportedEncoding; static const char* kUnquotedDictionaryKey; JSONReader(); // Reads and parses |json|, returning a Value. The caller owns the returned // instance. If |json| is not a properly formed JSON string, returns NULL. static Value* Read(const std::string& json); // Reads and parses |json|, returning a Value owned by the caller. The // parser respects the given |options|. If the input is not properly formed, // returns NULL. static Value* Read(const std::string& json, int options); // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| // are optional. If specified and NULL is returned, they will be populated // an error code and a formatted error message (including error location if // appropriate). Otherwise, they will be unmodified. static Value* ReadAndReturnError(const std::string& json, int options, // JSONParserOptions int* error_code_out, std::string* error_msg_out); // Converts a JSON parse error code into a human readable message. // Returns an empty string if error_code is JSON_NO_ERROR. static std::string ErrorCodeToString(JsonParseError error_code); // Returns the error code if the last call to JsonToValue() failed. // Returns JSON_NO_ERROR otherwise. JsonParseError error_code() const { return error_code_; } // Converts error_code_ to a human-readable string, including line and column // numbers if appropriate. std::string GetErrorMessage() const; // Reads and parses |json|, returning a Value. The caller owns the returned // instance. If |json| is not a properly formed JSON string, returns NULL and // a detailed error can be retrieved from |error_message()|. // If |check_root| is true, we require that the root object be an object or // array. Otherwise, it can be any valid JSON type. // If |allow_trailing_comma| is true, we will ignore trailing commas in // objects and arrays even though this goes against the RFC. Value* JsonToValue(const std::string& json, bool check_root, bool allow_trailing_comma); private: FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, Reading); FRIEND_TEST_ALL_PREFIXES(JSONReaderTest, ErrorMessages); static std::string FormatErrorMessage(int line, int column, const std::string& description); // Recursively build Value. Returns NULL if we don't have a valid JSON // string. If |is_root| is true, we verify that the root element is either // an object or an array. Value* BuildValue(bool is_root); // Parses a sequence of characters into a Token::NUMBER. If the sequence of // characters is not a valid number, returns a Token::INVALID_TOKEN. Note // that DecodeNumber is used to actually convert from a string to an // int/double. Token ParseNumberToken(); // Try and convert the substring that token holds into an int or a double. If // we can (ie., no overflow), return the value, else return NULL. Value* DecodeNumber(const Token& token); // Parses a sequence of characters into a Token::STRING. If the sequence of // characters is not a valid string, returns a Token::INVALID_TOKEN. Note // that DecodeString is used to actually decode the escaped string into an // actual wstring. Token ParseStringToken(); // Convert the substring into a value string. This should always succeed // (otherwise ParseStringToken would have failed). Value* DecodeString(const Token& token); // Helper function for DecodeString that consumes UTF16 [0,2] code units and // convers them to UTF8 code untis. |token| is the string token in which the // units should be read, |i| is the position in the token at which the first // code unit starts, immediately after the |\u|. This will be mutated if code // units are consumed. |dest_string| is a string to which the UTF8 code unit // should be appended. Returns true on success and false if there's an // encoding error. bool ConvertUTF16Units(const Token& token, int* i, std::string* dest_string); // Grabs the next token in the JSON stream. This does not increment the // stream so it can be used to look ahead at the next token. Token ParseToken(); // Increments |json_pos_| past leading whitespace and comments. void EatWhitespaceAndComments(); // If |json_pos_| is at the start of a comment, eat it, otherwise, returns // false. bool EatComment(); // Checks if |json_pos_| matches str. bool NextStringMatch(const char* str, size_t length); // Sets the error code that will be returned to the caller. The current // line and column are determined and added into the final message. void SetErrorCode(const JsonParseError error, const char* error_pos); // Pointer to the starting position in the input string. const char* start_pos_; // Pointer to the current position in the input string. const char* json_pos_; // Pointer to the last position in the input string. const char* end_pos_; // Used to keep track of how many nested lists/dicts there are. int stack_depth_; // A parser flag that allows trailing commas in objects and arrays. bool allow_trailing_comma_; // Contains the error code for the last call to JsonToValue(), if any. JsonParseError error_code_; int error_line_; int error_col_; DISALLOW_COPY_AND_ASSIGN(JSONReader); }; } // namespace base #endif // BASE_JSON_JSON_READER_H_