Add base to the repository.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@8 0039d316-1c4b-4281-b951-d872f2087c98
author: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 21:49:38 +0000
committer: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 21:49:38 +0000
commit: d7cae12696b96500c05dd2d430f6238922c20c96 (patch)
tree: ecff27b367735535b2a66477f8cd89d3c462a6c0 /base/json_reader.cc
parent: ee2815e28d408216cf94e874825b6bcf76c69083 (diff)
download: chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.zip
chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.gz
chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.bz2
1 files changed, 605 insertions, 0 deletions
diff --git a/base/json_reader.cc b/base/json_reader.cc
new file mode 100644
index 0000000..1ec5f637
--- /dev/null
+++ b/base/json_reader.cc
@@ -0,0 +1,605 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "base/json_reader.h"
+
+#include <float.h>
+
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "base/values.h"
+
+// Sentinel token returned by the tokenizer helpers when the input at the
+// current position cannot form a valid token.  It is built with a null begin
+// pointer and a length of 0.
+static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN,
+                                             0, 0);
+// Maximum recursion depth allowed in BuildValue(); deeper nesting makes the
+// parse fail.
+static const int kStackLimit = 100;
+
+namespace {
+
+// Converts one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to
+// its numeric value.  Any other character trips NOTREACHED() and yields 0.
+inline int HexToInt(wchar_t c) {
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  if (c >= 'A' && c <= 'F')
+    return 10 + (c - 'A');
+  if (c >= 'a' && c <= 'f')
+    return 10 + (c - 'a');
+
+  NOTREACHED();
+  return 0;
+}
+
+// Helper for ParseNumberToken.  Extends |token| over the run of decimal
+// digits that follows its current end.  Returns false when there is no digit
+// at all, or when the run has more than one digit, starts with '0' and
+// |can_have_leading_zeros| is false.  Note that |token.length| has already
+// been advanced over the digits even when false is returned for the
+// leading-zero case.
+bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) {
+  const wchar_t leading = token.NextChar();
+  int digit_count = 0;
+
+  // '\0' is not a digit, so the end of the input also terminates the loop.
+  for (wchar_t cur = leading; '0' <= cur && cur <= '9';
+       cur = token.NextChar()) {
+    ++token.length;
+    ++digit_count;
+  }
+
+  // At least one digit is required.
+  if (0 == digit_count)
+    return false;
+
+  const bool has_leading_zero = digit_count > 1 && '0' == leading;
+  return can_have_leading_zeros || !has_leading_zero;
+}
+
+// Helper for ParseStringToken.  Checks that the |digits| characters following
+// the token's next character (the 'x' or 'u' of an escape) are all hex
+// digits.  On success the token is extended by |digits| and true is returned;
+// otherwise the token is left unchanged and false is returned.
+bool ReadHexDigits(JSONReader::Token& token, int digits) {
+  const wchar_t* cursor = token.begin + token.length;
+  for (int remaining = digits; remaining > 0; --remaining) {
+    const wchar_t c = *++cursor;
+    // The terminating '\0' is not a hex digit, so hitting the end of the
+    // input fails here before anything past it is read.
+    const bool is_hex = ('0' <= c && c <= '9') ||
+                        ('a' <= c && c <= 'f') ||
+                        ('A' <= c && c <= 'F');
+    if (!is_hex)
+      return false;
+  }
+
+  token.length += digits;
+  return true;
+}
+
+}  // anonymous namespace
+
+/* static */
+// Parses |json| and returns whether it succeeded.  This simply forwards to
+// JsonToValue() with |check_root| set to true, which makes the parse fail
+// unless the top-level value is an array or an object.
+bool JSONReader::Read(const std::string& json, Value** root) {
+  return JsonToValue(json, root, true);
+}
+
+/* static */
+// Parses |json| into a Value tree.  Returns true and stores the tree in
+// |*root| only when the whole input is one valid JSON value followed by
+// nothing but whitespace/comments; otherwise returns false and leaves |*root|
+// untouched.  |check_root| is forwarded to BuildValue() as |is_root|.
+bool JSONReader::JsonToValue(const std::string& json, Value** root,
+                             bool check_root) {
+  // Assume input is UTF8.  The conversion from UTF8 to wstring removes null
+  // bytes for us (a good thing).
+  std::wstring json_wide(UTF8ToWide(json));
+  const wchar_t* json_cstr = json_wide.c_str();
+
+  // A leading UTF-8 Byte-Order-Mark (0xEF, 0xBB, 0xBF) comes out of
+  // UTF8ToWide() as a Unicode BOM (U+FEFF).  Step over it so that
+  // JSONReader::BuildValue() does not reject it as an invalid character.
+  if (!json_wide.empty() && json_cstr[0] == 0xFEFF)
+    ++json_cstr;
+
+  JSONReader reader(json_cstr);
+  Value* parsed = NULL;
+
+  // Hand the result to the caller only if we have valid JSON and nothing
+  // else after it.
+  if (reader.BuildValue(&parsed, check_root) &&
+      reader.ParseToken().type == Token::END_OF_INPUT) {
+    *root = parsed;
+    return true;
+  }
+
+  // Deleting a NULL pointer is a no-op, so no explicit check is needed.
+  delete parsed;
+  return false;
+}
+
+// Creates a reader positioned at |json_start_pos| with a nesting depth of 0.
+// Only the pointer is stored here; the scanning code in this file stops on
+// '\0', so the buffer is expected to be null-terminated.
+JSONReader::JSONReader(const wchar_t* json_start_pos)
+  : json_pos_(json_start_pos), stack_depth_(0) {}
+
+// Parses the JSON value that starts at |json_pos_| (after any whitespace or
+// comments) and stores the resulting Value in |*node|.  Arrays and objects
+// are parsed recursively.  Returns false on a syntax error or when the
+// nesting depth exceeds kStackLimit.  When |is_root| is true, the value must
+// be an array or an object.  On success |json_pos_| is left just past the
+// parsed value.
+//
+// Note that |stack_depth_| is only decremented on the success path.
+bool JSONReader::BuildValue(Value** node, bool is_root) {
+  ++stack_depth_;
+  if (stack_depth_ > kStackLimit)
+    return false;
+
+  // ParseToken() only classifies the token; |json_pos_| is advanced past it
+  // explicitly below.
+  Token token = ParseToken();
+  // The root token must be an array or an object.
+  if (is_root && token.type != Token::OBJECT_BEGIN &&
+      token.type != Token::ARRAY_BEGIN) {
+    return false;
+  }
+
+  switch (token.type) {
+    case Token::END_OF_INPUT:
+    case Token::INVALID_TOKEN:
+      return false;
+
+    case Token::NULL_TOKEN:
+      *node = Value::CreateNullValue();
+      break;
+
+    case Token::BOOL_TRUE:
+      *node = Value::CreateBooleanValue(true);
+      break;
+
+    case Token::BOOL_FALSE:
+      *node = Value::CreateBooleanValue(false);
+      break;
+
+    case Token::NUMBER:
+      if (!DecodeNumber(token, node))
+        return false;
+      break;
+
+    case Token::STRING:
+      if (!DecodeString(token, node))
+        return false;
+      break;
+
+    case Token::ARRAY_BEGIN:
+      {
+        // Step over '[' and peek at what follows.
+        json_pos_ += token.length;
+        token = ParseToken();
+
+        ListValue* array = new ListValue;
+        while (token.type != Token::ARRAY_END) {
+          // The peeked token was not consumed, so the recursive call
+          // re-reads it from |json_pos_|.
+          Value* array_node = NULL;
+          if (!BuildValue(&array_node, false)) {
+            delete array;
+            return false;
+          }
+          array->Append(array_node);
+
+          // After a list value, we expect a comma or the end of the list.
+          token = ParseToken();
+          if (token.type == Token::LIST_SEPARATOR) {
+            json_pos_ += token.length;
+            token = ParseToken();
+            // Trailing commas are invalid
+            if (token.type == Token::ARRAY_END) {
+              delete array;
+              return false;
+            }
+          } else if (token.type != Token::ARRAY_END) {
+            // Unexpected value after list value.  Bail out.
+            delete array;
+            return false;
+          }
+        }
+        // The loop above only exits with ARRAY_END, so this is a defensive
+        // check.
+        if (token.type != Token::ARRAY_END) {
+          delete array;
+          return false;
+        }
+        // |token| is the closing ']' here; it is consumed after the switch.
+        *node = array;
+        break;
+      }
+
+    case Token::OBJECT_BEGIN:
+      {
+        // Step over '{' and peek at what follows.
+        json_pos_ += token.length;
+        token = ParseToken();
+
+        DictionaryValue* dict = new DictionaryValue;
+        while (token.type != Token::OBJECT_END) {
+          // Every entry must start with a string key.
+          if (token.type != Token::STRING) {
+            delete dict;
+            return false;
+          }
+          Value* dict_key_value = NULL;
+          if (!DecodeString(token, &dict_key_value)) {
+            delete dict;
+            return false;
+          }
+          // Convert the key into a wstring.
+          std::wstring dict_key;
+          bool success = dict_key_value->GetAsString(&dict_key);
+          DCHECK(success);
+          delete dict_key_value;
+
+          // Consume the key and require a ':' after it.
+          json_pos_ += token.length;
+          token = ParseToken();
+          if (token.type != Token::OBJECT_PAIR_SEPARATOR) {
+            delete dict;
+            return false;
+          }
+
+          // Consume the ':'.  The token read here is not used afterwards:
+          // the recursive BuildValue() call parses the value itself.
+          json_pos_ += token.length;
+          token = ParseToken();
+          Value* dict_value = NULL;
+          if (!BuildValue(&dict_value, false)) {
+            delete dict;
+            return false;
+          }
+          dict->Set(dict_key, dict_value);
+
+          // After a key/value pair, we expect a comma or the end of the
+          // object.
+          token = ParseToken();
+          if (token.type == Token::LIST_SEPARATOR) {
+            json_pos_ += token.length;
+            token = ParseToken();
+            // Trailing commas are invalid.  TODO(tc): Should we allow trailing
+            // commas in objects?  Seems harmless and quite convenient...
+            if (token.type == Token::OBJECT_END) {
+              delete dict;
+              return false;
+            }
+          } else if (token.type != Token::OBJECT_END) {
+            // Unexpected value after last object value.  Bail out.
+            delete dict;
+            return false;
+          }
+        }
+        // The loop above only exits with OBJECT_END, so this is a defensive
+        // check.
+        if (token.type != Token::OBJECT_END) {
+          delete dict;
+          return false;
+        }
+        // |token| is the closing '}' here; it is consumed after the switch.
+        *node = dict;
+        break;
+      }
+
+    default:
+      // We got a token that's not a value.
+      return false;
+  }
+  // Consume the scalar token, or the closing bracket/brace of a container.
+  json_pos_ += token.length;
+
+  --stack_depth_;
+  return true;
+}
+
+// Scans a number starting at |json_pos_| and returns a NUMBER token covering
+// it, or kInvalidToken if the text does not match the number grammar.
+// |json_pos_| itself is not advanced.
+JSONReader::Token JSONReader::ParseNumberToken() {
+  // We just grab the number here.  We validate the size in DecodeNumber.
+  // According to RFC4627, a valid number is: [minus] int [frac] [exp]
+  Token token(Token::NUMBER, json_pos_, 0);
+  wchar_t c = *json_pos_;
+  if ('-' == c) {
+    ++token.length;
+    // The value read here is not used; ReadInt() fetches the next character
+    // itself.
+    c = token.NextChar();
+  }
+
+  // Integer part: leading zeros are not allowed.
+  if (!ReadInt(token, false))
+    return kInvalidToken;
+
+  // Optional fraction part
+  c = token.NextChar();
+  if ('.' == c) {
+    ++token.length;
+    if (!ReadInt(token, true))
+      return kInvalidToken;
+    c = token.NextChar();
+  }
+
+  // Optional exponent part
+  if ('e' == c || 'E' == c) {
+    ++token.length;
+    c = token.NextChar();
+    // Optional sign of the exponent.
+    if ('-' == c || '+' == c) {
+      ++token.length;
+      c = token.NextChar();
+    }
+    if (!ReadInt(token, true))
+      return kInvalidToken;
+  }
+
+  return token;
+}
+
+// Converts a NUMBER |token| into a Value stored in |*node|.  Tokens that
+// contain '.', 'e' or 'E' become real values; all others become integer
+// values.  Returns false (leaving |*node| untouched) when the text cannot be
+// scanned, when a real value is not finite, or when an integer does not
+// round-trip exactly through an int (i.e. it does not fit).
+bool JSONReader::DecodeNumber(const Token& token, Value** node) {
+  // Determine if we want to try to parse as an int or a double.
+  bool is_double = false;
+  for (int i = 0; i < token.length; ++i) {
+    wchar_t c = *(token.begin + i);
+    if ('e' == c || 'E' == c || '.' == c) {
+      is_double = true;
+      break;
+    }
+  }
+
+  if (is_double) {
+    // Try parsing as a double.
+    double num_double;
+    int parsed_values = swscanf_s(token.begin, L"%lf", &num_double);
+    // Make sure we're not -INF, INF or NAN.
+    if (1 == parsed_values && _finite(num_double)) {
+      *node = Value::CreateRealValue(num_double);
+      return true;
+    }
+    return false;
+  }
+
+  int num_int;
+  if (1 != swscanf_s(token.begin, L"%d", &num_int))
+    return false;
+
+  // Ensure the parsed value matches the string.  This makes sure we don't
+  // overflow/underflow.  The lengths must match as well as the characters:
+  // comparing only the printed digits would accept an out-of-range token
+  // whose leading digits happen to equal the clamped/wrapped result (for
+  // example "21474836470" scanned as 2147483647).
+  // NOTE: "-0" prints back as "0" and is therefore rejected, as it was
+  // before the length check was added.
+  const std::wstring back_to_str = StringPrintf(L"%d", num_int);
+  if (back_to_str.length() != static_cast<size_t>(token.length) ||
+      0 != wcsncmp(back_to_str.c_str(), token.begin, back_to_str.length())) {
+    return false;
+  }
+
+  *node = Value::CreateIntegerValue(num_int);
+  return true;
+}
+
+// Scans a double-quoted string starting at |json_pos_| (expected to point at
+// the opening quote) and returns a STRING token that spans both quotes.
+// Returns kInvalidToken for an unknown escape, a \x or \u escape without
+// enough hex digits, or when the input ends before the closing quote.
+// |json_pos_| itself is not advanced.
+JSONReader::Token JSONReader::ParseStringToken() {
+  // The initial length of 1 accounts for the opening quote.
+  Token token(Token::STRING, json_pos_, 1);
+  wchar_t c = token.NextChar();
+  while ('\0' != c) {
+    if ('\\' == c) {
+      // Skip the backslash and look at the escaped character.
+      ++token.length;
+      c = token.NextChar();
+      // Make sure the escaped char is valid.
+      switch (c) {
+        case 'x':
+          if (!ReadHexDigits(token, 2))
+            return kInvalidToken;
+          break;
+        case 'u':
+          if (!ReadHexDigits(token, 4))
+            return kInvalidToken;
+          break;
+        case '\\':
+        case '/':
+        case 'b':
+        case 'f':
+        case 'n':
+        case 'r':
+        case 't':
+        case '"':
+          break;
+        default:
+          // Includes '\0', i.e. a backslash at the very end of the input.
+          return kInvalidToken;
+      }
+    } else if ('"' == c) {
+      // Unescaped closing quote: include it and finish.
+      ++token.length;
+      return token;
+    }
+    // Consume the current character (for escapes, the escape letter or its
+    // last hex digit) and move on.
+    ++token.length;
+    c = token.NextChar();
+  }
+  // Ran out of input without seeing the closing quote.
+  return kInvalidToken;
+}
+
+// Decodes the contents of a STRING |token| (without its surrounding quotes),
+// expanding backslash escapes, and stores a string Value in |*node|.
+// Returns false only when an unknown escape is encountered, which should not
+// happen for tokens produced by ParseStringToken.
+bool JSONReader::DecodeString(const Token& token, Value** node) {
+  std::wstring result;
+  result.reserve(token.length - 2);
+
+  const wchar_t* const chars = token.begin;
+  const int closing_quote = token.length - 1;
+  int pos = 1;  // Index 0 is the opening quote.
+  while (pos < closing_quote) {
+    wchar_t ch = chars[pos++];
+    if ('\\' != ch) {
+      // Not escaped
+      result.push_back(ch);
+      continue;
+    }
+
+    // |pos| now indexes the character that follows the backslash.
+    ch = chars[pos++];
+    switch (ch) {
+      case '"':
+      case '/':
+      case '\\':
+        result.push_back(ch);
+        break;
+      case 'b':
+        result.push_back('\b');
+        break;
+      case 'f':
+        result.push_back('\f');
+        break;
+      case 'n':
+        result.push_back('\n');
+        break;
+      case 'r':
+        result.push_back('\r');
+        break;
+      case 't':
+        result.push_back('\t');
+        break;
+
+      case 'x':
+        // Two hex digits follow the 'x'.
+        result.push_back((HexToInt(chars[pos]) << 4) +
+                         HexToInt(chars[pos + 1]));
+        pos += 2;
+        break;
+      case 'u':
+        // Four hex digits follow the 'u'.
+        result.push_back((HexToInt(chars[pos]) << 12) +
+                         (HexToInt(chars[pos + 1]) << 8) +
+                         (HexToInt(chars[pos + 2]) << 4) +
+                         HexToInt(chars[pos + 3]));
+        pos += 4;
+        break;
+
+      default:
+        // We should only have valid strings at this point.  If not,
+        // ParseStringToken didn't do its job.
+        NOTREACHED();
+        return false;
+    }
+  }
+
+  *node = Value::CreateStringValue(result);
+  return true;
+}
+
+// Skips whitespace and comments, then classifies the token that starts at
+// |json_pos_|.  The token is not consumed: callers advance |json_pos_| by
+// |token.length| themselves.  Returns an INVALID_TOKEN token when the next
+// character does not start any known token.
+JSONReader::Token JSONReader::ParseToken() {
+  static const std::wstring kNullString(L"null");
+  static const std::wstring kTrueString(L"true");
+  static const std::wstring kFalseString(L"false");
+
+  EatWhitespaceAndComments();
+
+  // Stays INVALID_TOKEN unless one of the cases below recognizes the input.
+  Token token(Token::INVALID_TOKEN, 0, 0);
+  switch (*json_pos_) {
+    case '\0':
+      token.type = Token::END_OF_INPUT;
+      break;
+
+    // Literals: only the literal's own characters are checked, not what
+    // follows them.
+    case 'n':
+      if (NextStringMatch(kNullString))
+        token = Token(Token::NULL_TOKEN, json_pos_, 4);
+      break;
+
+    case 't':
+      if (NextStringMatch(kTrueString))
+        token = Token(Token::BOOL_TRUE, json_pos_, 4);
+      break;
+
+    case 'f':
+      if (NextStringMatch(kFalseString))
+        token = Token(Token::BOOL_FALSE, json_pos_, 5);
+      break;
+
+    // Single-character structural tokens.
+    case '[':
+      token = Token(Token::ARRAY_BEGIN, json_pos_, 1);
+      break;
+
+    case ']':
+      token = Token(Token::ARRAY_END, json_pos_, 1);
+      break;
+
+    case ',':
+      token = Token(Token::LIST_SEPARATOR, json_pos_, 1);
+      break;
+
+    case '{':
+      token = Token(Token::OBJECT_BEGIN, json_pos_, 1);
+      break;
+
+    case '}':
+      token = Token(Token::OBJECT_END, json_pos_, 1);
+      break;
+
+    case ':':
+      token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
+      break;
+
+    // A digit or a minus sign starts a number.
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+    case '-':
+      token = ParseNumberToken();
+      break;
+
+    case '"':
+      token = ParseStringToken();
+      break;
+  }
+  return token;
+}
+
+// Returns true if the input at |json_pos_| begins with |str|.  |json_pos_|
+// is not advanced.
+bool JSONReader::NextStringMatch(const std::wstring& str) {
+  for (size_t i = 0; i < str.length(); ++i) {
+    const wchar_t c = *(json_pos_ + i);
+    // Check the character at offset |i| (not the first one) for the end of
+    // the input, so that we never compare or read past the terminator.
+    if ('\0' == c || c != str[i])
+      return false;
+  }
+  return true;
+}
+
+// Advances |json_pos_| past any run of spaces, tabs, line breaks and
+// comments.  Stops at the end of the input, at the first other character, or
+// at a '/' that does not begin a comment.
+void JSONReader::EatWhitespaceAndComments() {
+  for (;;) {
+    const wchar_t current = *json_pos_;
+    const bool is_space = ' ' == current || '\n' == current ||
+                          '\r' == current || '\t' == current;
+    if (is_space) {
+      ++json_pos_;
+      continue;
+    }
+
+    // TODO(tc): This isn't in the RFC so it should be a parser flag.
+    if ('/' == current && EatComment())
+      continue;
+
+    // End of input, not a whitespace char, or not a comment: just exit.
+    return;
+  }
+}
+
+// Consumes one comment starting at |json_pos_|.  Returns false, without
+// moving |json_pos_|, if the input does not start with "//" or "/*".
+// Otherwise skips the comment and returns true.  A comment that runs to the
+// end of the input is also consumed and reported as success.
+bool JSONReader::EatComment() {
+  if ('/' != *json_pos_)
+    return false;
+
+  const wchar_t kind = *(json_pos_ + 1);
+  if ('/' != kind && '*' != kind)
+    return false;
+
+  // Skip the two-character comment opener.
+  json_pos_ += 2;
+
+  if ('/' == kind) {
+    // Line comment, read until (and including) the first \n or \r.
+    while ('\0' != *json_pos_) {
+      const wchar_t c = *json_pos_++;
+      if ('\n' == c || '\r' == c)
+        return true;
+    }
+    return true;
+  }
+
+  // Block comment, read until (and including) the closing */.
+  for (; '\0' != *json_pos_; ++json_pos_) {
+    if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) {
+      json_pos_ += 2;
+      return true;
+    }
+  }
+  return true;
+}
author	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 21:49:38 +0000
committer	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 21:49:38 +0000
commit	d7cae12696b96500c05dd2d430f6238922c20c96 (patch)
tree	ecff27b367735535b2a66477f8cd89d3c462a6c0 /base/json_reader.cc
parent	ee2815e28d408216cf94e874825b6bcf76c69083 (diff)
download	chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.zip chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.gz chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.bz2