Improve performance of extension docs generation by 60%.

Review URL: http://codereview.chromium.org/9996002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@132692 0039d316-1c4b-4281-b951-d872f2087c98
author: aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-04-17 23:19:17 +0000
committer: aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-04-17 23:19:17 +0000
commit: 2cb0c4539c1352c55b6fc844a9ece58dd3f962ac (patch)
tree: 0e86d2c44b4c34a7449259cf2a7ce9e26cdbefc7
parent: e35da7489de6f9ce605a5895f47040fa8bbe9613 (diff)
download: chromium_src-2cb0c4539c1352c55b6fc844a9ece58dd3f962ac.zip
chromium_src-2cb0c4539c1352c55b6fc844a9ece58dd3f962ac.tar.gz
chromium_src-2cb0c4539c1352c55b6fc844a9ece58dd3f962ac.tar.bz2
6 files changed, 55 insertions, 165 deletions
diff --git a/chrome/common/extensions/docs/build/directory.py b/chrome/common/extensions/docs/build/directory.py
index 64f701c..d4fbf04 100755
--- a/chrome/common/extensions/docs/build/directory.py
+++ b/chrome/common/extensions/docs/build/directory.py
@@ -12,7 +12,12 @@ import re
 import hashlib
 import zipfile
 import simplejson as json
-import json_minify as minify
+import sys
+
+_script_path = os.path.realpath(__file__)
+sys.path.insert(0, os.path.normpath(_script_path +
+                   "/../../../../../../tools"))
+import json_comment_eater
 
 # Make sure we get consistent string sorting behavior by explicitly using the
 # default C locale.
@@ -58,7 +63,7 @@ def parse_json_file(path, encoding="utf-8"):
 
   try:
     json_str = json_file.read()
-    json_obj = json.loads(minify.json_minify(json_str), encoding)
+    json_obj = json.loads(json_comment_eater.Nom(json_str), encoding)
   except ValueError, msg:
     raise Exception("Failed to parse JSON out of file %s: %s" % (path, msg))
   finally:
diff --git a/third_party/json_minify/README.chromium b/third_party/json_minify/README.chromium
index 0f5f630..019c326 100644
--- a/third_party/json_minify/README.chromium
+++ b/third_party/json_minify/README.chromium
@@ -11,7 +11,6 @@ Description:
 A set of scripts that remove comments and whitespace from JSON files.
 
 Local Modifications:
-- Added the __init__.py file for easier imports
 - Got the (much faster) json-minify-sans-regexp.js file from the second
   URL listed
-- Small change to minify_json.py to pass license tests
+- Removed minify_json.py because it was extremely slow; it is replaced by tools/json_comment_eater.py.
diff --git a/third_party/json_minify/__init__.py b/third_party/json_minify/__init__.py
deleted file mode 100644
index 45bce20..0000000
--- a/third_party/json_minify/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) 2012 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-from minify_json import json_minify
diff --git a/third_party/json_minify/minify_json.py b/third_party/json_minify/minify_json.py
deleted file mode 100644
index 69aac6e..0000000
--- a/third_party/json_minify/minify_json.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Use of this source code is governed by a BSD-style license (MIT)
-'''
-Created on 20/01/2011
-
-v0.1 (C) Gerald Storer
-MIT License
-
-Based on JSON.minify.js: 
-https://github.com/getify/JSON.minify
-'''
-
-import re
-
-def json_minify(json,strip_space=True):
-    tokenizer=re.compile('"|(/\*)|(\*/)|(//)|\n|\r')
-    in_string = False
-    in_multiline_comment = False
-    in_singleline_comment = False
-    
-    new_str = []
-    from_index = 0 # from is a keyword in Python
-    
-    for match in re.finditer(tokenizer,json):
-        
-        if not in_multiline_comment and not in_singleline_comment:
-            tmp2 = json[from_index:match.start()]
-            if not in_string and strip_space:
-                tmp2 = re.sub('[ \t\n\r]*','',tmp2) # replace only white space defined in standard
-            new_str.append(tmp2)
-            
-        from_index = match.end()
-        
-        if match.group() == '"' and not in_multiline_comment and not in_singleline_comment:
-            escaped = re.search('(\\\\)*$',json[:match.start()])
-            if not in_string or escaped is None or len(escaped.group()) % 2 == 0:
-                # start of string with ", or unescaped " character found to end string
-                in_string = not in_string
-            from_index -= 1 # include " character in next catch
-            
-        elif match.group() == '/*' and not in_string and not in_multiline_comment and not in_singleline_comment:
-            in_multiline_comment = True
-        elif match.group() == '*/' and not in_string and in_multiline_comment and not in_singleline_comment:
-            in_multiline_comment = False
-        elif match.group() == '//' and not in_string and not in_multiline_comment and not in_singleline_comment:
-            in_singleline_comment = True
-        elif (match.group() == '\n' or match.group() == '\r') and not in_string and not in_multiline_comment and in_singleline_comment:
-            in_singleline_comment = False
-        elif not in_multiline_comment and not in_singleline_comment and (  
-             match.group() not in ['\n','\r',' ','\t'] or not strip_space):
-                new_str.append(match.group()) 
-    
-    new_str.append(json[from_index:])
-    return ''.join(new_str)
-
-if __name__ == '__main__':
-    import json # requires Python 2.6+ to run tests
-    
-    def test_json(s):
-        return json.loads(json_minify(s))
-    
-    test1 = '''// this is a JSON file with comments
-{
-    "foo": "bar",    // this is cool
-    "bar": [
-        "baz", "bum", "zam"
-    ],
-/* the rest of this document is just fluff
-   in case you are interested. */
-    "something": 10,
-    "else": 20
-}
-
-/* NOTE: You can easily strip the whitespace and comments 
-   from such a file with the JSON.minify() project hosted 
-   here on github at http://github.com/getify/JSON.minify 
-*/
-'''
-
-    test1_res = '''{"foo":"bar","bar":["baz","bum","zam"],"something":10,"else":20}'''
-    
-    test2 = '''
-{"/*":"*/","//":"",/*"//"*/"/*/"://
-"//"}
-
-'''
-    test2_res = '''{"/*":"*/","//":"","/*/":"//"}'''
-    
-    test3 = r'''/*
-this is a 
-multi line comment */{
-
-"foo"
-:
-    "bar/*"// something
-    ,    "b\"az":/*
-something else */"blah"
-
-}
-'''
-    test3_res = r'''{"foo":"bar/*","b\"az":"blah"}'''
-    
-    test4 = r'''{"foo": "ba\"r//", "bar\\": "b\\\"a/*z", 
-    "baz\\\\": /* yay */ "fo\\\\\"*/o" 
-}
-'''
-    test4_res = r'''{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'''
-    
-    assert test_json(test1) == json.loads(test1_res),'Failed test 1'
-    assert test_json(test2) == json.loads(test2_res),'Failed test 2'
-    assert test_json(test3) == json.loads(test3_res),'Failed test 3'
-    assert test_json(test4) == json.loads(test4_res),'Failed test 4'
-    if __debug__: # Don't print passed message if the asserts didn't run
-        print 'Passed all tests'
diff --git a/tools/json_comment_eater.py b/tools/json_comment_eater.py
new file mode 100755
index 0000000..1b1e07a84
--- /dev/null
+++ b/tools/json_comment_eater.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Utility to remove //-style comments from JSON text so that it can be
+parsed by json.loads.
+
+Only single-line comments introduced by // are handled; text inside
+double-quoted strings is copied through untouched.
+"""
+
+def _ReadString(input, start, output):
+  """Copies the remainder of a JSON string literal to |output|.
+
+  Args:
+    input: The full JSON text.
+    start: Index of the first character after the opening quote.
+    output: List of string fragments; the opening quote and the string
+        contents (through the closing quote) are appended to it.
+
+  Returns:
+    The index just past the closing quote, or len(input) if the string is
+    never terminated.
+  """
+  output.append('"')
+  in_escape = False
+  end = len(input)
+  pos = start
+  while pos < end:
+    char = input[pos]
+    output.append(char)
+    pos += 1
+    if in_escape:
+      # The character after a backslash is taken literally, so an escaped
+      # quote does not close the string.
+      in_escape = False
+    elif char == '\\':
+      in_escape = True
+    elif char == '"':
+      return pos
+  # Unterminated string: everything was consumed. Returning the last index
+  # instead would make Nom() emit the final character a second time.
+  return end
+
+def _ReadComment(input, start, output):
+  """Skips a // comment, keeping the line terminator that ends it.
+
+  Args:
+    input: The full JSON text.
+    start: Index of the first character after the leading slashes.
+    output: List of string fragments; only the carriage return or newline
+        that ends the comment is appended to it.
+
+  Returns:
+    The index just past that line terminator, or len(input) if the comment
+    runs to the end of the text.
+  """
+  end = len(input)
+  pos = start
+  while pos < end:
+    char = input[pos]
+    pos += 1
+    if char == '\r' or char == '\n':
+      output.append(char)
+      return pos
+  # Comment on the last line with no trailing newline: nothing is left to
+  # copy, so report the whole input as consumed.
+  return end
+
+def Nom(input):
+  """Returns |input| with all //-style comments removed.
+
+  Comment markers inside double-quoted strings are preserved, and the line
+  terminator that ends each comment is kept.
+  """
+  output = []
+  pos = 0
+  while pos < len(input):
+    if input[pos] == '"':
+      pos = _ReadString(input, pos + 1, output)
+    elif input[pos:pos+2] == '//':
+      pos = _ReadComment(input, pos + 2, output)
+    else:
+      output.append(input[pos])
+      pos += 1
+  return ''.join(output)
diff --git a/tools/json_schema_compiler/json_schema.py b/tools/json_schema_compiler/json_schema.py
index 240a168..f51cece 100644
--- a/tools/json_schema_compiler/json_schema.py
+++ b/tools/json_schema_compiler/json_schema.py
@@ -4,49 +4,12 @@
 
 import copy
 import json
+import os.path
+import sys
 
-def StripJSONComments(stream):
-  """Strips //-style comments from a stream of JSON. Allows un-escaped //
-  inside string values.
-  """
-  # Previously we used json_minify to strip comments, but it seems to be pretty
-  # slow and does more than we need. This implementation does a lot less work -
-  # it just strips comments from the beginning of the '//' delimiter until end
-  # of line, but only if we're not inside a string. For example:
-  #
-  #  {"url": "http://www.example.com"}
-  #
-  # will work properly, as will:
-  #
-  #  {
-  #    "url": "http://www.example.com" // some comment
-  #  }
-  result = ""
-  last_char = None
-  inside_string = False
-  inside_comment = False
-  buf = ""
-  for char in stream:
-    if inside_comment:
-      if char == '\n':
-        inside_comment = False
-      else:
-        continue
-    else:
-      if char == '/' and not inside_string:
-        if last_char == '/':
-          inside_comment = True
-        last_char = char
-        continue
-      else:
-        if last_char == '/' and not inside_string:
-          result += '/'
-        if char == '"':
-          inside_string = not inside_string
-    last_char = char
-    result += char
-
-  return result
+_script_path = os.path.realpath(__file__)
+sys.path.insert(0, os.path.normpath(_script_path + "/../../"))
+import json_comment_eater
 
 def DeleteNocompileNodes(item):
   def HasNocompile(thing):
@@ -69,7 +32,8 @@ def DeleteNocompileNodes(item):
 
 def Load(filename):
   with open(filename, 'r') as handle:
-    return DeleteNocompileNodes(json.loads(StripJSONComments(handle.read())))
+    return DeleteNocompileNodes(
+        json.loads(json_comment_eater.Nom(handle.read())))
 
 
 # A dictionary mapping |filename| to the object resulting from loading the JSON
author	aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-04-17 23:19:17 +0000
committer	aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-04-17 23:19:17 +0000
commit	2cb0c4539c1352c55b6fc844a9ece58dd3f962ac (patch)
tree	0e86d2c44b4c34a7449259cf2a7ce9e26cdbefc7
parent	e35da7489de6f9ce605a5895f47040fa8bbe9613 (diff)
download	chromium_src-2cb0c4539c1352c55b6fc844a9ece58dd3f962ac.zip chromium_src-2cb0c4539c1352c55b6fc844a9ece58dd3f962ac.tar.gz chromium_src-2cb0c4539c1352c55b6fc844a9ece58dd3f962ac.tar.bz2