| author | husky@chromium.org <husky@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-07-24 12:48:59 +0000 |
|---|---|---|
| committer | husky@chromium.org <husky@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-07-24 12:48:59 +0000 |
| commit | e11751b6fa5605ff91fd02e6fe4909fe15d95760 (patch) | |
| tree | cbf86417a39c1efaf18e2f924aab5eb5bb92fbf4 /tools/checkdeps | |
| parent | 1364c6d731d7ecafbbf58baecd5484d216be78a0 (diff) | |
Add Java support to checkdeps.py
I've moved the C++-specific parts of checkdeps.py into cpp_checker.py,
and added a corresponding java_checker.py module for Java code. The Java
module does an extra prescan pass over the files (see the pydoc for
details), but this has almost no impact on the overall running time.
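For readers skimming the patch, the core of the refactor is that each checker
module now advertises the file extensions it handles, and checkdeps dispatches
on extension. The sketch below illustrates that dispatch in isolation; the stub
classes and the sample path are placeholders for this illustration, not code
from the patch.

```python
import os


class StubCppChecker(object):
  """Illustrative stand-in for cpp_checker.CppChecker."""
  EXTENSIONS = ['.h', '.cc', '.m', '.mm']

  def CheckFile(self, rules, filepath):
    return ''  # An empty result means "no violations found".


class StubJavaChecker(object):
  """Illustrative stand-in for java_checker.JavaChecker."""
  EXTENSIONS = ['.java']

  def CheckFile(self, rules, filepath):
    return ''


# Same construction the patched checkdeps() uses: one dict entry per extension.
checkers = dict(
    (extension, checker)
    for checker in [StubJavaChecker(), StubCppChecker()]
    for extension in checker.EXTENSIONS)

# CheckDirectory routes every file through this lookup before calling CheckFile.
filepath = 'content/browser/foo.cc'  # Placeholder path.
extension = os.path.splitext(filepath)[1]
if extension in checkers:
  print(checkers[extension].CheckFile(None, filepath))
```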
The Java checker is intended to be compatible with the existing DEPS
format despite Java's different import mechanism.
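Concretely, a directory that gains Java sources keeps writing ordinary
include_rules; the checker resolves each import back to a repository path and
tests it against the same allow/deny prefixes. The DEPS snippet below is a
hypothetical example, not one of the files added by this change.

```python
# Hypothetical DEPS file for a directory containing Java code.
include_rules = [
  # Imports that resolve to files under base/android/java are allowed.
  "+base/android/java",
  # Imports that resolve into content/browser are rejected.
  "-content/browser",
]
```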
In the Android repository, there are a few extra DEPS files needed for the
Java code. That code has not yet been upstreamed back to Chromium, so those
DEPS files aren't required here yet; they can be added as and when the
corresponding code arrives.
BUG=137081
TEST=checkdeps.py
Review URL: https://chromiumcodereview.appspot.com/10790014
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@148094 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/checkdeps')
| -rwxr-xr-x | tools/checkdeps/checkdeps.py | 156 |
|---|---|---|
| -rw-r--r-- | tools/checkdeps/cpp_checker.py | 101 |
| -rw-r--r-- | tools/checkdeps/java_checker.py | 106 |

3 files changed, 232 insertions, 131 deletions
diff --git a/tools/checkdeps/checkdeps.py b/tools/checkdeps/checkdeps.py
index dae44c3..9624812 100755
--- a/tools/checkdeps/checkdeps.py
+++ b/tools/checkdeps/checkdeps.py
@@ -55,10 +55,13 @@ only lowercase.
 import os
 import optparse
 import pipes
-import re
 import sys
 import copy
 
+import cpp_checker
+import java_checker
+
+
 # Variable name used in the DEPS file to add or subtract include files from
 # the module-level deps.
 INCLUDE_RULES_VAR_NAME = "include_rules"
@@ -67,20 +70,9 @@ INCLUDE_RULES_VAR_NAME = "include_rules"
 # be checked. This allows us to skip third party code, for example.
 SKIP_SUBDIRS_VAR_NAME = "skip_child_includes"
 
-# The maximum number of non-include lines we can see before giving up.
-MAX_UNINTERESTING_LINES = 50
-
-# The maximum line length, this is to be efficient in the case of very long
-# lines (which can't be #includes).
-MAX_LINE_LENGTH = 128
-
 # Set to true for more output. This is set by the command line options.
 VERBOSE = False
 
-# This regular expression will be used to extract filenames from include
-# statements.
-EXTRACT_INCLUDE_PATH = re.compile('[ \t]*#[ \t]*(?:include|import)[ \t]+"(.*)"')
-
 # In lowercase, using forward slashes as directory separators, ending in a
 # forward slash. Set by the command line options.
 BASE_DIRECTORY = ""
@@ -91,13 +83,13 @@ GIT_SOURCE_DIRECTORY = set()
 
 # Specifies a single rule for an include, which can be either allow or disallow.
 class Rule(object):
-  def __init__(self, allow, dir, source):
-    self._allow = allow
-    self._dir = dir
+  def __init__(self, allow, directory, source):
+    self.allow = allow
+    self._dir = directory
     self._source = source
 
   def __str__(self):
-    if (self._allow):
+    if (self.allow):
       return '"+%s" from %s.' % (self._dir, self._source)
     return '"-%s" from %s.' % (self._dir, self._source)
 
@@ -160,7 +152,7 @@ class Rules:
     for rule in self._rules:
       if rule.ChildOrMatch(allowed_dir):
         # This rule applies.
-        if rule._allow:
+        if rule.allow:
           return (True, "")
         return (False, rule.__str__())
     # No rules apply, fail.
@@ -196,7 +188,7 @@ def ApplyRules(existing_rules, includes, cur_dir):
                     (cur_dir, BASE_DIRECTORY))
 
   # Last, apply the additional explicit rules.
-  for (index, rule_str) in enumerate(includes):
+  for (_, rule_str) in enumerate(includes):
     if not len(relative_dir):
       rule_description = "the top level include_rules"
     else:
@@ -233,10 +225,10 @@ def ApplyDirectoryRules(existing_rules, dir_name):
   # Check the DEPS file in this directory.
   if VERBOSE:
     print "Applying rules from", dir_name
-  def FromImpl(unused, unused2):
+  def FromImpl(_unused, _unused2):
     pass  # NOP function so "From" doesn't fail.
 
-  def FileImpl(unused):
+  def FileImpl(_unused):
     pass  # NOP function so "File" doesn't fail.
 
   class _VarImpl:
@@ -247,7 +239,7 @@ def ApplyDirectoryRules(existing_rules, dir_name):
       """Implements the Var syntax."""
      if var_name in self._local_scope.get("vars", {}):
        return self._local_scope["vars"][var_name]
-      raise Error("Var is not defined: %s" % var_name)
+      raise Exception("Var is not defined: %s" % var_name)
 
   local_scope = {}
   global_scope = {
@@ -270,111 +262,7 @@ def ApplyDirectoryRules(existing_rules, dir_name):
   return (ApplyRules(existing_rules, include_rules, dir_name), skip_subdirs)
 
 
-def ShouldCheckFile(file_name):
-  """Returns True if the given file is a type we want to check."""
-  checked_extensions = [
-    '.h',
-    '.cc',
-    '.m',
-    '.mm',
-  ]
-  basename, extension = os.path.splitext(file_name)
-  return extension in checked_extensions
-
-
-def CheckLine(rules, line):
-  """Checks the given file with the given rule set.
-  Returns a tuple (is_include, illegal_description).
-  If the line is an #include directive the first value will be True.
-  If it is also an illegal include, the second value will be a string describing
-  the error. Otherwise, it will be None."""
-  found_item = EXTRACT_INCLUDE_PATH.match(line)
-  if not found_item:
-    return False, None  # Not a match
-
-  include_path = found_item.group(1)
-
-  # Fix up backslashes in case somebody accidentally used them.
-  include_path.replace("\\", "/")
-
-  if include_path.find("/") < 0:
-    # Don't fail when no directory is specified. We may want to be more
-    # strict about this in the future.
-    if VERBOSE:
-      print " WARNING: directory specified with no path: " + include_path
-    return True, None
-
-  (allowed, why_failed) = rules.DirAllowed(include_path)
-  if not allowed:
-    if VERBOSE:
-      retval = "\nFor " + rules.__str__()
-    else:
-      retval = ""
-    return True, retval + ('Illegal include: "%s"\n    Because of %s' %
-        (include_path, why_failed))
-
-  return True, None
-
-
-def CheckFile(rules, file_name):
-  """Checks the given file with the given rule set.
-
-  Args:
-    rules: The set of rules that apply to files in this directory.
-    file_name: The source file to check.
-
-  Returns: Either a string describing the error if there was one, or None if
-    the file checked out OK.
-  """
-  if VERBOSE:
-    print "Checking: " + file_name
-
-  ret_val = ""  # We'll collect the error messages in here
-  last_include = 0
-  try:
-    cur_file = open(file_name, "r")
-    in_if0 = 0
-    for line_num in xrange(sys.maxint):
-      if line_num - last_include > MAX_UNINTERESTING_LINES:
-        break
-
-      cur_line = cur_file.readline(MAX_LINE_LENGTH)
-      if cur_line == "":
-        break
-      cur_line = cur_line.strip()
-
-      # Check to see if we're at / inside a #if 0 block
-      if cur_line == '#if 0':
-        in_if0 += 1
-        continue
-      if in_if0 > 0:
-        if cur_line.startswith('#if'):
-          in_if0 += 1
-        elif cur_line == '#endif':
-          in_if0 -= 1
-        continue
-
-      is_include, line_status = CheckLine(rules, cur_line)
-      if is_include:
-        last_include = line_num
-      if line_status is not None:
-        if len(line_status) > 0:  # Add newline to separate messages.
-          line_status += "\n"
-        ret_val += line_status
-    cur_file.close()
-
-  except IOError:
-    if VERBOSE:
-      print "Unable to open file: " + file_name
-    cur_file.close()
-
-  # Map empty string to None for easier checking.
-  if len(ret_val) == 0:
-    return None
-  return ret_val
-
-
-def CheckDirectory(parent_rules, dir_name):
+def CheckDirectory(parent_rules, checkers, dir_name):
   (rules, skip_subdirs) = ApplyDirectoryRules(parent_rules, dir_name)
   if rules == None:
     return True
@@ -390,19 +278,20 @@
     full_name = os.path.join(dir_name, cur)
     if os.path.isdir(full_name):
       dirs_to_check.append(full_name)
-    elif ShouldCheckFile(full_name):
+    elif os.path.splitext(full_name)[1] in checkers:
       files_to_check.append(full_name)
 
   # First check all files in this directory.
   for cur in files_to_check:
-    file_status = CheckFile(rules, cur)
-    if file_status != None:
+    checker = checkers[os.path.splitext(cur)[1]]
+    file_status = checker.CheckFile(rules, cur)
+    if file_status:
      print "ERROR in " + cur + "\n" + file_status
      success = False
 
   # Next recurse into the subdirectories.
   for cur in dirs_to_check:
-    if not CheckDirectory(rules, cur):
+    if not CheckDirectory(rules, checkers, cur):
      success = False
 
   return success
@@ -488,7 +377,12 @@ def checkdeps(options, args):
   global GIT_SOURCE_DIRECTORY
   GIT_SOURCE_DIRECTORY = GetGitSourceDirectory(BASE_DIRECTORY)
 
-  success = CheckDirectory(base_rules, start_dir)
+  java = java_checker.JavaChecker(BASE_DIRECTORY, VERBOSE)
+  cpp = cpp_checker.CppChecker(VERBOSE)
+  checkers = dict(
+      (extension, checker)
+      for checker in [java, cpp] for extension in checker.EXTENSIONS)
+  success = CheckDirectory(base_rules, checkers, start_dir)
   if not success:
     print "\nFAILED\n"
     return 1
diff --git a/tools/checkdeps/cpp_checker.py b/tools/checkdeps/cpp_checker.py
new file mode 100644
index 0000000..d32c6be
--- /dev/null
+++ b/tools/checkdeps/cpp_checker.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Checks C++ and Objective-C files for illegal includes."""
+
+import codecs
+import re
+
+
+class CppChecker(object):
+
+  EXTENSIONS = [
+      '.h',
+      '.cc',
+      '.m',
+      '.mm',
+  ]
+
+  # The maximum number of non-include lines we can see before giving up.
+  _MAX_UNINTERESTING_LINES = 50
+
+  # The maximum line length, this is to be efficient in the case of very long
+  # lines (which can't be #includes).
+  _MAX_LINE_LENGTH = 128
+
+  # This regular expression will be used to extract filenames from include
+  # statements.
+  _EXTRACT_INCLUDE_PATH = re.compile(
+      '[ \t]*#[ \t]*(?:include|import)[ \t]+"(.*)"')
+
+  def __init__(self, verbose):
+    self._verbose = verbose
+
+  def _CheckLine(self, rules, line):
+    """Checks the given file with the given rule set.
+    Returns a tuple (is_include, illegal_description).
+    If the line is an #include directive the first value will be True.
+    If it is also an illegal include, the second value will be a string
+    describing the error. Otherwise, it will be None."""
+    found_item = self._EXTRACT_INCLUDE_PATH.match(line)
+    if not found_item:
+      return False, None  # Not a match
+
+    include_path = found_item.group(1)
+
+    if '\\' in include_path:
+      return True, 'Include paths may not include backslashes'
+
+    if '/' not in include_path:
+      # Don't fail when no directory is specified. We may want to be more
+      # strict about this in the future.
+      if self._verbose:
+        print ' WARNING: directory specified with no path: ' + include_path
+      return True, None
+
+    (allowed, why_failed) = rules.DirAllowed(include_path)
+    if not allowed:
+      if self._verbose:
+        retval = '\nFor %s' % rules
+      else:
+        retval = ''
+      return True, retval + ('Illegal include: "%s"\n    Because of %s' %
+          (include_path, why_failed))
+
+    return True, None
+
+  def CheckFile(self, rules, filepath):
+    if self._verbose:
+      print 'Checking: ' + filepath
+
+    ret_val = ''  # We'll collect the error messages in here
+    last_include = 0
+    with codecs.open(filepath, encoding='utf-8') as f:
+      in_if0 = 0
+      for line_num, line in enumerate(f):
+        if line_num - last_include > self._MAX_UNINTERESTING_LINES:
+          break
+
+        line = line.strip()
+
+        # Check to see if we're at / inside a #if 0 block
+        if line.startswith('#if 0'):
+          in_if0 += 1
+          continue
+        if in_if0 > 0:
+          if line.startswith('#if'):
+            in_if0 += 1
+          elif line.startswith('#endif'):
+            in_if0 -= 1
+          continue
+
+        is_include, line_status = self._CheckLine(rules, line)
+        if is_include:
+          last_include = line_num
+        if line_status is not None:
+          if len(line_status) > 0:  # Add newline to separate messages.
+            line_status += '\n'
+          ret_val += line_status
+
+    return ret_val
diff --git a/tools/checkdeps/java_checker.py b/tools/checkdeps/java_checker.py
new file mode 100644
index 0000000..933fd84
--- /dev/null
+++ b/tools/checkdeps/java_checker.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Checks Java files for illegal imports."""
+
+import codecs
+import os
+import re
+
+
+class JavaChecker(object):
+  """Import checker for Java files.
+
+  The CheckFile method uses real filesystem paths, but Java imports work in
+  terms of package names. To deal with this, we have an extra "prescan" pass
+  that reads all the .java files and builds a mapping of class name -> filepath.
+  In CheckFile, we convert each import statement into a real filepath, and check
+  that against the rules in the DEPS files.
+
+  Note that in Java you can always use classes in the same directory without an
+  explicit import statement, so these imports can't be blocked with DEPS files.
+  But that shouldn't be a problem, because same-package imports are pretty much
+  always correct by definition. (If we find a case where this is *not* correct,
+  it probably means the package is too big and needs to be split up.)
+
+  Properties:
+    _classmap: dict of fully-qualified Java class name -> filepath
+  """
+
+  EXTENSIONS = ['.java']
+
+  def __init__(self, base_directory, verbose):
+    self._base_directory = base_directory
+    self._verbose = verbose
+    self._classmap = {}
+    self._PrescanFiles()
+
+  def _PrescanFiles(self):
+    for root, dirs, files in os.walk(self._base_directory):
+      # Skip unwanted subdirectories. TODO(husky): it would be better to do
+      # this via the skip_child_includes flag in DEPS files. Maybe hoist this
+      # prescan logic into checkdeps.py itself?
+      for d in dirs:
+        # Skip hidden directories.
+        if d.startswith('.'):
+          dirs.remove(d)
+        # Skip the "out" directory, as dealing with generated files is awkward.
+        # We don't want paths like "out/Release/lib.java" in our DEPS files.
+        # TODO(husky): We need some way of determining the "real" path to
+        # a generated file -- i.e., where it would be in source control if
+        # it weren't generated.
+        if d == 'out':
+          dirs.remove(d)
+        # Skip third-party directories.
+        if d == 'third_party':
+          dirs.remove(d)
+      for f in files:
+        if f.endswith('.java'):
+          self._PrescanFile(os.path.join(root, f))
+
+  def _PrescanFile(self, filepath):
+    if self._verbose:
+      print 'Prescanning: ' + filepath
+    with codecs.open(filepath, encoding='utf-8') as f:
+      short_class_name, _ = os.path.splitext(os.path.basename(filepath))
+      for line in f:
+        for package in re.findall('^package ([\w\.]+);', line):
+          full_class_name = package + '.' + short_class_name
+          if full_class_name in self._classmap:
+            print 'WARNING: multiple definitions of %s:' % full_class_name
+            print '    ' + filepath
+            print '    ' + self._classmap[full_class_name]
+            print
+          else:
+            self._classmap[full_class_name] = filepath
+          return
+      print 'WARNING: no package definition found in %s' % filepath
+
+  def CheckFile(self, rules, filepath):
+    if self._verbose:
+      print 'Checking: ' + filepath
+
+    result = ''
+    with codecs.open(filepath, encoding='utf-8') as f:
+      for line in f:
+        for clazz in re.findall('^import\s+(?:static\s+)?([\w\.]+)\s*;', line):
+          if clazz not in self._classmap:
+            # Importing a class from outside the Chromium tree. That's fine --
+            # it's probably a Java or Android system class.
+            continue
+          include_path = os.path.relpath(
+              self._classmap[clazz], self._base_directory)
+          # Convert Windows paths to Unix style, as used in DEPS files.
+          include_path = include_path.replace(os.path.sep, '/')
+          (allowed, why_failed) = rules.DirAllowed(include_path)
+          if not allowed:
+            if self._verbose:
+              result += '\nFor %s' % rules
+            result += 'Illegal include: "%s"\n    Because of %s\n' % (
+                include_path, why_failed)
+        if '{' in line:
+          # This is code, so we're finished reading imports for this file.
+          break
+
+    return result
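To make the prescan idea concrete, here is a trimmed, standalone sketch of the
two halves of JavaChecker: building the class-name-to-path map from a package
declaration plus a file name, and resolving an import statement back to a path
that DirAllowed can judge. The file path and class names are made up for the
illustration, and the real module additionally walks the whole tree, skips
hidden, out/ and third_party/ directories, and works with paths relative to
the source root.

```python
import os
import re

# Half 1 (the prescan): a package declaration plus the file's base name
# yields the fully-qualified class name that keys the classmap.
java_path = 'base/android/java/Foo.java'  # Made-up path for illustration.
package_line = 'package org.chromium.base;'
short_class_name = os.path.splitext(os.path.basename(java_path))[0]
package = re.findall(r'^package ([\w.]+);', package_line)[0]
classmap = {package + '.' + short_class_name: java_path}

# Half 2 (CheckFile): each import is mapped back to a repository path, which
# would then be passed to rules.DirAllowed(), exactly like a C++ include path.
import_line = 'import org.chromium.base.Foo;'
for clazz in re.findall(r'^import\s+(?:static\s+)?([\w.]+)\s*;', import_line):
  if clazz in classmap:
    include_path = classmap[clazz].replace(os.path.sep, '/')
    print(include_path)  # -> base/android/java/Foo.java
  else:
    # Unknown classes are assumed to be Java or Android system classes.
    pass
```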