authormaruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-05-09 01:43:37 +0000
committermaruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-05-09 01:43:37 +0000
commitb951b91f533b632c1bbbce8b5389a7bca872277e (patch)
tree3ac55739b5e8a2899c73330985b8e97b20687451 /tools
parente681b3dc58a974f2810d0009acf9b201699e4f21 (diff)
isolate_driver: Enable ninja parsing code all the time.
Split the ninja parsing code into its own file so that isolate_driver is
focused on .isolate management.
- This removes the need to specify the majority of binary runtime
  dependencies. Sadly, this means a few unrelated binaries are now pulled in,
  for example d8, libvpx_obj_int_extract and tls_edit. That's a
  maintenance/performance trade-off.
- Removed some dependencies from unit_tests.isolate to ensure this works.

R=vadimsh@chromium.org
BUG=368543

Review URL: https://codereview.chromium.org/270333002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@269140 0039d316-1c4b-4281-b951-d872f2087c98
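For context, a minimal sketch of the call pattern isolate_driver.py adopts
with this change (the build directory and target below are hypothetical; the
functions are the ones added in tools/ninja_parser.py):

  import ninja_parser

  build_dir = 'out/Release'  # hypothetical build directory
  target = 'unit_tests_run'  # hypothetical isolate target
  build_steps = ninja_parser.load_ninja(build_dir)
  binary_deps = ninja_parser.post_process_deps(
      build_dir, ninja_parser.recurse(build_dir, target, build_steps))
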
Diffstat (limited to 'tools')
-rwxr-xr-x  tools/isolate_driver.py  164
-rwxr-xr-x  tools/ninja_parser.py    313
2 files changed, 319 insertions, 158 deletions
diff --git a/tools/isolate_driver.py b/tools/isolate_driver.py
index 4db823e..bf1103a 100755
--- a/tools/isolate_driver.py
+++ b/tools/isolate_driver.py
@@ -20,172 +20,23 @@ WARNING: The target to use for build.ninja analysis is the base name of the
"""
import StringIO
-import glob
import logging
import os
import posixpath
import subprocess
import sys
-import time
TOOLS_DIR = os.path.dirname(os.path.abspath(__file__))
SWARMING_CLIENT_DIR = os.path.join(TOOLS_DIR, 'swarming_client')
SRC_DIR = os.path.dirname(TOOLS_DIR)
+import ninja_parser
+
sys.path.insert(0, SWARMING_CLIENT_DIR)
import isolate_format
-def load_ninja_recursively(build_dir, ninja_path, build_steps):
- """Crudely extracts all the subninja and build referenced in ninja_path.
-
- In particular, it ignores rule and variable declarations. The goal is to be
- performant (well, as much as python can be performant) which is currently in
- the <200ms range for a complete chromium tree. As such the code is laid out
- for performance instead of readability.
- """
- logging.debug('Loading %s', ninja_path)
- try:
- with open(os.path.join(build_dir, ninja_path), 'rb') as f:
- line = None
- merge_line = ''
- subninja = []
- for line in f:
- line = line.rstrip()
- if not line:
- continue
-
- if line[-1] == '$':
- # The next line needs to be merged in.
- merge_line += line[:-1]
- continue
-
- if merge_line:
- line = merge_line + line
- merge_line = ''
-
- statement = line[:line.find(' ')]
- if statement == 'build':
- # Save the dependency list as a raw string. Only the lines needed will
- # be processed with raw_build_to_deps(). This saves a good 70ms of
- # processing time.
- build_target, dependencies = line[6:].split(': ', 1)
- # Interestingly, trying to be smart and only saving the build steps
- # with the intended extensions ('', '.stamp', '.so') slows down
- # parsing even if 90% of the build rules can be skipped.
- # On Windows, a single step may generate two targets, so split items
- # accordingly. It has only been seen for .exe/.exe.pdb combos.
- for i in build_target.strip().split():
- build_steps[i] = dependencies
- elif statement == 'subninja':
- subninja.append(line[9:])
- except IOError:
- print >> sys.stderr, 'Failed to open %s' % ninja_path
- raise
-
- total = 1
- for rel_path in subninja:
- try:
- # Load each of the files referenced.
- # TODO(maruel): Skip the files known to not be needed. It saves an awful
- # lot of processing time.
- total += load_ninja_recursively(build_dir, rel_path, build_steps)
- except IOError:
- print >> sys.stderr, '... as referenced by %s' % ninja_path
- raise
- return total
-
-
-def load_ninja(build_dir):
- """Loads the tree of .ninja files in build_dir."""
- build_steps = {}
- total = load_ninja_recursively(build_dir, 'build.ninja', build_steps)
- logging.info('Loaded %d ninja files, %d build steps', total, len(build_steps))
- return build_steps
-
-
-def using_blacklist(item):
- """Returns True if an item should be analyzed.
-
- Ignores many rules that are assumed to not depend on a dynamic library. If
- the assumption doesn't hold true anymore for a file format, remove it from
- this list. This is simply an optimization.
- """
- IGNORED = (
- '.a', '.cc', '.css', '.def', '.h', '.html', '.js', '.json', '.manifest',
- '.o', '.obj', '.pak', '.png', '.pdb', '.strings', '.txt',
- )
- # ninja files use native path format.
- ext = os.path.splitext(item)[1]
- if ext in IGNORED:
- return False
- # Special case Windows, keep .dll.lib but discard .lib.
- if item.endswith('.dll.lib'):
- return True
- if ext == '.lib':
- return False
- return item not in ('', '|', '||')
-
-
-def raw_build_to_deps(item):
- """Converts a raw ninja build statement into the list of interesting
- dependencies.
- """
- # TODO(maruel): Use a whitelist instead? .stamp, .so.TOC, .dylib.TOC,
- # .dll.lib, .exe and empty.
- # The first item is the build rule, e.g. 'link', 'cxx', 'phony', etc.
- return filter(using_blacklist, item.split(' ')[1:])
-
-
-def recurse(target, build_steps, rules_seen):
- """Recursively returns all the interesting dependencies for root_item."""
- out = []
- if rules_seen is None:
- rules_seen = set()
- if target in rules_seen:
- # TODO(maruel): Figure out how it happens.
- logging.warning('Circular dependency for %s!', target)
- return []
- rules_seen.add(target)
- try:
- dependencies = raw_build_to_deps(build_steps[target])
- except KeyError:
- logging.info('Failed to find a build step to generate: %s', target)
- return []
- logging.debug('recurse(%s) -> %s', target, dependencies)
- for dependency in dependencies:
- out.append(dependency)
- dependency_raw_dependencies = build_steps.get(dependency)
- if dependency_raw_dependencies:
- for i in raw_build_to_deps(dependency_raw_dependencies):
- out.extend(recurse(i, build_steps, rules_seen))
- else:
- logging.info('Failed to find a build step to generate: %s', dependency)
- return out
-
-
-def post_process_deps(build_dir, dependencies):
- """Processes the dependency list with OS specific rules."""
- def filter_item(i):
- if i.endswith('.so.TOC'):
- # Remove only the suffix .TOC, not the .so!
- return i[:-4]
- if i.endswith('.dylib.TOC'):
- # Remove only the suffix .TOC, not the .dylib!
- return i[:-4]
- if i.endswith('.dll.lib'):
- # Remove only the suffix .lib, not the .dll!
- return i[:-4]
- return i
-
- # Check for execute access. This gets rid of all the phony rules.
- return [
- i for i in map(filter_item, dependencies)
- if os.access(os.path.join(build_dir, i), os.X_OK)
- ]
-
-
def create_wrapper(args, isolate_index, isolated_index):
"""Creates a wrapper .isolate that add dynamic libs.
@@ -218,8 +69,9 @@ def create_wrapper(args, isolate_index, isolated_index):
# primary target '_run'. Fix accordingly if this doesn't hold true, e.g.
# complain to maruel@.
target = isolate[:-len('.isolate')] + '_run'
- build_steps = load_ninja(build_dir)
- binary_deps = post_process_deps(build_dir, recurse(target, build_steps, None))
+ build_steps = ninja_parser.load_ninja(build_dir)
+ binary_deps = ninja_parser.post_process_deps(
+ build_dir, ninja_parser.recurse(build_dir, target, build_steps))
logging.debug(
'Binary dependencies:%s', ''.join('\n ' + i for i in binary_deps))
@@ -255,20 +107,16 @@ def main():
args = sys.argv[1:]
isolate = None
isolated = None
- is_component = False
for i, arg in enumerate(args):
if arg == '--isolate':
isolate = i + 1
if arg == '--isolated':
isolated = i + 1
- if arg == 'component=shared_library':
- is_component = True
if isolate is None or isolated is None:
print >> sys.stderr, 'Internal failure'
return 1
- if is_component:
- create_wrapper(args, isolate, isolated)
+ create_wrapper(args, isolate, isolated)
swarming_client = os.path.join(SRC_DIR, 'tools', 'swarming_client')
sys.stdout.flush()
diff --git a/tools/ninja_parser.py b/tools/ninja_parser.py
new file mode 100755
index 0000000..9083200a
--- /dev/null
+++ b/tools/ninja_parser.py
@@ -0,0 +1,313 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Primitive ninja parser.
+
+Its primary use case is to be used by isolate_driver.py. It is a standalone
+tool so it can be used to verify its behavior more efficiently. It's a quirky
+tool since it is designed only to gather binary dependencies.
+
+<directory> is assumed to be based on src/, so usually it will have the form
+out/Release. Any target outside this directory is ignored.
+
+This script is assumed to be run *after* the target was built, since it looks
+for the +x bit on the files and only returns the ones that exist.
+"""
+
+import logging
+import os
+import optparse
+import sys
+import time
+
+TOOLS_DIR = os.path.dirname(os.path.abspath(__file__))
+SRC_DIR = os.path.dirname(TOOLS_DIR)
+
+
+### Private stuff.
+
+
+_HEX = frozenset('0123456789abcdef')
+
+
+# Ignore any dependency with one of these extensions. In particular, we do not
+# want to mark the tools that would have generated these files as binary
+# dependencies; such tools are likely only used during the build process and
+# are not necessary at runtime. The worst that would happen is either:
+# - Mapping a tool that generates a source file but is not needed at run time.
+# - Slower processing.
+# On the other hand, being too aggressive will hide legitimate runtime
+# dependencies. In particular, .a may cause an explicit dependency on a .so.
+_IGNORED_EXTENSIONS = frozenset([
+ '.asm', '.c', '.cc', '.cpp', '.css', '.def', '.grd', '.gypcmd', '.h',
+ '.html', '.idl', '.in', '.jinja2', '.js', '.json', '.manifest', '.mm', '.o',
+ '.obj', '.pak', '.pickle', '.png', '.pdb', '.prep', '.proto', '.py', '.rc',
+ '.strings', '.svg', '.tmp', '.ttf', '.txt', '.xtb', '.wav',
+ ])
+
+
+def _load_ninja_recursively(build_dir, ninja_path, build_steps):
+ """Crudely extracts all the subninja and build referenced in ninja_path.
+
+ In particular, it ignores rule and variable declarations. The goal is to be
+ performant (well, as much as python can be performant) which is currently in
+ the <200ms range for a complete chromium tree. As such the code is laid out
+ for performance instead of readability.
+ """
+ logging.debug('Loading %s', ninja_path)
+ try:
+ with open(os.path.join(build_dir, ninja_path), 'rb') as f:
+ line = None
+ merge_line = ''
+ subninja = []
+ for line in f:
+ line = line.rstrip()
+ if not line:
+ continue
+
+ if line[-1] == '$':
+ # The next line needs to be merged in.
+ if merge_line:
+ merge_line += ' ' + line[:-1].strip(' ')
+ else:
+ merge_line = line[:-1].strip(' ')
+ continue
+
+ if merge_line:
+ line = merge_line + ' ' + line.strip(' ')
+ merge_line = ''
+
+ statement = line[:line.find(' ')]
+ if statement == 'build':
+ # Save the dependency list as a raw string. Only the lines needed will
+ # be processed within _recurse(). This saves a good 70ms of processing
+ # time.
+ build_target, dependencies = line[6:].split(': ', 1)
+ # Interestingly, trying to be smart and only saving the build steps
+ # with the intended extensions ('', '.stamp', '.so') slows down
+ # parsing even if 90% of the build rules can be skipped.
+          # On Windows, a single step may generate two targets, so split items
+ # accordingly. It has only been seen for .exe/.exe.pdb combos.
+ for i in build_target.strip().split():
+ build_steps[i] = dependencies
+ elif statement == 'subninja':
+ subninja.append(line[9:])
+ except IOError:
+ print >> sys.stderr, 'Failed to open %s' % ninja_path
+ raise
+
+ total = 1
+ for rel_path in subninja:
+ try:
+ # Load each of the files referenced.
+      # TODO(maruel): Skip the files known to not be needed. It saves an awful
+ # lot of processing time.
+ total += _load_ninja_recursively(build_dir, rel_path, build_steps)
+ except IOError:
+ print >> sys.stderr, '... as referenced by %s' % ninja_path
+ raise
+ return total
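
A minimal sketch of what this parser accepts, under Python 2 (which this
script targets) and assuming tools/ is on sys.path; note the '$' continuation
being merged back into one 'build' statement, and the subninja recursion:

  import os, tempfile
  import ninja_parser

  build_dir = tempfile.mkdtemp()
  with open(os.path.join(build_dir, 'sub.ninja'), 'w') as f:
    f.write('build bar.stamp: stamp bar.o\n')
  with open(os.path.join(build_dir, 'build.ninja'), 'w') as f:
    f.write('subninja sub.ninja\n'
            'build foo: link foo.o $\n'
            '    bar.stamp\n')

  build_steps = {}
  total = ninja_parser._load_ninja_recursively(
      build_dir, 'build.ninja', build_steps)
  print(total)               # 2 (build.ninja plus sub.ninja)
  print(build_steps['foo'])  # 'link foo.o bar.stamp'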
+
+
+def _simple_blacklist(item):
+ """Returns True if an item should be analyzed."""
+ return item not in ('', '|', '||')
+
+
+def _using_blacklist(item):
+ """Returns True if an item should be analyzed.
+
+ Ignores many rules that are assumed to not depend on a dynamic library. If
+ the assumption doesn't hold true anymore for a file format, remove it from
+ this list. This is simply an optimization.
+ """
+ # ninja files use native path format.
+ ext = os.path.splitext(item)[1]
+ if ext in _IGNORED_EXTENSIONS:
+ return False
+ # Special case Windows, keep .dll.lib but discard .lib.
+ if item.endswith('.dll.lib'):
+ return True
+ if ext == '.lib':
+ return False
+ return _simple_blacklist(item)
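
A quick sketch of how the filter classifies a few representative items (it is
a private helper, so this is illustrative only and assumes tools/ is on
sys.path):

  import ninja_parser

  for item in ('base.dll.lib', 'base.lib', 'foo.o', 'chrome', '||'):
    print('%-12s -> %s' % (item, ninja_parser._using_blacklist(item)))
  # base.dll.lib -> True   (kept; mapped to the .dll by post_process_deps)
  # base.lib     -> False  (plain import library)
  # foo.o        -> False  ('.o' is in _IGNORED_EXTENSIONS)
  # chrome       -> True   (no extension, could be a binary)
  # ||           -> False  (ninja order-only separator)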
+
+
+def _should_process(build_dir, target, build_steps, rules_seen):
+ """Returns the raw dependencies if the target should be processed."""
+ if target in rules_seen:
+    # The rule was already seen. Since rules_seen is not scoped to the target
+    # visibility, it usually simply means that multiple targets depend on the
+    # same dependencies. It's fine.
+ return None
+
+ raw_dependencies = build_steps.get(target, None)
+ if raw_dependencies is None:
+    # There is no build step defined to generate 'target'.
+ parts = target.rsplit('_', 1)
+ if len(parts) == 2 and len(parts[1]) == 32 and _HEX.issuperset(parts[1]):
+ # It's because it is a phony rule.
+ return None
+
+    # Kind of a hack: assume source files are always outside build_dir.
+ if (target.startswith('..') and
+ os.path.exists(os.path.join(build_dir, target))):
+ # It's because it is a source file.
+ return None
+
+ logging.debug('Failed to find a build step to generate: %s', target)
+ return None
+ return raw_dependencies
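
The hex-suffix heuristic in isolation: a target with no build step whose name
ends in '_' plus a 32 character hex hash is assumed to be a phony rule and is
skipped silently. A sketch with a hypothetical target name:

  _HEX = frozenset('0123456789abcdef')

  target = 'aura_builder_44f0647318b16a7e1ed4eb96b2b14573'  # hypothetical
  parts = target.rsplit('_', 1)
  print(len(parts) == 2 and len(parts[1]) == 32 and
        _HEX.issuperset(parts[1]))  # True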
+
+
+def _recurse(build_dir, target, build_steps, rules_seen, blacklist):
+ raw_dependencies = _should_process(build_dir, target, build_steps, rules_seen)
+ rules_seen.add(target)
+ if raw_dependencies is None:
+ return []
+
+ out = [target]
+ # Filter out what we don't want to speed things up. This cuts off large parts
+ # of the dependency tree to analyze.
+ # The first item is the build rule, e.g. 'link', 'cxx', 'phony', 'stamp', etc.
+ dependencies = filter(blacklist, raw_dependencies.split(' ')[1:])
+ logging.debug('recurse(%s) -> %s', target, dependencies)
+ for dependency in dependencies:
+ out.extend(_recurse(
+ build_dir, dependency, build_steps, rules_seen, blacklist))
+ return out
+
+
+def _find_link(build_dir, target, build_steps, rules_seen, search_for):
+ raw_dependencies = _should_process(build_dir, target, build_steps, rules_seen)
+ rules_seen.add(target)
+ if raw_dependencies is None:
+ return
+
+ # Filter out what we don't want to speed things up. This cuts off large parts
+ # of the dependency tree to analyze.
+ # The first item is the build rule, e.g. 'link', 'cxx', 'phony', 'stamp', etc.
+ dependencies = filter(_simple_blacklist, raw_dependencies.split(' ')[1:])
+ for dependency in dependencies:
+ if dependency == search_for:
+ yield [dependency]
+ else:
+ for out in _find_link(
+ build_dir, dependency, build_steps, rules_seen, search_for):
+ yield [dependency] + out
+
+
+### Public API.
+
+
+def load_ninja(build_dir):
+ """Loads build.ninja and the tree of .ninja files in build_dir.
+
+ Returns:
+ dict of the target->build_step where build_step is an unprocessed str.
+
+ TODO(maruel): This should really just be done by ninja itself, then simply
+ process its output.
+ """
+ build_steps = {}
+ total = _load_ninja_recursively(build_dir, 'build.ninja', build_steps)
+ logging.info('Loaded %d ninja files, %d build steps', total, len(build_steps))
+ return build_steps
+
+
+def recurse(build_dir, target, build_steps, blacklist=_using_blacklist):
+ """Recursively returns all the interesting dependencies for the target
+ specified.
+ """
+ return _recurse(build_dir, target, build_steps, set(), blacklist)
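
Typical library usage, mirroring what isolate_driver.py now does (the build
directory and target are hypothetical). Note that when the target has a build
step, recurse() returns the target itself at the head of the list:

  import ninja_parser

  build_steps = ninja_parser.load_ninja('out/Release')
  deps = ninja_parser.recurse('out/Release', 'browser_tests_run', build_steps)
  print(deps[0])   # 'browser_tests_run'
  print(len(deps)) # the target plus every dependency kept by the blacklist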
+
+
+def find_link(build_dir, target, build_steps, search_for):
+ """Finds all the links from 'target' to 'search_for'.
+
+ Example:
+    With target='foo' and search_for='baz', if foo depends on bar which depends
+    on baz, it will yield only one chain, ['bar', 'baz']. If there were
+    multiple dependency links in the DAG, each one would be yielded as a list.
+ """
+ for link in _find_link(build_dir, target, build_steps, set(), search_for):
+ yield link
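
A sketch of tracing why a target pulls in a particular binary (names are
hypothetical; d8 is one of the binaries the commit message mentions). Each
yielded chain starts at the first dependency below the target, so prepend the
target for display, as main() does below:

  import ninja_parser

  build_steps = ninja_parser.load_ninja('out/Release')
  for chain in ninja_parser.find_link(
      'out/Release', 'unit_tests', build_steps, 'd8'):
    print(' -> '.join(['unit_tests'] + chain))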
+
+
+def post_process_deps(build_dir, dependencies):
+ """Processes the dependency list with OS specific rules.
+
+  It maps the import libraries and .TOC files back to the actual dynamic
+  libraries.
+
+ Must be run after the build is completed.
+ """
+ def filter_item(i):
+ if i.endswith('.so.TOC'):
+ # Remove only the suffix .TOC, not the .so!
+ return i[:-4]
+ if i.endswith('.dylib.TOC'):
+ # Remove only the suffix .TOC, not the .dylib!
+ return i[:-4]
+ if i.endswith('.dll.lib'):
+ # Remove only the suffix .lib, not the .dll!
+ return i[:-4]
+ return i
+
+ # Check for execute access. This gets rid of all the phony rules.
+ return [
+ i for i in map(filter_item, dependencies)
+ if os.access(os.path.join(build_dir, i), os.X_OK)
+ ]
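
The suffix mapping, reproduced inline since filter_item is local to the
function (a sketch of the same logic, not an import):

  for name in ('libbase.so.TOC', 'libbase.dylib.TOC', 'base.dll.lib'):
    mapped = name[:-4] if name.endswith(
        ('.so.TOC', '.dylib.TOC', '.dll.lib')) else name
    print('%-17s -> %s' % (name, mapped))
  # libbase.so.TOC    -> libbase.so
  # libbase.dylib.TOC -> libbase.dylib
  # base.dll.lib      -> base.dll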
+
+
+def main():
+ parser = optparse.OptionParser(
+ usage='%prog [options] <directory> <target> <search_for>',
+ description=sys.modules[__name__].__doc__)
+ parser.add_option(
+ '-v', '--verbose', action='count', default=0,
+ help='Use twice for more info')
+ options, args = parser.parse_args()
+
+ levels = (logging.ERROR, logging.INFO, logging.DEBUG)
+ logging.basicConfig(
+ level=levels[min(len(levels)-1, options.verbose)],
+ format='%(levelname)7s %(message)s')
+ if len(args) == 2:
+ build_dir, target = args
+ search_for = None
+ elif len(args) == 3:
+ build_dir, target, search_for = args
+ else:
+ parser.error('Please provide a directory, a target, optionally a link')
+
+ start = time.time()
+ build_dir = os.path.abspath(build_dir)
+ if not os.path.isdir(build_dir):
+ parser.error('build dir must exist')
+ build_steps = load_ninja(build_dir)
+
+ if search_for:
+ found_one = False
+ # Find how to get to link from target.
+ for rules in find_link(build_dir, target, build_steps, search_for):
+ found_one = True
+ print('%s -> %s' % (target, ' -> '.join(rules)))
+ if not found_one:
+      print('Failed to find a link between %s and %s' % (target, search_for))
+ end = time.time()
+ logging.info('Processing took %.3fs', end-start)
+ else:
+ binary_deps = post_process_deps(
+ build_dir, recurse(build_dir, target, build_steps))
+ end = time.time()
+ logging.info('Processing took %.3fs', end-start)
+ print('Binary dependencies:%s' % ''.join('\n ' + i for i in binary_deps))
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
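
Example invocations, run from src/ after a build (target names hypothetical):

  # List the binary runtime dependencies of a target:
  python tools/ninja_parser.py out/Release unit_tests_run

  # Show every dependency chain from unit_tests down to d8:
  python tools/ninja_parser.py -v out/Release unit_tests d8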