summaryrefslogtreecommitdiffstats
path: root/tools/isolate
diff options
context:
space:
mode:
authormaruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-03-16 04:18:23 +0000
committermaruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-03-16 04:18:23 +0000
commit50ec9f23f7673cc5a1ea350a6183c16ed414d30c (patch)
treeb9b987f97bc32f0f8a32f970616358e965c251e9 /tools/isolate
parenta30a9ba33df4cc031a83a7674170fec593f5673f (diff)
downloadchromium_src-50ec9f23f7673cc5a1ea350a6183c16ed414d30c.zip
chromium_src-50ec9f23f7673cc5a1ea350a6183c16ed414d30c.tar.gz
chromium_src-50ec9f23f7673cc5a1ea350a6183c16ed414d30c.tar.bz2
Implement dtrace log parsing for MacOSX.
Rename strace_inputs.py to trace_inputs.py since it's now using both strace and dtrace, depending on the platform. R=rogerta@chromium.org BUG=98636 Review URL: http://codereview.chromium.org/9706058 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@127100 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/isolate')
-rwxr-xr-xtools/isolate/strace_inputs.py162
-rw-r--r--tools/isolate/trace_child_process.py33
-rwxr-xr-xtools/isolate/trace_inputs.py354
3 files changed, 387 insertions, 162 deletions
diff --git a/tools/isolate/strace_inputs.py b/tools/isolate/strace_inputs.py
deleted file mode 100755
index cdb34f4..0000000
--- a/tools/isolate/strace_inputs.py
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2012 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Runs strace on a test and processes the logs to extract the dependencies from
-the source tree.
-
-Automatically extracts directories where all the files are used to make the
-dependencies list more compact.
-"""
-
-import os
-import re
-import subprocess
-import sys
-
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
-
-IGNORED = (
- '/dev',
- '/etc',
- '/home',
- '/lib',
- '/proc',
- '/sys',
- '/tmp',
- '/usr',
- '/var',
-)
-
-
-def gen_trace(cmd, cwd, logname, silent):
- """Runs strace on an executable."""
- strace = ['strace', '-f', '-e', 'trace=open', '-o', logname]
- stdout = stderr = None
- if silent:
- stdout = subprocess.PIPE
- stderr = subprocess.PIPE
-
- cmd = [os.path.normpath(os.path.join(cwd, c)) for c in cmd]
- p = subprocess.Popen(
- strace + cmd, cwd=cwd, stdout=stdout, stderr=stderr)
- out, err = p.communicate()
- if p.returncode != 0:
- print 'Failure: %d' % p.returncode
- # pylint: disable=E1103
- print ''.join(out.splitlines(True)[-100:])
- print ''.join(err.splitlines(True)[-100:])
- return p.returncode
-
-
-def parse_log(filename, blacklist):
- """Processes a strace log and returns the files opened and the files that do
- not exist.
-
- Most of the time, files that do not exist are temporary test files that should
- be put in /tmp instead. See http://crbug.com/116251
-
- TODO(maruel): Process chdir() calls so relative paths can be processed.
- """
- files = set()
- non_existent = set()
- for line in open(filename):
- # 1=pid, 2=filepath, 3=mode, 4=result
- m = re.match(r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$', line)
- if not m:
- continue
- if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3):
- # Not present or a directory.
- continue
- filepath = m.group(2)
- if blacklist(filepath):
- continue
- if not os.path.isfile(filepath):
- non_existent.add(filepath)
- else:
- files.add(filepath)
- return files, non_existent
-
-
-def relevant_files(files, root):
- """Trims the list of files to keep the expected files and unexpected files.
-
- Unexpected files are files that are not based inside the |root| directory.
- """
- expected = []
- unexpected = []
- for f in files:
- if f.startswith(root):
- expected.append(f[len(root):])
- else:
- unexpected.append(f)
- return sorted(set(expected)), sorted(set(unexpected))
-
-
-def extract_directories(files, root):
- """Detects if all the files in a directory were loaded and if so, replace the
- individual files by the directory entry.
- """
- directories = set(os.path.dirname(f) for f in files)
- files = set(files)
- for directory in sorted(directories, reverse=True):
- actual = set(
- os.path.join(directory, f) for f in
- os.listdir(os.path.join(root, directory))
- if not f.endswith(('.svn', '.pyc'))
- )
- if not (actual - files):
- files -= actual
- files.add(directory + '/')
- return sorted(files)
-
-
-def strace_inputs(unittest, cmd):
- """Tries to load the logs if available. If not, strace the test."""
- logname = os.path.join(BASE_DIR, os.path.basename(unittest))
- if not os.path.isfile(logname):
- returncode = gen_trace(cmd, ROOT_DIR, logname, True)
- if returncode:
- return returncode
-
- def blacklist(f):
- """Strips ignored paths."""
- return f.startswith(IGNORED) or f.endswith('.pyc')
-
- files, non_existent = parse_log(logname, blacklist)
- print('Total: %d' % len(files))
- print('Non existent: %d' % len(non_existent))
- for f in non_existent:
- print(' %s' % f)
-
- expected, unexpected = relevant_files(files, ROOT_DIR + '/')
- if unexpected:
- print('Unexpected: %d' % len(unexpected))
- for f in unexpected:
- print(' %s' % f)
-
- simplified = extract_directories(expected, ROOT_DIR)
- print('Interesting: %d reduced to %d' % (len(expected), len(simplified)))
- for f in simplified:
- print(' %s' % f)
-
- return 0
-
-
-def main():
- if len(sys.argv) < 3:
- print >> sys.stderr, (
- 'Usage: strace_inputs.py [testname] [cmd line...]\n'
- '\n'
- 'Example:\n'
- ' ./strace_inputs.py base_unittests testing/xvfb.py out/Release '
- 'out/Release/base_unittests')
- return 1
- return strace_inputs(sys.argv[1], sys.argv[2:])
-
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/tools/isolate/trace_child_process.py b/tools/isolate/trace_child_process.py
new file mode 100644
index 0000000..3a8367b
--- /dev/null
+++ b/tools/isolate/trace_child_process.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Waits for the go signal and replaces itself with the command to be run.
+
+Not meant to be used directly, only meant to be used by trace_inputs.py.
+"""
+
+import os
+import subprocess
+import sys
+
+
+def main():
+ signal = 'Go!'
+ value = sys.stdin.read(len(signal))
+ assert value == signal
+ sys.stdin.close()
+ # Replace the executable with an absolute path to make it easier to grok.
+ cmd = sys.argv[1:]
+ cmd[0] = os.path.abspath(cmd[0])
+ if cmd[0].endswith('.py'):
+ cmd.insert(0, sys.executable)
+ p = subprocess.Popen(cmd)
+ #print 'Child pid: %d' % p.pid
+ p.wait()
+ return p.returncode
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py
new file mode 100755
index 0000000..97a3928
--- /dev/null
+++ b/tools/isolate/trace_inputs.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Runs strace or dtrace on a test and processes the logs to extract the
+dependencies from the source tree.
+
+Automatically extracts directories where all the files are used to make the
+dependencies list more compact.
+"""
+
+import logging
+import optparse
+import os
+import re
+import subprocess
+import sys
+
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
+
+
+def isEnabledFor(level):
+ return logging.getLogger().isEnabledFor(level)
+
+
+class Strace(object):
+ """strace implies linux."""
+ IGNORED = (
+ '/dev',
+ '/etc',
+ '/lib',
+ '/proc',
+ '/sys',
+ '/tmp',
+ '/usr',
+ '/var',
+ )
+
+ @staticmethod
+ def gen_trace(cmd, cwd, logname):
+ """Runs strace on an executable."""
+ silent = not isEnabledFor(logging.DEBUG)
+ stdout = stderr = None
+ if silent:
+ stdout = subprocess.PIPE
+ stderr = subprocess.PIPE
+ trace_cmd = ['strace', '-f', '-e', 'trace=open', '-o', logname]
+ cmd = [os.path.normpath(os.path.join(cwd, c)) for c in cmd]
+ p = subprocess.Popen(
+ trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr)
+ out, err = p.communicate()
+ if p.returncode != 0:
+ print 'Failure: %d' % p.returncode
+ # pylint: disable=E1103
+ print ''.join(out.splitlines(True)[-100:])
+ print ''.join(err.splitlines(True)[-100:])
+ return p.returncode
+
+ @staticmethod
+ def parse_log(filename, blacklist):
+ """Processes a strace log and returns the files opened and the files that do
+ not exist.
+
+ Most of the time, files that do not exist are temporary test files that
+ should be put in /tmp instead. See http://crbug.com/116251
+
+ TODO(maruel): Process chdir() calls so relative paths can be processed.
+ """
+ files = set()
+ non_existent = set()
+ # 1=pid, 2=filepath, 3=mode, 4=result
+ RE = re.compile(
+ # PID open(PATH, MODE) = RESULT
+ r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$')
+ for line in open(filename):
+ m = RE.match(line)
+ if not m:
+ continue
+ if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3):
+ # Not present or a directory.
+ continue
+ filepath = m.group(2)
+ if blacklist(filepath):
+ continue
+ if filepath not in files and filepath not in non_existent:
+ if os.path.isfile(filepath):
+ files.add(filepath)
+ else:
+ non_existent.add(filepath)
+ return files, non_existent
+
+
+class Dtrace(object):
+ """Uses DTrace framework through dtrace. Requires root access.
+
+ Implies Mac OSX.
+
+ dtruss can't be used because it has compatibility issues with python.
+ """
+ IGNORED = (
+ '/.vol',
+ '/Library',
+ '/System',
+ '/dev',
+ '/etc',
+ '/private/var',
+ '/tmp',
+ '/usr',
+ '/var',
+ )
+
+ # pylint: disable=C0301
+ # To understand the following code, you'll want to take a look at:
+ # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
+ # and
+ # https://wikis.oracle.com/display/DTrace/Variables
+ D_CODE = """
+ /* Child process tracking.
+ * I'm really depressed that I need to do it myself. */
+ dtrace:::BEGIN {
+ trackedpid[ppid] = 1;
+ trackedpid[pid] = 1;
+ /* Signal gen_trace() that we are ready to trace. */
+ printf("Go! 1 %d:%d %s", ppid, pid, execname);
+ }
+
+ /* Make sure all child processes are tracked. This is not very efficient
+ * but for our use case, it's fine enough.
+ * TODO(maruel): We should properly track fork, execve, vfork and friends
+ * instead. */
+ syscall:::entry /trackedpid[ppid]/ {
+ trackedpid[pid] = 1;
+ }
+ syscall::exit:entry /trackedpid[pid]/ {
+ trackedpid[pid] = 0;
+ }
+
+ /* Finally what we care about! */
+ syscall::open*:entry /trackedpid[pid]/ {
+ self->arg0 = copyinstr(arg0);
+ self->arg1 = arg1;
+ self->arg2 = arg2;
+ }
+ syscall::open*:return /trackedpid[pid]/ {
+ printf("%d:%d \\"%s\\"; \\"%s\\"; \\"%d\\"; \\"%d\\" = %d",
+ ppid, pid, execname, self->arg0, self->arg1, self->arg2, errno);
+ self->arg0 = 0;
+ self->arg1 = 0;
+ self->arg2 = 0;
+ }
+ """
+
+ @classmethod
+ def gen_trace(cls, cmd, cwd, logname):
+ """Runs dtrace on an executable."""
+ silent = not isEnabledFor(logging.DEBUG)
+ print 'Running: %s' % cmd
+ signal = 'Go!'
+ logging.debug('Our pid: %d' % os.getpid())
+
+ # Part 1: start the child process.
+ stdout = stderr = None
+ if silent:
+ stdout = subprocess.PIPE
+ stderr = subprocess.PIPE
+ child_cmd = [
+ sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'),
+ ]
+ child = subprocess.Popen(
+ child_cmd + cmd,
+ stdin=subprocess.PIPE,
+ stdout=stdout,
+ stderr=stderr,
+ cwd=cwd)
+ logging.debug('Started child pid: %d' % child.pid)
+
+ # Part 2: start dtrace process.
+ trace_cmd = [
+ 'sudo',
+ 'dtrace',
+ '-x', 'dynvarsize=4m',
+ '-x', 'evaltime=exec',
+ '-n', cls.D_CODE,
+ '-o', '/dev/stderr',
+ '-p', str(child.pid),
+ ]
+ with open(logname, 'w') as logfile:
+ dtrace = subprocess.Popen(
+ trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
+ logging.debug('Started dtrace pid: %d' % dtrace.pid)
+
+ # Part 3: Read until the Go! signal is sent.
+ with open(logname, 'r') as logfile:
+ while True:
+ x = logfile.readline()
+ if signal in x:
+ break
+
+ # Part 4: We can now tell our child to go.
+ child.communicate(signal)
+
+ dtrace.wait()
+ if dtrace.returncode != 0:
+ print 'Failure: %d' % dtrace.returncode
+ with open(logname) as logfile:
+ print ''.join(logfile.readlines()[-100:])
+ # Find a better way.
+ os.remove(logname)
+ return dtrace.returncode
+
+ @staticmethod
+ def parse_log(filename, blacklist):
+ """Processes a dtrace log and returns the files opened and the files that do
+ not exist.
+
+ Most of the time, files that do not exist are temporary test files that
+ should be put in /tmp instead. See http://crbug.com/116251
+
+ TODO(maruel): Process chdir() calls so relative paths can be processed.
+ """
+ files = set()
+ non_existent = set()
+ ignored = set()
+ # 1=filepath, 2=flags, 3=mode, 4=result
+ RE = re.compile(
+ # CPU ID PROBE PPID PID EXECNAME
+ r'^\s+\d+\s+\d+\s+open[^\:]+:return \d+:\d+ \"[^\"]+\"; '
+ # PATH FLAGS MODE RESULT
+ r'\"([^\"]+)\"; \"([^\"]+)\"; \"([^\"]+)\" \= (.+)$')
+ for line in open(filename):
+ m = RE.match(line)
+ if not m:
+ continue
+ if not m.group(4).startswith('0'):
+ # Called failed.
+ continue
+ filepath = m.group(1)
+ if blacklist(filepath):
+ continue
+ if (filepath not in files and
+ filepath not in non_existent and
+ filepath not in ignored):
+ if os.path.isfile(filepath):
+ files.add(filepath)
+ elif not os.path.isdir(filepath):
+ # Silently ignore directories.
+ non_existent.add(filepath)
+ else:
+ ignored.add(filepath)
+ return files, non_existent
+
+
+def relevant_files(files, root):
+ """Trims the list of files to keep the expected files and unexpected files.
+
+ Unexpected files are files that are not based inside the |root| directory.
+ """
+ expected = []
+ unexpected = []
+ for f in files:
+ if f.startswith(root):
+ expected.append(f[len(root):])
+ else:
+ unexpected.append(f)
+ return sorted(set(expected)), sorted(set(unexpected))
+
+
+def extract_directories(files, root):
+ """Detects if all the files in a directory were loaded and if so, replace the
+ individual files by the directory entry.
+ """
+ directories = set(os.path.dirname(f) for f in files)
+ files = set(files)
+ for directory in sorted(directories, reverse=True):
+ actual = set(
+ os.path.join(directory, f) for f in
+ os.listdir(os.path.join(root, directory))
+ if not f.endswith(('.svn', '.pyc'))
+ )
+ if not (actual - files):
+ files -= actual
+ files.add(directory + '/')
+ return sorted(files)
+
+
+def trace_inputs(log, cmd, api):
+ """Tries to load the logs if available. If not, trace the test."""
+ logname = os.path.join(BASE_DIR, os.path.basename(log))
+ if not os.path.isfile(logname):
+ print 'Tracing... %s' % cmd
+ returncode = api.gen_trace(cmd, ROOT_DIR, logname)
+ if returncode:
+ return returncode
+
+ def blacklist(f):
+ """Strips ignored paths."""
+ return f.startswith(api.IGNORED) or f.endswith('.pyc')
+
+ print 'Loading traces... %s' % logname
+ files, non_existent = api.parse_log(logname, blacklist)
+ print('Total: %d' % len(files))
+ print('Non existent: %d' % len(non_existent))
+ for f in non_existent:
+ print(' %s' % f)
+
+ expected, unexpected = relevant_files(files, ROOT_DIR + '/')
+ if unexpected:
+ print('Unexpected: %d' % len(unexpected))
+ for f in unexpected:
+ print(' %s' % f)
+
+ simplified = extract_directories(expected, ROOT_DIR)
+ print('Interesting: %d reduced to %d' % (len(expected), len(simplified)))
+ for f in simplified:
+ print(' %s' % f)
+
+ return 0
+
+
+def main():
+ parser = optparse.OptionParser(
+ usage='%prog <options> [cmd line...]')
+ parser.allow_interspersed_args = False
+ parser.add_option(
+ '-v', '--verbose', action='count', default=0, help='Use multiple times')
+ parser.add_option('-l', '--log', help='Log file')
+
+ options, args = parser.parse_args()
+ level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
+ logging.basicConfig(
+ level=level,
+ format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s')
+
+ if not args:
+ parser.error('Must supply a command to run')
+ if not options.log:
+ parser.error('Must supply a log file with -l')
+
+ if sys.platform == 'linux2':
+ api = Strace
+ elif sys.platform == 'darwin':
+ api = Dtrace
+ else:
+ print >> sys.stderr, 'Unsupported platform'
+ return 1
+
+ return trace_inputs(options.log, args, api)
+
+
+if __name__ == '__main__':
+ sys.exit(main())