diff options
author | maruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-03-28 17:15:04 +0000 |
---|---|---|
committer | maruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-03-28 17:15:04 +0000 |
commit | 13ba777ea9008a059779c0065ec94ed458d8b380 (patch) | |
tree | 6da0055ecf0851b734e49277c9613164622a557b | |
parent | 19eb3c3ee57289bc8de5db2e8fba6a20f5fbd927 (diff) | |
download | chromium_src-13ba777ea9008a059779c0065ec94ed458d8b380.zip chromium_src-13ba777ea9008a059779c0065ec94ed458d8b380.tar.gz chromium_src-13ba777ea9008a059779c0065ec94ed458d8b380.tar.bz2 |
Implementing stateful directory tracking for dtrace on OSX.
Includes custom children process tracking and manual cwd tracking.
R=rogerta@chromium.org
BUG=98834
TEST=
Review URL: https://chromiumcodereview.appspot.com/9835084
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@129433 0039d316-1c4b-4281-b951-d872f2087c98
-rwxr-xr-x | tools/isolate/isolate_test.py | 23 | ||||
-rwxr-xr-x | tools/isolate/trace_inputs.py | 356 | ||||
-rwxr-xr-x | tools/isolate/trace_inputs_test.py | 54 |
3 files changed, 282 insertions, 151 deletions
diff --git a/tools/isolate/isolate_test.py b/tools/isolate/isolate_test.py index b4d94b2..6d19212 100755 --- a/tools/isolate/isolate_test.py +++ b/tools/isolate/isolate_test.py @@ -225,22 +225,13 @@ class Isolate(unittest.TestCase): # cmd[0] is not generated from infiles[0] so it's not using a relative path. self._expected_result( False, ['isolate_test.py'], ['isolate_test.py', '--ok'], False) - if sys.platform == 'linux2': - expected = ( - "{\n 'variables': {\n" - " 'isolate_files': [\n" - " '<(DEPTH)/isolate_test.py',\n" - " ],\n" - " 'isolate_dirs': [\n" - " ],\n },\n},\n") - else: - # TODO(maruel): BUG with relative paths on darwin. - expected = ( - "{\n 'variables': {\n" - " 'isolate_files': [\n" - " ],\n" - " 'isolate_dirs': [\n" - " ],\n },\n},\n") + expected = ( + "{\n 'variables': {\n" + " 'isolate_files': [\n" + " '<(DEPTH)/isolate_test.py',\n" + " ],\n" + " 'isolate_dirs': [\n" + " ],\n },\n},\n") self.assertEquals(expected, out) diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py index 512a4ec..04776d6 100755 --- a/tools/isolate/trace_inputs.py +++ b/tools/isolate/trace_inputs.py @@ -239,6 +239,12 @@ class Dtrace(object): Implies Mac OSX. dtruss can't be used because it has compatibility issues with python. + + Also, the pid->cwd handling needs to be done manually since OSX has no way to + get the absolute path of the 'cwd' dtrace variable from the probe. + + Also, OSX doesn't populate curpsinfo->pr_psargs properly, see + https://discussions.apple.com/thread/1980539. """ IGNORED = ( '/.vol', @@ -255,57 +261,239 @@ class Dtrace(object): # pylint: disable=C0301 # To understand the following code, you'll want to take a look at: # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html - # and # https://wikis.oracle.com/display/DTrace/Variables - # Note that cwd doesn't keep the absolute path so we need to compute it - # ourselve! + # http://docs.oracle.com/cd/E19205-01/820-4221/ + # + # The list of valid probes can be retrieved with: + # sudo dtrace -l -P syscall | less D_CODE = """ - /* Child process tracking. - * I'm really depressed that I need to do it myself. */ - dtrace:::BEGIN { - trackedpid[ppid] = 1; + proc:::start /trackedpid[ppid]/ { trackedpid[pid] = 1; - /* Signal gen_trace() that we are ready to trace. */ - printf("Go! 1 %d:%d %s", ppid, pid, execname); + current_processes += 1; + printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", + logindex, ppid, pid, probeprov, probename, execname, + current_processes); + logindex++; } - - /* Make sure all child processes are tracked. This is not very efficient - * but for our use case, it's fine enough. - * TODO(maruel): We should properly track fork, execve, vfork and friends - * instead. */ - syscall:::entry /trackedpid[ppid]/ { - trackedpid[pid] = 1; + proc:::exit /trackedpid[pid] && current_processes == 1/ { + trackedpid[pid] = 0; + current_processes -= 1; + printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", + logindex, ppid, pid, probeprov, probename, execname, + current_processes); + logindex++; + exit(0); } - syscall::exit:entry /trackedpid[pid]/ { + proc:::exit /trackedpid[pid]/ { trackedpid[pid] = 0; + current_processes -= 1; + printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", + logindex, ppid, pid, probeprov, probename, execname, + current_processes); + logindex++; } /* Finally what we care about! */ syscall::open*:entry /trackedpid[pid]/ { - self->arg0 = copyinstr(arg0); + self->arg0 = arg0; self->arg1 = arg1; self->arg2 = arg2; - self->cwd = cwd; } - syscall::open*:return /trackedpid[pid]/ { - printf("%d:%d \\"%s\\"; \\"%s\\"; \\"%d\\"; \\"%d\\" = %d", - ppid, pid, execname, self->arg0, self->arg1, self->arg2, errno); + syscall::open*:return /trackedpid[pid] && errno == 0/ { + printf("%d %d:%d %s(\\"%s\\", %d, %d) = %d\\n", + logindex, ppid, pid, probefunc, copyinstr(self->arg0), + self->arg1, self->arg2, errno); + logindex++; self->arg0 = 0; self->arg1 = 0; self->arg2 = 0; } + + syscall::rename:entry /trackedpid[pid]/ { + self->arg0 = arg0; + self->arg1 = arg1; + } + syscall::rename:return /trackedpid[pid]/ { + printf("%d %d:%d %s(\\"%s\\", \\"%s\\") = %d\\n", + logindex, ppid, pid, probefunc, copyinstr(self->arg0), + copyinstr(self->arg1), errno); + logindex++; + self->arg0 = 0; + self->arg1 = 0; + } + /* Track chdir, it's painful because it is only receiving relative path */ syscall::chdir:entry /trackedpid[pid]/ { - self->arg0 = copyinstr(arg0); + self->arg0 = arg0; } syscall::chdir:return /trackedpid[pid] && errno == 0/ { - printf("%d:%d \\"%s\\"; \\"%s\\" = %d", - ppid, pid, execname, self->arg0, errno); - self->cwd = self->arg0; + printf("%d %d:%d %s(\\"%s\\") = %d\\n", + logindex, ppid, pid, probefunc, copyinstr(self->arg0), errno); + logindex++; + self->arg0 = 0; } + /* TODO(maruel): *stat* functions and friends + syscall::access:return, + syscall::chdir:return, + syscall::chflags:return, + syscall::chown:return, + syscall::chroot:return, + syscall::getattrlist:return, + syscall::getxattr:return, + syscall::lchown:return, + syscall::lstat64:return, + syscall::lstat:return, + syscall::mkdir:return, + syscall::pathconf:return, + syscall::readlink:return, + syscall::removexattr:return, + syscall::setxattr:return, + syscall::stat64:return, + syscall::stat:return, + syscall::truncate:return, + syscall::unlink:return, + syscall::utimes:return, + */ """ @classmethod + def code(cls, pid, cwd): + """Setups the D code to implement child process tracking. + + Injects a fake chdir() trace to simplify parsing. The reason is that the + child process is already running at that point so: + - no proc_start() is logged for it. + - there is no way to figure out the absolute path of cwd in kernel on OSX + + Since the child process is already started, initialize current_processes to + 1. + """ + pid = str(pid) + cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%') + return ( + 'dtrace:::BEGIN {\n' + ' current_processes = 1;\n' + ' logindex = 0;\n' + ' trackedpid[') + pid + ('] = 1;\n' + ' printf("%d %d:%d chdir(\\"' + cwd + '\\") = 0\\n",\n' + ' logindex, 1, ' + pid + ');\n' + ' logindex++;\n' + ' printf("%d %d:%d %s_%s() = 0\\n",\n' + ' logindex, ppid, pid, probeprov, probename);\n' + ' logindex++;\n' + '}\n') + cls.D_CODE + + class _Context(object): + """Processes a dtrace log line and keeps the list of existent and non + existent files accessed. + + Ignores directories. + """ + # This is the most common format. index pid function(args) = result + RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$') + + # Arguments parsing. + RE_CHDIR = re.compile(r'^\"(.+?)\"$') + RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (\d+)$') + RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') + + O_DIRECTORY = 0x100000 + + def __init__(self, blacklist): + # TODO(maruel): Handling chdir() and cwd in general on OSX is tricky + # because OSX only keeps relative directory names. In addition, cwd is a + # process local variable so forks need to be properly traced and cwd + # saved. + self._cwd = {} + self.blacklist = blacklist + self.files = set() + self.non_existent = set() + + def on_line(self, line): + m = self.RE_HEADER.match(line) + assert m, line + fn = getattr( + self, + 'handle_%s' % m.group(3).replace('-', '_'), + self._handle_ignored) + return fn( + int(m.group(1)), + int(m.group(2)), + m.group(3), + m.group(4), + m.group(5)) + + def handle_dtrace_BEGIN(self, _ppid, _pid, _function, args, _result): + pass + + def handle_proc_start(self, ppid, pid, _function, _args, result): + """Transfers cwd.""" + assert result == '0' + self._cwd[pid] = self._cwd[ppid] + + def handle_proc_exit(self, _ppid, pid, _function, _args, _result): + """Removes cwd.""" + del self._cwd[pid] + + def handle_chdir(self, _ppid, pid, _function, args, result): + """Updates cwd.""" + if result.startswith('0'): + cwd = self.RE_CHDIR.match(args).group(1) + if not cwd.startswith('/'): + cwd2 = os.path.join(self._cwd[pid], cwd) + logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) + self._cwd[pid] = cwd2 + else: + logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) + self._cwd[pid] = cwd + else: + assert False, 'Unexecpected fail: %s' % result + + def handle_open_nocancel(self, ppid, pid, function, args, result): + return self.handle_open(ppid, pid, function, args, result) + + def handle_open(self, _ppid, pid, _function, args, result): + args = self.RE_OPEN.match(args).groups() + flag = int(args[1]) + if self.O_DIRECTORY & flag == self.O_DIRECTORY: + # Ignore directories. + return + self._handle_file(pid, args[0], result) + + def handle_rename(self, _ppid, pid, _function, args, result): + args = self.RE_RENAME.match(args).groups() + self._handle_file(pid, args[0], result) + self._handle_file(pid, args[1], result) + + def _handle_file(self, pid, filepath, result): + if result.startswith(('-1', '2')): + return + orig_filepath = filepath + if not filepath.startswith('/'): + filepath = os.path.join(self._cwd[pid], filepath) + filepath = os.path.normpath(filepath) + if self.blacklist(filepath): + return + # Sadly, still need to filter out directories here; + # saw open_nocancel(".", 0, 0) = 0 lines. + if (filepath not in self.files and + filepath not in self.non_existent and + not os.path.isdir(filepath)): + if orig_filepath: + logging.debug( + '_handle_file(%d, %s) -> %s' % (pid, orig_filepath, filepath)) + else: + logging.debug('_handle_file(%d, %s)' % (pid, filepath)) + if os.path.isfile(filepath): + self.files.add(filepath) + else: + self.non_existent.add(filepath) + + @staticmethod + def _handle_ignored(_ppid, pid, function, args, result): + logging.debug('%d %s(%s) = %s' % (pid, function, args, result)) + + @classmethod def gen_trace(cls, cmd, cwd, logname): """Runs dtrace on an executable.""" logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname)) @@ -331,92 +519,84 @@ class Dtrace(object): logging.debug('Started child pid: %d' % child.pid) # Part 2: start dtrace process. + # Note: do not use the -p flag. It's useless if the initial process quits + # too fast, resulting in missing traces from the grand-children. The D code + # manages the dtrace lifetime itself. trace_cmd = [ 'sudo', 'dtrace', '-x', 'dynvarsize=4m', '-x', 'evaltime=exec', - '-n', cls.D_CODE, + '-n', cls.code(child.pid, cwd), '-o', '/dev/stderr', - '-p', str(child.pid), + '-q', ] with open(logname, 'w') as logfile: - # TODO(maruel): Inject a chdir() call with the absolute path (!) of cwd to - # be able to reconstruct the full paths. - #logfile.write('0 chdir("%s") = 0\n' % cwd) - #logfile.flush() dtrace = subprocess.Popen( trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) logging.debug('Started dtrace pid: %d' % dtrace.pid) - # Part 3: Read until the Go! signal is sent. + # Part 3: Read until one line is printed, which signifies dtrace is up and + # ready. with open(logname, 'r') as logfile: - while True: - x = logfile.readline() - if signal in x: + while 'dtrace_BEGIN' not in logfile.readline(): + if dtrace.poll() is not None: break - # Part 4: We can now tell our child to go. - child.communicate(signal) - - dtrace.wait() - if dtrace.returncode != 0: - print 'Failure: %d' % dtrace.returncode - with open(logname) as logfile: - print ''.join(logfile.readlines()[-100:]) - # Find a better way. - os.remove(logname) + try: + # Part 4: We can now tell our child to go. + child.communicate(signal) + + dtrace.wait() + if dtrace.returncode != 0: + print 'Failure: %d' % dtrace.returncode + with open(logname) as logfile: + print ''.join(logfile.readlines()[-100:]) + # Find a better way. + os.remove(logname) + else: + # Short the log right away to simplify our life. There isn't much + # advantage in keeping it out of order. + cls._sort_log(logname) + except KeyboardInterrupt: + # Still sort when testing. + cls._sort_log(logname) + raise + + # Note that we ignore the child result code. return dtrace.returncode - @staticmethod - def parse_log(filename, blacklist): + @classmethod + def parse_log(cls, filename, blacklist): """Processes a dtrace log and returns the files opened and the files that do not exist. + It does not track directories. + Most of the time, files that do not exist are temporary test files that should be put in /tmp instead. See http://crbug.com/116251 - - TODO(maruel): Process chdir() calls so relative paths can be processed. """ logging.info('parse_log(%s, %s)' % (filename, blacklist)) - files = set() - non_existent = set() - ignored = set() - # 1=filepath, 2=flags, 3=mode, 4=cwd 5=result - re_open = re.compile( - # CPU ID PROBE PPID PID EXECNAME - r'^\s+\d+\s+\d+\s+open[^\:]+:return \d+:\d+ \"[^\"]+\"; ' - # PATH FLAGS MODE RESULT - r'\"([^\"]+)\"; \"([^\"]+)\"; \"([^\"]+)\" \= (.+)$') - # TODO(maruel): Handling chdir() and cwd in general on OSX is complex - # because OSX only keeps relative directory names. In addition, cwd is a - # process local variable so forks need to be properly traced and cwd saved. - for line in open(filename): - m = re_open.match(line) - if not m: - continue - if not m.group(4).startswith('0'): - # Called failed. - continue - filepath = m.group(1) - if not filepath.startswith('/'): - # TODO(maruel): This is wrong since the cwd dtrace variable on OSX is - # not an absolute path. - # filepath = os.path.join(m.group(cwd), filepath) - pass - if blacklist(filepath): - continue - if (filepath not in files and - filepath not in non_existent and - filepath not in ignored): - if os.path.isfile(filepath): - files.add(filepath) - elif not os.path.isdir(filepath): - # Silently ignore directories. - non_existent.add(filepath) - else: - ignored.add(filepath) - return files, non_existent + context = cls._Context(blacklist) + for line in open(filename, 'rb'): + context.on_line(line) + # Resolve any symlink we hit. + return ( + set(os.path.realpath(f) for f in context.files), + set(os.path.realpath(f) for f in context.non_existent)) + + @staticmethod + def _sort_log(logname): + """Sorts the log back in order when each call occured. + + dtrace doesn't save the buffer in strict order since it keeps one buffer per + CPU. + """ + with open(logname, 'rb') as logfile: + lines = [f for f in logfile.readlines() if f.strip()] + lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) + with open(logname, 'wb') as logfile: + logfile.write(''.join(lines)) def relevant_files(files, root): diff --git a/tools/isolate/trace_inputs_test.py b/tools/isolate/trace_inputs_test.py index e1c391f..77c6bdc 100755 --- a/tools/isolate/trace_inputs_test.py +++ b/tools/isolate/trace_inputs_test.py @@ -58,20 +58,9 @@ class TraceInputs(unittest.TestCase): ] def test_trace(self): - if sys.platform == 'linux2': - return self._test_trace_linux() - if sys.platform == 'darwin': - return self._test_trace_mac() - print 'Unsupported: %s' % sys.platform - - def test_trace_gyp(self): - if sys.platform == 'linux2': - return self._test_trace_gyp_linux() - if sys.platform == 'darwin': - return self._test_trace_gyp_mac() - print 'Unsupported: %s' % sys.platform - - def _test_trace_linux(self): + if sys.platform not in ('linux2', 'darwin'): + print 'WARNING: unsupported: %s' % sys.platform + return expected_end = [ "Interesting: 4 reduced to 3", " data/trace_inputs/", @@ -88,7 +77,10 @@ class TraceInputs(unittest.TestCase): # virtualenv usage. self.assertEquals(expected_end, actual[-len(expected_end):]) - def _test_trace_gyp_linux(self): + def test_trace_gyp(self): + if sys.platform not in ('linux2', 'darwin'): + print 'WARNING: unsupported: %s' % sys.platform + return expected = ( "{\n" " 'variables': {\n" @@ -104,38 +96,6 @@ class TraceInputs(unittest.TestCase): actual = self._execute(self._gyp() + ['trace_inputs_test.py', '--child1']) self.assertEquals(expected, actual) - def _test_trace_mac(self): - # It is annoying in the case of dtrace because it requires root access. - # TODO(maruel): BUG: Note that child.py is missing. - expected = ( - "Total: 2\n" - "Non existent: 0\n" - "Interesting: 2 reduced to 2\n" - " trace_inputs.py\n" - " trace_inputs_test.py\n") - actual = self._execute( - ['trace_inputs_test.py', '--child1']).splitlines(True) - self.assertTrue(actual[0].startswith('Tracing... [')) - self.assertTrue(actual[1].startswith('Loading traces... ')) - self.assertEquals(expected, ''.join(actual[2:])) - - def _test_trace_gyp_mac(self): - # It is annoying in the case of dtrace because it requires root access. - # TODO(maruel): BUG: Note that child.py is missing. - expected = ( - "{\n" - " 'variables': {\n" - " 'isolate_files': [\n" - " '<(DEPTH)/trace_inputs.py',\n" - " '<(DEPTH)/trace_inputs_test.py',\n" - " ],\n" - " 'isolate_dirs': [\n" - " ],\n" - " },\n" - "},\n") - actual = self._execute(self._gyp() + ['trace_inputs_test.py', '--child1']) - self.assertEquals(expected, actual) - def child1(): print 'child1' |