diff options
-rwxr-xr-x | tools/isolate/isolate.py | 59 | ||||
-rwxr-xr-x | tools/isolate/isolate_test.py | 1 | ||||
-rwxr-xr-x | tools/isolate/trace_inputs.py | 237 | ||||
-rwxr-xr-x | tools/isolate/trace_inputs_test.py | 74 |
4 files changed, 289 insertions, 82 deletions
diff --git a/tools/isolate/isolate.py b/tools/isolate/isolate.py index 6fe7e8a..9a4dcdc 100755 --- a/tools/isolate/isolate.py +++ b/tools/isolate/isolate.py @@ -74,7 +74,7 @@ def separate_inputs_command(args, root, files): return [relpath(os.path.join(cwd, arg), root) for arg in args], cmd -def isolate(outdir, resultfile, indir, infiles, mode, read_only, cmd): +def isolate(outdir, resultfile, indir, infiles, mode, read_only, cmd, no_save): """Main function to isolate a target with its dependencies. Arguments: @@ -85,12 +85,14 @@ def isolate(outdir, resultfile, indir, infiles, mode, read_only, cmd): - mode: Action to do. See file level docstring. - read_only: Makes the temporary directory read only. - cmd: Command to execute. + - no_save: If True, do not touch resultfile. Some arguments are optional, dependending on |mode|. See the corresponding MODE<mode> function for the exact behavior. """ mode_fn = getattr(sys.modules[__name__], 'MODE' + mode) assert mode_fn + assert os.path.isabs(resultfile) infiles = tree_creator.expand_directories( indir, infiles, lambda x: re.match(r'.*\.(svn|pyc)$', x)) @@ -120,12 +122,13 @@ def isolate(outdir, resultfile, indir, infiles, mode, read_only, cmd): result = mode_fn( outdir, indir, dictfiles, read_only, cmd, relative_cwd, resultfile) - if result == 0: + if result == 0 and not no_save: # Saves the resulting file. out = { 'command': cmd, 'relative_cwd': relative_cwd, 'files': dictfiles, + 'read_only': read_only, } with open(resultfile, 'wb') as f: json.dump(out, f, indent=2, sort_keys=True) @@ -196,16 +199,14 @@ def MODEtrace( It constructs the equivalent of dictfiles. It is hardcoded to base the checkout at src/. """ - gyppath = os.path.relpath(relative_cwd, indir) - cwd = os.path.join(indir, relative_cwd) - logging.info('Running %s, cwd=%s' % (cmd, cwd)) + logging.info('Running %s, cwd=%s' % (cmd, os.path.join(indir, relative_cwd))) return trace_inputs.trace_inputs( '%s.log' % resultfile, cmd, - cwd, - gyppath, indir, - True) + relative_cwd, + os.path.dirname(resultfile), # Guesswork here. + False) def get_valid_modes(): @@ -228,7 +229,7 @@ def main(): help='Determines the action to be taken: %s' % ', '.join(valid_modes)) parser.add_option( '--result', metavar='FILE', - help='Output file containing the json information about inputs') + help='File containing the json information about inputs') parser.add_option( '--root', metavar='DIR', help='Base directory to fetch files, required') parser.add_option( @@ -237,9 +238,12 @@ def main(): 'For run and remap, uses a /tmp subdirectory. For the other modes, ' 'defaults to the directory containing --result') parser.add_option( - '--read-only', action='store_true', + '--read-only', action='store_true', default=False, help='Make the temporary tree read-only') parser.add_option( + '--from-results', action='store_true', + help='Loads everything from the result file instead of generating it') + parser.add_option( '--files', metavar='FILE', help='File to be read containing input files') @@ -249,10 +253,23 @@ def main(): level=level, format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s') - if not options.root: - parser.error('--root is required.') + if not options.mode: + parser.error('--mode is required') + if not options.result: parser.error('--result is required.') + if options.from_results: + if not options.root: + options.root = os.getcwd() + if args: + parser.error('Arguments cannot be used with --from-result') + if options.files: + parser.error('--files cannot be used with --from-result') + else: + if not options.root: + parser.error('--root is required.') + + options.result = os.path.abspath(options.result) # Normalize the root input directory. indir = os.path.normpath(options.root) @@ -265,20 +282,28 @@ def main(): logging.info('sys.argv: %s' % sys.argv) logging.info('cwd: %s' % os.getcwd()) logging.info('Args: %s' % args) - infiles, cmd = separate_inputs_command(args, indir, options.files) - if not infiles: - parser.error('Need at least one input file to map') + if not options.from_results: + infiles, cmd = separate_inputs_command(args, indir, options.files) + if not infiles: + parser.error('Need at least one input file to map') + else: + data = json.load(open(options.result)) + cmd = data['command'] + infiles = data['files'].keys() + os.chdir(data['relative_cwd']) + logging.info('infiles: %s' % infiles) try: return isolate( options.outdir, - os.path.abspath(options.result), + options.result, indir, infiles, options.mode, options.read_only, - cmd) + cmd, + options.from_results) except tree_creator.MappingError, e: print >> sys.stderr, str(e) return 1 diff --git a/tools/isolate/isolate_test.py b/tools/isolate/isolate_test.py index 58cc8b2..b4d94b2 100755 --- a/tools/isolate/isolate_test.py +++ b/tools/isolate/isolate_test.py @@ -55,6 +55,7 @@ class Isolate(unittest.TestCase): [unicode(x) for x in args], u'files': dict((unicode(f), {u'mode': mode(f)}) for f in files), u'relative_cwd': u'.', + u'read_only': False, } if with_hash: for filename in expected[u'files']: diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py index 04065255..512a4ec 100755 --- a/tools/isolate/trace_inputs.py +++ b/tools/isolate/trace_inputs.py @@ -29,6 +29,7 @@ def isEnabledFor(level): class Strace(object): """strace implies linux.""" IGNORED = ( + '/bin', '/dev', '/etc', '/lib', @@ -39,8 +40,146 @@ class Strace(object): '/var', ) - @staticmethod - def gen_trace(cmd, cwd, logname): + class _Context(object): + """Processes a strace log line and keeps the list of existent and non + existent files accessed. + + Ignores directories. + """ + # This is the most common format. pid function(args) = result + RE_HEADER = re.compile(r'^(\d+)\s+([^\(]+)\((.+?)\)\s+= (.+)$') + # An interrupted function call, only grab the minimal header. + RE_UNFINISHED = re.compile(r'^(\d+)\s+([^\(]+).*$') + UNFINISHED = ' <unfinished ...>' + # A resumed function call. + RE_RESUMED = re.compile(r'^(\d+)\s+<\.\.\. ([^ ]+) resumed> (.+)$') + # A process received a signal. + RE_SIGNAL = re.compile(r'^\d+\s+--- SIG[A-Z]+ .+ ---') + # A process didn't handle a signal. + RE_KILLED = re.compile(r'^(\d+) \+\+\+ killed by ([A-Z]+) \+\+\+$') + + # Arguments parsing. + RE_CHDIR = re.compile(r'^\"(.+?)\"$') + RE_EXECVE = re.compile(r'^\"(.+?)\", \[.+?\], \[.+?\]$') + RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+)$') + RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+), (\d+)$') + RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') + + def __init__(self, blacklist): + self._cwd = {} + self.blacklist = blacklist + self.files = set() + self.non_existent = set() + # Key is a tuple(pid, function name) + self._pending_calls = {} + + @classmethod + def traces(cls): + prefix = 'handle_' + return [i[len(prefix):] for i in dir(cls) if i.startswith(prefix)] + + def on_line(self, line): + line = line.strip() + if self.RE_SIGNAL.match(line): + # Ignore signals. + return + + m = self.RE_KILLED.match(line) + if m: + self.handle_exit_group(int(m.group(1)), m.group(2), None, None) + return + + if line.endswith(self.UNFINISHED): + line = line[:-len(self.UNFINISHED)] + m = self.RE_UNFINISHED.match(line) + assert m, line + self._pending_calls[(m.group(1), m.group(2))] = line + return + + m = self.RE_RESUMED.match(line) + if m: + pending = self._pending_calls.pop((m.group(1), m.group(2))) + # Reconstruct the line. + line = pending + m.group(3) + + m = self.RE_HEADER.match(line) + assert m, line + return getattr(self, 'handle_%s' % m.group(2))( + int(m.group(1)), + m.group(2), + m.group(3), + m.group(4)) + + def handle_chdir(self, pid, _function, args, result): + """Updates cwd.""" + if result.startswith('0'): + cwd = self.RE_CHDIR.match(args).group(1) + if not cwd.startswith('/'): + cwd2 = os.path.join(self._cwd[pid], cwd) + logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) + self._cwd[pid] = cwd2 + else: + logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) + self._cwd[pid] = cwd + else: + assert False, 'Unexecpected fail: %s' % result + + def handle_clone(self, pid, _function, _args, result): + """Transfers cwd.""" + if result == '? ERESTARTNOINTR (To be restarted)': + return + self._cwd[int(result)] = self._cwd[pid] + + def handle_execve(self, pid, _function, args, result): + self._handle_file(pid, self.RE_EXECVE.match(args).group(1), result) + + def handle_exit_group(self, pid, _function, _args, _result): + """Removes cwd.""" + del self._cwd[pid] + + @staticmethod + def handle_fork(_pid, _function, args, result): + assert False, (args, result) + + def handle_open(self, pid, _function, args, result): + args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups() + if 'O_DIRECTORY' in args[1]: + return + self._handle_file(pid, args[0], result) + + def handle_rename(self, pid, _function, args, result): + args = self.RE_RENAME.match(args).groups() + self._handle_file(pid, args[0], result) + self._handle_file(pid, args[1], result) + + @staticmethod + def handle_stat64(_pid, _function, args, result): + assert False, (args, result) + + @staticmethod + def handle_vfork(_pid, _function, args, result): + assert False, (args, result) + + def _handle_file(self, pid, filepath, result): + if result.startswith('-1'): + return + if not filepath.startswith('/'): + filepath2 = os.path.join(self._cwd[pid], filepath) + logging.debug('_handle_file(%d, %s) -> %s' % (pid, filepath, filepath2)) + filepath = filepath2 + else: + logging.debug('_handle_file(%d, %s)' % (pid, filepath)) + + if self.blacklist(filepath): + return + if filepath not in self.files and filepath not in self.non_existent: + if os.path.isfile(filepath): + self.files.add(filepath) + else: + self.non_existent.add(filepath) + + @classmethod + def gen_trace(cls, cmd, cwd, logname): """Runs strace on an executable.""" logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname)) silent = not isEnabledFor(logging.INFO) @@ -48,7 +187,8 @@ class Strace(object): if silent: stdout = subprocess.PIPE stderr = subprocess.PIPE - trace_cmd = ['strace', '-f', '-e', 'trace=open,chdir', '-o', logname] + traces = ','.join(cls._Context.traces()) + trace_cmd = ['strace', '-f', '-e', 'trace=%s' % traces, '-o', logname] p = subprocess.Popen( trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr) out, err = p.communicate() @@ -60,7 +200,8 @@ class Strace(object): with open(logname) as f: content = f.read() with open(logname, 'w') as f: - f.write('0 chdir("%s") = 0\n' % cwd) + pid = content.split(' ', 1)[0] + f.write('%s chdir("%s") = 0\n' % (pid, cwd)) f.write(content) if p.returncode != 0: @@ -72,53 +213,24 @@ class Strace(object): print ''.join(err.splitlines(True)[-100:]) return p.returncode - @staticmethod - def parse_log(filename, blacklist): + @classmethod + def parse_log(cls, filename, blacklist): """Processes a strace log and returns the files opened and the files that do not exist. + It does not track directories. + Most of the time, files that do not exist are temporary test files that should be put in /tmp instead. See http://crbug.com/116251 """ logging.info('parse_log(%s, %s)' % (filename, blacklist)) - files = set() - non_existent = set() - # 1=pid, 2=filepath, 3=mode, 4=result - re_open = re.compile( - # PID open(PATH, MODE) = RESULT - r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$') - # 1=pid 2=path 3=result - re_chdir = re.compile( - # PID chdir(PATH) = RESULT - r'^(\d+)\s+chdir\("([^"]+)"\)\s+= (.+)$') - - # TODO(maruel): This code is totally wrong. cwd is a process local variable - # so this needs to be a dict with key = pid. - cwd = None + context = cls._Context(blacklist) for line in open(filename): - m = re_open.match(line) - if m: - if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3): - # Not present or a directory. - continue - filepath = m.group(2) - if not filepath.startswith('/'): - filepath = os.path.join(cwd, filepath) - if blacklist(filepath): - continue - if filepath not in files and filepath not in non_existent: - if os.path.isfile(filepath): - files.add(filepath) - else: - non_existent.add(filepath) - m = re_chdir.match(line) - if m: - if m.group(3).startswith('0'): - cwd = m.group(2) - else: - assert False, 'Unexecpected fail: %s' % line - - return files, non_existent + context.on_line(line) + # Resolve any symlink we hit. + return ( + set(os.path.realpath(f) for f in context.files), + set(os.path.realpath(f) for f in context.non_existent)) class Dtrace(object): @@ -316,7 +428,9 @@ def relevant_files(files, root): unexpected = [] for f in files: if f.startswith(root): - expected.append(f[len(root):]) + f = f[len(root):] + assert f + expected.append(f) else: unexpected.append(f) return sorted(set(expected)), sorted(set(unexpected)) @@ -347,13 +461,17 @@ def trace_inputs( Symlinks are not processed at all. """ logging.debug( - 'trace_inputs(%s, %s, %s, %s, %s)' % ( - logfile, cmd, root_dir, gyp_proj_dir, product_dir)) + 'trace_inputs(%s, %s, %s, %s, %s, %s)' % ( + logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace)) # It is important to have unambiguous path. assert os.path.isabs(root_dir), root_dir assert os.path.isabs(logfile), logfile - assert os.path.isabs(cmd[0]), cmd[0] + assert ( + (os.path.isfile(logfile) and not force_trace) or os.path.isabs(cmd[0]) + ), cmd[0] + # Resolve any symlink + root_dir = os.path.realpath(root_dir) def print_if(txt): if gyp_proj_dir is None: @@ -371,7 +489,11 @@ def trace_inputs( if os.path.isfile(logfile): os.remove(logfile) print_if('Tracing... %s' % cmd) - returncode = api.gen_trace(cmd, root_dir, logfile) + cwd = root_dir + # TODO(maruel): If --gyp is specified, use it as the cwd. + #if gyp_proj_dir: + # cwd = os.path.join(cwd, gyp_proj_dir) + returncode = api.gen_trace(cmd, cwd, logfile) if returncode and not force_trace: return returncode @@ -400,6 +522,7 @@ def trace_inputs( if gyp_proj_dir is not None: def cleanuppath(x): + """Cleans up a relative path.""" if x: x = x.rstrip('/') if x == '.': @@ -413,10 +536,12 @@ def trace_inputs( def fix(f): """Bases the file on the most restrictive variable.""" + logging.debug('fix(%s)' % f) if product_dir and f.startswith(product_dir): return '<(PRODUCT_DIR)/%s' % f[len(product_dir):] elif gyp_proj_dir and f.startswith(gyp_proj_dir): - return f[len(gyp_proj_dir):] + # May be empty if the whole directory containing the gyp file is needed. + return f[len(gyp_proj_dir):] or './' else: return '<(DEPTH)/%s' % f @@ -456,7 +581,8 @@ def main(): parser.add_option( '--root-dir', default=ROOT_DIR, help='Root directory to base everything off. Default: %default') - parser.add_option('-f', '--force', help='Force to retrace the file') + parser.add_option( + '-f', '--force', action='store_true', help='Force to retrace the file') options, args = parser.parse_args() level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)] @@ -464,12 +590,17 @@ def main(): level=level, format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s') - if not args: - parser.error('Must supply a command to run') if not options.log: parser.error('Must supply a log file with -l') + if not args: + if not os.path.isfile(options.log) or options.force: + parser.error('Must supply a command to run') + else: + args[0] = os.path.abspath(args[0]) + + if options.root_dir: + options.root_dir = os.path.abspath(options.root_dir) - args[0] = os.path.abspath(args[0]) return trace_inputs( os.path.abspath(options.log), args, diff --git a/tools/isolate/trace_inputs_test.py b/tools/isolate/trace_inputs_test.py index d1887a1..e1c391f 100755 --- a/tools/isolate/trace_inputs_test.py +++ b/tools/isolate/trace_inputs_test.py @@ -18,9 +18,15 @@ VERBOSE = False class CalledProcessError(subprocess.CalledProcessError): """Makes 2.6 version act like 2.7""" - def __init__(self, returncode, cmd, output): + def __init__(self, returncode, cmd, output, cwd): super(CalledProcessError, self).__init__(returncode, cmd) self.output = output + self.cwd = cwd + + def __str__(self): + return super(CalledProcessError, self).__str__() + ( + '\n' + 'cwd=%s\n%s') % (self.cwd, self.output) class TraceInputs(unittest.TestCase): @@ -33,19 +39,24 @@ class TraceInputs(unittest.TestCase): def _execute(self, args): cmd = [ - sys.executable, os.path.join(ROOT_DIR, 'trace_inputs.py'), - '--log', self.log, - '--gyp', os.path.join('data', 'trace_inputs'), - '--product', '.', # Not tested. - '--root-dir', ROOT_DIR, + sys.executable, os.path.join(ROOT_DIR, 'trace_inputs.py'), + '--log', self.log, + '--root-dir', ROOT_DIR, ] + args p = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=ROOT_DIR) out = p.communicate()[0] if p.returncode: - raise CalledProcessError(p.returncode, cmd, out) + raise CalledProcessError(p.returncode, cmd, out, ROOT_DIR) return out + @staticmethod + def _gyp(): + return [ + '--gyp', os.path.join('data', 'trace_inputs'), + '--product', '.', # Not tested. + ] + def test_trace(self): if sys.platform == 'linux2': return self._test_trace_linux() @@ -53,8 +64,31 @@ class TraceInputs(unittest.TestCase): return self._test_trace_mac() print 'Unsupported: %s' % sys.platform + def test_trace_gyp(self): + if sys.platform == 'linux2': + return self._test_trace_gyp_linux() + if sys.platform == 'darwin': + return self._test_trace_gyp_mac() + print 'Unsupported: %s' % sys.platform + def _test_trace_linux(self): - # TODO(maruel): BUG: Note that child.py is missing. + expected_end = [ + "Interesting: 4 reduced to 3", + " data/trace_inputs/", + " trace_inputs.py", + " trace_inputs_test.py", + ] + actual = self._execute(['trace_inputs_test.py', '--child1']).splitlines() + self.assertTrue(actual[0].startswith('Tracing... [')) + self.assertTrue(actual[1].startswith('Loading traces... ')) + self.assertTrue(actual[2].startswith('Total: ')) + self.assertEquals("Non existent: 0", actual[3]) + # Ignore any Unexpected part. + # TODO(maruel): Make sure there is no Unexpected part, even in the case of + # virtualenv usage. + self.assertEquals(expected_end, actual[-len(expected_end):]) + + def _test_trace_gyp_linux(self): expected = ( "{\n" " 'variables': {\n" @@ -63,16 +97,32 @@ class TraceInputs(unittest.TestCase): " '<(DEPTH)/trace_inputs_test.py',\n" " ],\n" " 'isolate_dirs': [\n" + " './',\n" " ],\n" " },\n" "},\n") - gyp = self._execute(['trace_inputs_test.py', '--child1']) - self.assertEquals(expected, gyp) + actual = self._execute(self._gyp() + ['trace_inputs_test.py', '--child1']) + self.assertEquals(expected, actual) def _test_trace_mac(self): # It is annoying in the case of dtrace because it requires root access. # TODO(maruel): BUG: Note that child.py is missing. expected = ( + "Total: 2\n" + "Non existent: 0\n" + "Interesting: 2 reduced to 2\n" + " trace_inputs.py\n" + " trace_inputs_test.py\n") + actual = self._execute( + ['trace_inputs_test.py', '--child1']).splitlines(True) + self.assertTrue(actual[0].startswith('Tracing... [')) + self.assertTrue(actual[1].startswith('Loading traces... ')) + self.assertEquals(expected, ''.join(actual[2:])) + + def _test_trace_gyp_mac(self): + # It is annoying in the case of dtrace because it requires root access. + # TODO(maruel): BUG: Note that child.py is missing. + expected = ( "{\n" " 'variables': {\n" " 'isolate_files': [\n" @@ -83,8 +133,8 @@ class TraceInputs(unittest.TestCase): " ],\n" " },\n" "},\n") - gyp = self._execute(['trace_inputs_test.py', '--child1']) - self.assertEquals(expected, gyp) + actual = self._execute(self._gyp() + ['trace_inputs_test.py', '--child1']) + self.assertEquals(expected, actual) def child1(): |