tools/clang/scripts/run_tool.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289

#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrapper script to help run clang tools across Chromium code.

How to use this tool:
If you want to run the tool across all Chromium code:
run_tool.py <tool> <path/to/compiledb>

If you only want to run the tool across just chrome/browser and content/browser:
run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser

Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
information, which documents the entire automated refactoring flow in Chromium.

Why use this tool:
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost.

Unfortunately, if the work is simply sharded across multiple cores by running
multiple RefactoringTools, problems arise when they attempt to rewrite a file at
the same time. To work around that, clang tools that are run using this tool
should output edits to stdout in the following format:

==== BEGIN EDITS ====
r:<file path>:<offset>:<length>:<replacement text>
r:<file path>:<offset>:<length>:<replacement text>
...etc...
==== END EDITS ====

Any generated edits are applied once the clang tool has finished running
across Chromium, regardless of whether some instances failed or not.
"""

import collections
import functools
import multiprocessing
import os.path
import subprocess
import sys


Edit = collections.namedtuple(
    'Edit', ('edit_type', 'offset', 'length', 'replacement'))


def _GetFilesFromGit(paths = None):
  """Gets the list of files in the git repository.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
  """
  args = ['git', 'ls-files']
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  return output.splitlines()


def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits.
  """
  lines = stdout.splitlines()
  start_index = lines.index('==== BEGIN EDITS ====')
  end_index = lines.index('==== END EDITS ====')
  edits = collections.defaultdict(list)
  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # Normalize the file path emitted by the clang tool to be relative to the
      # current working directory.
      path = os.path.relpath(os.path.join(build_directory, path))
      edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
    except ValueError:
      print 'Unable to parse edit: %s' % line
  return edits


def _ExecuteTool(toolname, build_directory, filename):
  """Executes the tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated edits are stored with the key "edits"
    in the dictionary.

    Otherwise, the filename and the output from stderr are associated with the
    keys "filename" and "stderr" respectively.
  """
  command = subprocess.Popen((toolname, '-p', build_directory, filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
  stdout, stderr = command.communicate()
  if command.returncode != 0:
    return {'status': False, 'filename': filename, 'stderr': stderr}
  else:
    return {'status': True,
            'edits': _ExtractEditsFromStdout(build_directory, stdout)}


class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    return self.__edits

  @property
  def failed_count(self):
    return self.__failed_count

  def Run(self):
    """Does the grunt work."""
    pool = multiprocessing.Pool()
    result_iterator = pool.imap_unordered(
        functools.partial(_ExecuteTool, self.__toolname,
                          self.__build_directory),
        self.__filenames)
    for result in result_iterator:
      self.__ProcessResult(result)
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].iteritems():
        self.__edits[k].extend(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()


def _ApplyEdits(edits):
  """Apply the generated edits.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
  """
  edit_count = 0
  for k, v in edits.iteritems():
    # Sort the edits and iterate through them in reverse order. Sorting allows
    # duplicate edits to be quickly skipped, while reversing means that
    # subsequent edits don't need to have their offsets updated with each edit
    # applied.
    v.sort()
    last_edit = None
    with open(k, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in reversed(v):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      f.seek(0)
      f.truncate()
      f.write(contents)
  print 'Applied %d edits to %d files' % (edit_count, len(edits))


_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  Args:
    contents: A bytearray with the deletion already applied.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  char_before = char_after = None
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte in (ord(','), ord(':'), ord('('), ord('{')):
      char_before = chr(byte)
    break

  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == ord(','):
      char_after = chr(byte)
    break

  if char_before:
    if char_after:
      del contents[offset:offset + right_trim_count]
    elif char_before in (',', ':'):
      del contents[offset - left_trim_count:offset]


def main(argv):
  if len(argv) < 2:
    print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...'
    print '  <clang tool> is the clang tool that should be run.'
    print '  <compile db> is the directory that contains the compile database'
    print '  <path 1> <path2> ... can be used to filter what files are edited'
    return 1

  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.m', '.mm'))
  dispatcher = _CompilerDispatcher(argv[0], argv[1],
                                   [f for f in filenames
                                    if os.path.splitext(f)[1] in extensions])
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k : v for k, v in dispatcher.edits.iteritems()
                    if k in filenames})
  # TODO(dcheng): Consider clang-formatting the result to avoid egregious style
  # violations.
  if dispatcher.failed_count != 0:
    return 2
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))