summaryrefslogtreecommitdiffstats
path: root/tools/cygprofile/patch_orderfile.py
blob: dbb344f174a8477912331551a7996df8ce025157 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
#!/usr/bin/python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Patch an orderfile.

Starting with a list of symbols in a binary and an orderfile (ordered list of
sections), matches the symbols in the orderfile and augments each symbol with
the symbols residing at the same address (due to having identical code).  The
output is a list of section matching rules appropriate for the linker option
-section-ordering-file.  These section matching rules include both actual
section names and names with wildcard (*) suffixes.

Note: It is possible to have.
- Several symbols mapping to the same offset in the binary.
- Several offsets for a given symbol (because we strip the ".clone." and other
  suffixes)

The general pipeline is:
1. Get the symbol infos (name, offset, size, section) from the binary
2. Get the symbol names from the orderfile
3. Find the orderfile symbol names in the symbols coming from the binary
4. For each symbol found, get all the symbols at the same address
5. Output them to an updated orderfile, with several different prefixes
   and suffixes
6. Output catch-all section matching rules for unprofiled methods.
"""

import collections
import logging
import optparse
import sys

import cyglog_to_orderfile
import cygprofile_utils
import symbol_extractor

# Prefixes for the symbols. We strip them from the incoming symbols, and add
# them back in the output file.
_PREFIXES = ('.text.startup.', '.text.hot.', '.text.unlikely.', '.text.')

# Suffixes for the symbols.  These are due to method splitting for inlining and
# method cloning for various reasons including constant propagation and
# inter-procedural optimization.
_SUFFIXES = ('.clone.', '.part.', '.isra.', '.constprop.')


def RemoveSuffixes(name):
  """Strips method name suffixes from cloning and splitting.

  .clone. comes from cloning in -O3.
  .part.  comes from partial method splitting for inlining.
  .isra.  comes from inter-procedural optimizations.
  .constprop. is cloning for constant propagation.
  """
  for suffix in _SUFFIXES:
    name = name.split(suffix)[0]
  return name


def _UniqueGenerator(generator):
  """Converts a generator to skip yielding elements already seen.

  Example:
    @_UniqueGenerator
    def Foo():
      yield 1
      yield 2
      yield 1
      yield 3

    Foo() yields 1,2,3.
  """
  def _FilteringFunction(*args, **kwargs):
    returned = set()
    for item in generator(*args, **kwargs):
      if item in returned:
        continue
      returned.add(item)
      yield item

  return _FilteringFunction


def _GroupSymbolInfos(symbol_infos):
  """Groups the symbol infos by name and offset.

  Args:
    symbol_infos: an iterable of SymbolInfo

  Returns:
    The same output as _GroupSymbolInfosFromBinary.
  """
  # Map the addresses to symbols.
  offset_to_symbol_infos = collections.defaultdict(list)
  name_to_symbol_infos = collections.defaultdict(list)
  for symbol in symbol_infos:
    symbol = symbol_extractor.SymbolInfo(name=RemoveSuffixes(symbol.name),
                                         offset=symbol.offset,
                                         size=symbol.size,
                                         section=symbol.section)
    offset_to_symbol_infos[symbol.offset].append(symbol)
    name_to_symbol_infos[symbol.name].append(symbol)
  return (dict(offset_to_symbol_infos), dict(name_to_symbol_infos))


def _GroupSymbolInfosFromBinary(binary_filename):
  """Group all the symbols from a binary by name and offset.

  Args:
    binary_filename: path to the binary.

  Returns:
    A tuple of dict:
    (offset_to_symbol_infos, name_to_symbol_infos):
    - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
    - name_to_symbol_infos: {name: [symbol_info1, ...]}
  """
  symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)
  return _GroupSymbolInfos(symbol_infos)


def _StripPrefix(line):
  """Strips the linker section name prefix from a symbol line.

  Args:
    line: a line from an orderfile, usually in the form:
          .text.SymbolName

  Returns:
    The symbol, SymbolName in the example above.
  """
  for prefix in _PREFIXES:
    if line.startswith(prefix):
      return line[len(prefix):]
  return line  # Unprefixed case


def _SectionNameToSymbols(section_name, section_to_symbols_map):
  """Yields all symbols which could be referred to by section_name.

  If the section name is present in the map, the names in the map are returned.
  Otherwise, any clone annotations and prefixes are stripped from the section
  name and the remainder is returned.
  """
  if (not section_name or
      section_name == '.text' or
      section_name.endswith('*')):
    return  # Don't return anything for catch-all sections
  if section_name in section_to_symbols_map:
    for symbol in section_to_symbols_map[section_name]:
      yield symbol
  else:
    name = _StripPrefix(section_name)
    if name:
      yield name


def GetSectionsFromOrderfile(filename):
  """Yields the sections from an orderfile.

  Args:
    filename: The name of the orderfile.

  Yields:
    A list of symbol names.
  """
  with open(filename, 'r') as f:
    for line in f.xreadlines():
      line = line.rstrip('\n')
      if line:
        yield line


@_UniqueGenerator
def GetSymbolsFromOrderfile(filename, section_to_symbols_map):
  """Yields the symbols from an orderfile.  Output elements do not repeat.

  Args:
    filename: The name of the orderfile.
    section_to_symbols_map: The mapping from section to symbol names.  If a
                            section name is missing from the mapping, the
                            symbol name is assumed to be the section name with
                            prefixes and suffixes stripped.

  Yields:
    A list of symbol names.
  """
  # TODO(lizeb,pasko): Move this method to symbol_extractor.py
  for section in GetSectionsFromOrderfile(filename):
    for symbol in _SectionNameToSymbols(RemoveSuffixes(section),
                                        section_to_symbols_map):
      yield symbol


def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info,
                           offset_to_symbol_info):
  """Expands a symbol to include all symbols with the same offset.

  Args:
    profiled_symbol: the string symbol name to be expanded.
    name_to_symbol_info: {name: [symbol_info1], ...}, as returned by
        GetSymbolInfosFromBinary
    offset_to_symbol_info: {offset: [symbol_info1, ...], ...}

  Returns:
    A list of symbol names, or an empty list if profiled_symbol was not in
    name_to_symbol_info.
  """
  if profiled_symbol not in name_to_symbol_info:
    return []
  symbol_infos = name_to_symbol_info[profiled_symbol]
  expanded = []
  for symbol_info in symbol_infos:
    expanded += (s.name for s in offset_to_symbol_info[symbol_info.offset])
  return expanded


@_UniqueGenerator
def _SectionMatchingRules(section_name, name_to_symbol_infos,
                          offset_to_symbol_infos, section_to_symbols_map,
                          symbol_to_sections_map, suffixed_sections):
  """Gets the set of section matching rules for section_name.

  These rules will include section_name, but also any sections which may
  contain the same code due to cloning, splitting, or identical code folding.

  Args:
    section_name: The section to expand.
    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
        GetSymbolInfosFromBinary.
    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
    section_to_symbols_map: The mapping from section to symbol name.  Missing
        section names are treated as per _SectionNameToSymbols.
    symbol_to_sections_map: The mapping from symbol name to names of linker
        sections containing the symbol.  If a symbol isn't in the mapping, the
        section names are generated from the set of _PREFIXES with the symbol
        name.
    suffixed_sections: A set of sections which can have suffixes.

  Yields:
    Section names including at least section_name.
  """
  for name in _ExpandSection(section_name, name_to_symbol_infos,
                             offset_to_symbol_infos, section_to_symbols_map,
                             symbol_to_sections_map):
    yield name
    # Since only a subset of methods (mostly those compiled with O2) ever get
    # suffixes, don't emit the wildcards for ones where it won't be helpful.
    # Otherwise linking takes too long.
    if name in suffixed_sections:
      # TODO(lizeb,pasko): instead of just appending .*, append .suffix.* for
      # _SUFFIXES.  We can't do this right now because that many wildcards
      # seems to kill the linker (linking libchrome takes 3 hours).  This gets
      # almost all the benefit at a much lower link-time cost, but could cause
      # problems with unexpected suffixes.
      yield name + '.*'

def _ExpandSection(section_name, name_to_symbol_infos, offset_to_symbol_infos,
                   section_to_symbols_map, symbol_to_sections_map):
  """Yields the set of section names for section_name.

  This set will include section_name, but also any sections which may contain
  the same code due to identical code folding.

  Args:
    section_name: The section to expand.
    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
        GetSymbolInfosFromBinary.
    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
    section_to_symbols_map: The mapping from section to symbol name.  Missing
        section names are treated as per _SectionNameToSymbols.
    symbol_to_sections_map: The mapping from symbol name to names of linker
        sections containing the symbol.  If a symbol isn't in the mapping, the
        section names are generated from the set of _PREFIXES with the symbol
        name.

  Yields:
    Section names including at least section_name.
  """
  yield section_name
  for first_sym in _SectionNameToSymbols(section_name,
                                         section_to_symbols_map):
    for symbol in _SymbolsWithSameOffset(first_sym, name_to_symbol_infos,
                                         offset_to_symbol_infos):
      if symbol in symbol_to_sections_map:
        for section in symbol_to_sections_map[symbol]:
          yield section
      for prefix in _PREFIXES:
        yield prefix + symbol


@_UniqueGenerator
def _ExpandSections(section_names, name_to_symbol_infos,
                    offset_to_symbol_infos, section_to_symbols_map,
                    symbol_to_sections_map, suffixed_sections):
  """Gets an ordered set of section matching rules for a list of sections.

  Rules will not be repeated.

  Args:
    section_names: The sections to expand.
    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
                          _GroupSymbolInfosFromBinary.
    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
    section_to_symbols_map: The mapping from section to symbol names.
    symbol_to_sections_map: The mapping from symbol name to names of linker
                            sections containing the symbol.
    suffixed_sections:      A set of sections which can have suffixes.

  Yields:
    Section matching rules including at least section_names.
  """
  for profiled_section in section_names:
    for section in _SectionMatchingRules(
        profiled_section, name_to_symbol_infos, offset_to_symbol_infos,
        section_to_symbols_map, symbol_to_sections_map, suffixed_sections):
      yield section


def _CombineSectionListsByPrimaryName(symbol_to_sections_map):
  """Combines values of the symbol_to_sections_map by stripping suffixes.

  Example:
    {foo: [.text.foo, .text.bar.part.1],
     foo.constprop.4: [.text.baz.constprop.3]} ->
    {foo: [.text.foo, .text.bar, .text.baz]}

  Args:
    symbol_to_sections_map: Mapping from symbol name to list of section names

  Returns:
    The same mapping, but with symbol and section names suffix-stripped.
  """
  simplified = {}
  for suffixed_symbol, suffixed_sections in symbol_to_sections_map.iteritems():
    symbol = RemoveSuffixes(suffixed_symbol)
    sections = [RemoveSuffixes(section) for section in suffixed_sections]
    simplified.setdefault(symbol, []).extend(sections)
  return simplified


def _SectionsWithSuffixes(symbol_to_sections_map):
  """Finds sections which have suffixes applied.

  Args:
    symbol_to_sections_map: a map where the values are lists of section names.

  Returns:
    A set containing all section names which were seen with suffixes applied.
  """
  sections_with_suffixes = set()
  for suffixed_sections in symbol_to_sections_map.itervalues():
    for suffixed_section in suffixed_sections:
      section = RemoveSuffixes(suffixed_section)
      if section != suffixed_section:
        sections_with_suffixes.add(section)
  return sections_with_suffixes


def _StripSuffixes(section_list):
  """Remove all suffixes on items in a list of sections or symbols."""
  return [RemoveSuffixes(section) for section in section_list]


def main(argv):
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <unpatched_orderfile> <library>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for the library.')
  options, argv = parser.parse_args(argv)
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  if len(argv) != 3:
    parser.print_help()
    return 1
  orderfile_filename = argv[1]
  binary_filename = argv[2]
  symbol_extractor.SetArchitecture(options.arch)
  (offset_to_symbol_infos, name_to_symbol_infos) = _GroupSymbolInfosFromBinary(
      binary_filename)
  obj_dir = cygprofile_utils.GetObjDir(binary_filename)
  raw_symbol_map = cyglog_to_orderfile.GetSymbolToSectionsMapFromObjectFiles(
      obj_dir)
  suffixed = _SectionsWithSuffixes(raw_symbol_map)
  symbol_to_sections_map = _CombineSectionListsByPrimaryName(raw_symbol_map)
  section_to_symbols_map = cygprofile_utils.InvertMapping(
      symbol_to_sections_map)
  profiled_sections = _StripSuffixes(
      GetSectionsFromOrderfile(orderfile_filename))
  expanded_sections = _ExpandSections(
      profiled_sections, name_to_symbol_infos, offset_to_symbol_infos,
      section_to_symbols_map, symbol_to_sections_map, suffixed)
  for section in expanded_sections:
    print section
  # The following is needed otherwise Gold only applies a partial sort.
  print '.text'  # gets methods not in a section, such as assembly
  for prefix in _PREFIXES:
    print prefix + '*'  # gets everything else
  return 0


if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  sys.exit(main(sys.argv))