diff options
author | bratell@opera.com <bratell@opera.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-05-22 18:49:40 +0000 |
---|---|---|
committer | bratell@opera.com <bratell@opera.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-05-22 18:49:40 +0000 |
commit | 17a6219729e23c3053dabebbb7d2d6cca43a135f (patch) | |
tree | b87423688b071d43c90793953d6b2bac485ae1e8 /tools/binary_size/explain_binary_size_delta.py | |
parent | 56281735c6141a07f03fff2e9be6ac0eaccdc76f (diff) | |
download | chromium_src-17a6219729e23c3053dabebbb7d2d6cca43a135f.zip chromium_src-17a6219729e23c3053dabebbb7d2d6cca43a135f.tar.gz chromium_src-17a6219729e23c3053dabebbb7d2d6cca43a135f.tar.bz2 |
Graphical version of the run_binary_size_analysis tool.
The binary_size tool suit includes tools that are useful when trying
to reduce binary size of a program, and chromium related programs
in particular.
This commit (mostly written by andrewhayden@chromium.org for
Android but ported to generic Linux by bratell@opera.com) adds
a graphical HTML based output for run_binary_size_analysis.py.
In the generated web page it is possible to dynamically and
graphically browse the binary and each part of the source tree
is given a size that reflects its contribution to the binary size.
The run_binary_size_analysis tool is run on compiled binaries
with symbols and uses nm and addr2line to map parts of the
binary to source code. Since addr2line is slow the operation to map
binary symbols to source files takes a while but the output is
well worth it when shrinking programs. See its usage information
for details about how to run it.
This commit also includes the tool explain_binary_size_delta.py
(textual output) which can be used to understand why a binary
changed size and in what way. See its usage information for
details about how to run it.
There are many further improvements possible to to do on these tools.
Search the bug database for Label:Tools-BinarySize for suggestions.
BUG=339059
R=primiano@chromium.org,andrewhayden@chromium.org
Review URL: https://codereview.chromium.org/258633003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@272255 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/binary_size/explain_binary_size_delta.py')
-rwxr-xr-x | tools/binary_size/explain_binary_size_delta.py | 405 |
1 files changed, 405 insertions, 0 deletions
diff --git a/tools/binary_size/explain_binary_size_delta.py b/tools/binary_size/explain_binary_size_delta.py new file mode 100755 index 0000000..ba1b25c --- /dev/null +++ b/tools/binary_size/explain_binary_size_delta.py @@ -0,0 +1,405 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Describe the size difference of two binaries. + +Generates a description of the size difference of two binaries based +on the difference of the size of various symbols. + +This tool needs "nm" dumps of each binary with full symbol +information. You can obtain the necessary dumps by running the +run_binary_size_analysis.py script upon each binary, with the +"--nm-out" parameter set to the location in which you want to save the +dumps. Example: + + # obtain symbol data from first binary in /tmp/nm1.dump + cd $CHECKOUT1_SRC + ninja -C out/Release binary_size_tool + tools/binary_size/run_binary_size_analysis \ + --library <path_to_library> + --destdir /tmp/throwaway + --nm-out /tmp/nm1.dump + + # obtain symbol data from second binary in /tmp/nm2.dump + cd $CHECKOUT2_SRC + ninja -C out/Release binary_size_tool + tools/binary_size/run_binary_size_analysis \ + --library <path_to_library> + --destdir /tmp/throwaway + --nm-out /tmp/nm2.dump + + # cleanup useless files + rm -r /tmp/throwaway + + # run this tool + explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump +""" + +import optparse +import os +import sys + +import binary_size_utils + + +def Compare(symbols1, symbols2): + """Executes a comparison of the symbols in symbols1 and symbols2. + + Returns: + tuple of lists: (added_symbols, removed_symbols, changed_symbols, others) + """ + added = [] # tuples + removed = [] # tuples + changed = [] # tuples + unchanged = [] # tuples + + cache1 = {} + cache2 = {} + # Make a map of (file, symbol_type) : (symbol_name, symbol_size) + for cache, symbols in ((cache1, symbols1), (cache2, symbols2)): + for symbol_name, symbol_type, symbol_size, file_path in symbols: + if 'vtable for ' in symbol_name: + symbol_type = '@' # hack to categorize these separately + if file_path: + file_path = os.path.normpath(file_path) + else: + file_path = '(No Path)' + key = (file_path, symbol_type) + bucket = cache.setdefault(key, {}) + bucket[symbol_name] = symbol_size + + # Now diff them. We iterate over the elements in cache1. For each symbol + # that we find in cache2, we record whether it was deleted, changed, or + # unchanged. We then remove it from cache2; all the symbols that remain + # in cache2 at the end of the iteration over cache1 are the 'new' symbols. + for key, bucket1 in cache1.items(): + bucket2 = cache2.get(key) + if not bucket2: + # A file was removed. Everything in bucket1 is dead. + for symbol_name, symbol_size in bucket1.items(): + removed.append((key[0], key[1], symbol_name, symbol_size, None)) + else: + # File still exists, look for changes within. + for symbol_name, symbol_size in bucket1.items(): + size2 = bucket2.get(symbol_name) + if size2 is None: + # Symbol no longer exists in bucket2. + removed.append((key[0], key[1], symbol_name, symbol_size, None)) + else: + del bucket2[symbol_name] # Symbol is not new, delete from cache2. + if len(bucket2) == 0: + del cache1[key] # Entire bucket is empty, delete from cache2 + if symbol_size != size2: + # Symbol has change size in bucket. + changed.append((key[0], key[1], symbol_name, symbol_size, size2)) + else: + # Symbol is unchanged. + unchanged.append((key[0], key[1], symbol_name, symbol_size, size2)) + + # We have now analyzed all symbols that are in cache1 and removed all of + # the encountered symbols from cache2. What's left in cache2 is the new + # symbols. + for key, bucket2 in cache2.iteritems(): + for symbol_name, symbol_size in bucket2.items(): + added.append((key[0], key[1], symbol_name, None, symbol_size)) + return (added, removed, changed, unchanged) + + +def TestCompare(): + # List entries have form: symbol_name, symbol_type, symbol_size, file_path + symbol_list1 = ( + # File with one symbol, left as-is. + ( 'unchanged', 't', 1000, '/file_unchanged' ), + # File with one symbol, changed. + ( 'changed', 't', 1000, '/file_all_changed' ), + # File with one symbol, deleted. + ( 'removed', 't', 1000, '/file_all_deleted' ), + # File with two symbols, one unchanged, one changed, same bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_changed' ), + ( 'changed', 't', 1000, '/file_pair_unchanged_changed' ), + # File with two symbols, one unchanged, one deleted, same bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_removed' ), + ( 'removed', 't', 1000, '/file_pair_unchanged_removed' ), + # File with two symbols, one unchanged, one added, same bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_added' ), + # File with two symbols, one unchanged, one changed, different bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed' ), + ( 'changed', '@', 1000, '/file_pair_unchanged_diffbuck_changed' ), + # File with two symbols, one unchanged, one deleted, different bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed' ), + ( 'removed', '@', 1000, '/file_pair_unchanged_diffbuck_removed' ), + # File with two symbols, one unchanged, one added, different bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added' ), + # File with four symbols, one added, one removed, one changed, one unchanged + ( 'size_changed', 't', 1000, '/file_tetra' ), + ( 'removed', 't', 1000, '/file_tetra' ), + ( 'unchanged', 't', 1000, '/file_tetra' ), + ) + + symbol_list2 = ( + # File with one symbol, left as-is. + ( 'unchanged', 't', 1000, '/file_unchanged' ), + # File with one symbol, changed. + ( 'changed', 't', 2000, '/file_all_changed' ), + # File with two symbols, one unchanged, one changed, same bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_changed' ), + ( 'changed', 't', 2000, '/file_pair_unchanged_changed' ), + # File with two symbols, one unchanged, one deleted, same bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_removed' ), + # File with two symbols, one unchanged, one added, same bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_added' ), + ( 'added', 't', 1000, '/file_pair_unchanged_added' ), + # File with two symbols, one unchanged, one changed, different bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed' ), + ( 'changed', '@', 2000, '/file_pair_unchanged_diffbuck_changed' ), + # File with two symbols, one unchanged, one deleted, different bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed' ), + # File with two symbols, one unchanged, one added, different bucket + ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added' ), + ( 'added', '@', 1000, '/file_pair_unchanged_diffbuck_added' ), + # File with four symbols, one added, one removed, one changed, one unchanged + ( 'size_changed', 't', 2000, '/file_tetra' ), + ( 'unchanged', 't', 1000, '/file_tetra' ), + ( 'added', 't', 1000, '/file_tetra' ), + # New file with one symbol added + ( 'added', 't', 1000, '/file_new' ), + ) + + # Here we go + (added, removed, changed, unchanged) = Compare(symbol_list1, symbol_list2) + + # File with one symbol, left as-is. + assert ('/file_unchanged', 't', 'unchanged', 1000, 1000) in unchanged + # File with one symbol, changed. + assert ('/file_all_changed', 't', 'changed', 1000, 2000) in changed + # File with one symbol, deleted. + assert ('/file_all_deleted', 't', 'removed', 1000, None) in removed + # New file with one symbol added + assert ('/file_new', 't', 'added', None, 1000) in added + # File with two symbols, one unchanged, one changed, same bucket + assert ('/file_pair_unchanged_changed', + 't', 'unchanged', 1000, 1000) in unchanged + assert ('/file_pair_unchanged_changed', + 't', 'changed', 1000, 2000) in changed + # File with two symbols, one unchanged, one removed, same bucket + assert ('/file_pair_unchanged_removed', + 't', 'unchanged', 1000, 1000) in unchanged + assert ('/file_pair_unchanged_removed', + 't', 'removed', 1000, None) in removed + # File with two symbols, one unchanged, one added, same bucket + assert ('/file_pair_unchanged_added', + 't', 'unchanged', 1000, 1000) in unchanged + assert ('/file_pair_unchanged_added', + 't', 'added', None, 1000) in added + # File with two symbols, one unchanged, one changed, different bucket + assert ('/file_pair_unchanged_diffbuck_changed', + 't', 'unchanged', 1000, 1000) in unchanged + assert ('/file_pair_unchanged_diffbuck_changed', + '@', 'changed', 1000, 2000) in changed + # File with two symbols, one unchanged, one removed, different bucket + assert ('/file_pair_unchanged_diffbuck_removed', + 't', 'unchanged', 1000, 1000) in unchanged + assert ('/file_pair_unchanged_diffbuck_removed', + '@', 'removed', 1000, None) in removed + # File with two symbols, one unchanged, one added, different bucket + assert ('/file_pair_unchanged_diffbuck_added', + 't', 'unchanged', 1000, 1000) in unchanged + assert ('/file_pair_unchanged_diffbuck_added', + '@', 'added', None, 1000) in added + # File with four symbols, one added, one removed, one changed, one unchanged + assert ('/file_tetra', 't', 'size_changed', 1000, 2000) in changed + assert ('/file_tetra', 't', 'unchanged', 1000, 1000) in unchanged + assert ('/file_tetra', 't', 'added', None, 1000) in added + assert ('/file_tetra', 't', 'removed', 1000, None) in removed + + # Now check final stats. + CrunchStats(added, removed, changed, unchanged, True, True) + + +def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols): + """Outputs to stdout a summary of changes based on the symbol lists.""" + print 'Symbol statistics:' + sources_with_new_symbols = set() + new_symbols_size = 0 + new_symbols_by_path = {} + for file_path, symbol_type, symbol_name, size1, size2 in added: + sources_with_new_symbols.add(file_path) + new_symbols_size += size2 + bucket = new_symbols_by_path.setdefault(file_path, []) + bucket.append((symbol_name, symbol_type, None, size2)) + print(' %d added, totalling %d bytes across %d sources' % + (len(added), new_symbols_size, len(sources_with_new_symbols))) + + sources_with_removed_symbols = set() + removed_symbols_size = 0 + removed_symbols_by_path = {} + for file_path, symbol_type, symbol_name, size1, size2 in removed: + sources_with_removed_symbols.add(file_path) + removed_symbols_size += size1 + bucket = removed_symbols_by_path.setdefault(file_path, []) + bucket.append((symbol_name, symbol_type, size1, None)) + print(' %d removed, totalling %d bytes removed across %d sources' % + (len(removed), removed_symbols_size, len(sources_with_removed_symbols))) + + sources_with_changed_symbols = set() + before_size = 0 + after_size = 0 + changed_symbols_by_path = {} + for file_path, symbol_type, symbol_name, size1, size2 in changed: + sources_with_changed_symbols.add(file_path) + before_size += size1 + after_size += size2 + bucket = changed_symbols_by_path.setdefault(file_path, []) + bucket.append((symbol_name, symbol_type, size1, size2)) + print(' %d changed, resulting in a net change of %d bytes ' + '(%d bytes before, %d bytes after) across %d sources' % + (len(changed), (after_size - before_size), before_size, after_size, + len(sources_with_changed_symbols))) + + maybe_unchanged_sources = set() + unchanged_symbols_size = 0 + for file_path, symbol_type, symbol_name, size1, size2 in unchanged: + maybe_unchanged_sources.add(file_path) + unchanged_symbols_size += size1 # == size2 + print(' %d unchanged, totalling %d bytes' % + (len(unchanged), unchanged_symbols_size)) + + # High level analysis, always output. + unchanged_sources = (maybe_unchanged_sources - + sources_with_changed_symbols - + sources_with_removed_symbols - + sources_with_new_symbols) + new_sources = (sources_with_new_symbols - + maybe_unchanged_sources - + sources_with_removed_symbols) + removed_sources = (sources_with_removed_symbols - + maybe_unchanged_sources - + sources_with_new_symbols) + partially_changed_sources = (sources_with_changed_symbols | + sources_with_new_symbols | + sources_with_removed_symbols) - removed_sources - new_sources + allFiles = (sources_with_new_symbols | + sources_with_removed_symbols | + sources_with_changed_symbols | + maybe_unchanged_sources) + print 'Source stats: ' + print(' %d sources encountered.' % len(allFiles)) + print(' %d completely new.' % len(new_sources)) + print(' %d removed completely.' % len(removed_sources)) + print(' %d partially changed.' % len(partially_changed_sources)) + print(' %d completely unchanged.' % len(unchanged_sources)) + remainder = (allFiles - new_sources - removed_sources - + partially_changed_sources - unchanged_sources) + assert len(remainder) == 0 + + if not showsources: + return # Per-source analysis, only if requested + print 'Per-source Analysis:' + delta_by_path = {} + for path in new_symbols_by_path: + entry = delta_by_path.get(path) + if not entry: + entry = {'plus': 0, 'minus': 0} + delta_by_path[path] = entry + for symbol_name, symbol_type, size1, size2 in new_symbols_by_path[path]: + entry['plus'] += size2 + for path in removed_symbols_by_path: + entry = delta_by_path.get(path) + if not entry: + entry = {'plus': 0, 'minus': 0} + delta_by_path[path] = entry + for symbol_name, symbol_type, size1, size2 in removed_symbols_by_path[path]: + entry['minus'] += size1 + for path in changed_symbols_by_path: + entry = delta_by_path.get(path) + if not entry: + entry = {'plus': 0, 'minus': 0} + delta_by_path[path] = entry + for symbol_name, symbol_type, size1, size2 in changed_symbols_by_path[path]: + delta = size2 - size1 + if delta > 0: + entry['plus'] += delta + else: + entry['minus'] += (-1 * delta) + + for path in sorted(delta_by_path): + print ' Source: ' + path + size_data = delta_by_path[path] + gain = size_data['plus'] + loss = size_data['minus'] + delta = size_data['plus'] - size_data['minus'] + print (' Change: %d bytes (gained %d, lost %d)' % (delta, gain, loss)) + if showsymbols: + if path in new_symbols_by_path: + print ' New symbols:' + for symbol_name, symbol_type, size1, size2 in \ + new_symbols_by_path[path]: + print (' %s type=%s, size=%d bytes' % + (symbol_name, symbol_type, size2)) + if path in removed_symbols_by_path: + print ' Removed symbols:' + for symbol_name, symbol_type, size1, size2 in \ + removed_symbols_by_path[path]: + print (' %s type=%s, size=%d bytes' % + (symbol_name, symbol_type, size1)) + if path in changed_symbols_by_path: + print ' Changed symbols:' + def sortkey(item): + symbol_name, _symbol_type, size1, size2 = item + return (size1 - size2, symbol_name) + for symbol_name, symbol_type, size1, size2 in \ + sorted(changed_symbols_by_path[path], key=sortkey): + print (' %s type=%s, delta=%d bytes (was %d bytes, now %d bytes)' + % (symbol_name, symbol_type, (size2 - size1), size1, size2)) + + +def main(): + usage = """%prog [options] + + Analyzes the symbolic differences between two binary files + (typically, not necessarily, two different builds of the same + library) and produces a detailed description of symbols that have + been added, removed, or whose size has changed. + + Example: + explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump + + Options are available via '--help'. + """ + parser = optparse.OptionParser(usage=usage) + parser.add_option('--nm1', metavar='PATH', + help='the nm dump of the first library') + parser.add_option('--nm2', metavar='PATH', + help='the nm dump of the second library') + parser.add_option('--showsources', action='store_true', default=False, + help='show per-source statistics') + parser.add_option('--showsymbols', action='store_true', default=False, + help='show all symbol information; implies --showfiles') + parser.add_option('--verbose', action='store_true', default=False, + help='output internal debugging stuff') + parser.add_option('--selftest', action='store_true', default=False, + help='run internal diagnosis') + opts, _args = parser.parse_args() + + if opts.selftest: + TestCompare() + return + + if not opts.nm1: + parser.error('--nm1 is required') + if not opts.nm2: + parser.error('--nm2 is required') + symbols = [] + for path in [opts.nm1, opts.nm2]: + with file(path, 'r') as nm_input: + if opts.verbose: + print 'parsing ' + path + '...' + symbols.append(list(binary_size_utils.ParseNm(nm_input))) + (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1]) + CrunchStats(added, removed, changed, unchanged, + opts.showsources | opts.showsymbols, opts.showsymbols) + +if __name__ == '__main__': + sys.exit(main()) |