author    | aberent@chromium.org <aberent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-05-30 14:51:56 +0000
committer | aberent@chromium.org <aberent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-05-30 14:51:56 +0000
commit    | da0ac0fd0562d882d90232e563ff3a213d56591c (patch)
tree      | f44f553388626c332ccb43baf43d6d7a6c712f8f /tools/cygprofile
parent    | adbed2251169cd091cb594cb75eafe7b2dc5e077 (diff)
Add scripts for using cygprofile to repository
These scripts are used to create a link order file for Linux-based builds
(Linux, Android, ChromeOS). This commit simply moves them from a Google-private
web page where they have lived for the last couple of years.
The scripts are:
mergetraces.py - merge multiple cygprofile traces into a single trace
symbolize.py - convert traces into an order file with symbols
patch_orderfile.py - complete the order file with symbols from Chrome.
I will separately add a page on creating an order file to the Chromium
web site.
Review URL: https://chromiumcodereview.appspot.com/16151006
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@203143 0039d316-1c4b-4281-b951-d872f2087c98
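
As context for how these three scripts fit together, below is a sketch of a typical end-to-end run, inferred only from the usage strings embedded in the scripts themselves; the cyglog file names, library paths, and output file names are illustrative assumptions, not part of this change.

```sh
# Hypothetical pipeline; all paths and file names are placeholders.
# 1. Merge the per-process cyglog traces into a single trace
#    (usage taken from the mergetraces.py docstring).
tools/cygprofile/mergetraces.py `ls cyglog.* -Sr` > merged_cyglog

# 2. Turn the merged trace into an order file of .text.* sections; the
#    library here is assumed to be the instrumented, unstripped build.
tools/cygprofile/symbolize.py -t orderfile merged_cyglog libchromeview.so > chrome.orderfile

# 3. Complete the order file against the uninstrumented library so that
#    aliased and cloned symbols are also pulled in.
tools/cygprofile/patch_orderfile.py chrome.orderfile libchromeview_uninstrumented.so > chrome.orderfile.patched
```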
Diffstat (limited to 'tools/cygprofile')
-rwxr-xr-x | tools/cygprofile/mergetraces.py     | 186
-rwxr-xr-x | tools/cygprofile/patch_orderfile.py | 117
-rwxr-xr-x | tools/cygprofile/symbolize.py       | 251
3 files changed, 554 insertions, 0 deletions
diff --git a/tools/cygprofile/mergetraces.py b/tools/cygprofile/mergetraces.py
new file mode 100755
index 0000000..5d85920
--- /dev/null
+++ b/tools/cygprofile/mergetraces.py
@@ -0,0 +1,186 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog
+
+"""Merge multiple log files from different processes into a single log.
+
+Given two log files of execution traces, merge the traces into a single trace.
+Merging will use timestamps (i.e. the first two columns of logged calls) to
+create a single log that is an ordered trace of calls by both processes.
+"""
+
+import optparse
+import os
+import string
+import subprocess
+import sys
+
+def ParseLogLines(lines):
+  """Parse log file lines.
+
+  Args:
+    lines: lines from log file produced by profiled run
+
+  Below is an example of a small log file:
+    5086e000-52e92000 r-xp 00000000 b3:02 51276    libchromeview.so
+    secs       msecs      pid:threadid    func
+    START
+    1314897086 795828     3587:1074648168 0x509e105c
+    1314897086 795874     3587:1074648168 0x509e0eb4
+    1314897086 796326     3587:1074648168 0x509e0e3c
+    1314897086 796552     3587:1074648168 0x509e07bc
+    END
+
+  Returns:
+    tuple consisting of 1) an ordered list of the logged calls, as an array of
+    fields, 2) the virtual start address of the library, used to compute the
+    offset of the symbol in the library and 3) the virtual end address
+  """
+  call_lines = []
+  vm_start = 0
+  vm_end = 0
+  dash_index = lines[0].find ('-')
+  space_index = lines[0].find (' ')
+  vm_start = int (lines[0][:dash_index], 16)
+  vm_end = int (lines[0][dash_index+1:space_index], 16)
+  for line in lines[2:]:
+    line = line.strip()
+    # print hex (vm_start)
+    fields = line.split()
+    call_lines.append (fields)
+
+  return (call_lines, vm_start, vm_end)
+
+def HasDuplicates(calls):
+  """Function is a sanity check to make sure that calls are only logged once.
+
+  Args:
+    calls: list of calls logged
+
+  Returns:
+    boolean indicating if calls has duplicate calls
+  """
+  seen = []
+  for call in calls:
+    if call[3] in seen:
+      return True
+    else:
+      seen.append(call[3])
+
+def CheckTimestamps(calls):
+  """Prints warning to stderr if the call timestamps are not in order.
+
+  Args:
+    calls: list of calls logged
+  """
+  index = 0
+  last_timestamp_secs = -1
+  last_timestamp_ms = -1
+  while (index < len (calls)):
+    timestamp_secs = int (calls[index][0])
+    timestamp_ms = int (calls[index][1])
+    timestamp = (timestamp_secs * 1000000) + timestamp_ms
+    last_timestamp = (last_timestamp_secs * 1000000) + last_timestamp_ms
+    if (timestamp < last_timestamp):
+      sys.stderr.write("WARNING: last_timestamp: " + str(last_timestamp_secs)
+                       + " " + str(last_timestamp_ms) + " timestamp: "
+                       + str(timestamp_secs) + " " + str(timestamp_ms) + "\n")
+    last_timestamp_secs = timestamp_secs
+    last_timestamp_ms = timestamp_ms
+    index = index + 1
+
+def Convert (call_lines, startAddr, endAddr):
+  """Converts the call addresses to static offsets and removes invalid calls.
+
+  Removes profiled calls not in the shared library using the start and end
+  virtual addresses, converts strings to integer values, and converts virtual
+  addresses to offsets within the shared library.
+
+  Returns:
+    list of calls as tuples (sec, msec, pid:tid, callee)
+  """
+  converted_calls = []
+  call_addresses = []
+  for fields in call_lines:
+    secs = int (fields[0])
+    msecs = int (fields[1])
+    callee = int (fields[3], 16)
+    # print ("callee: " + hex (callee) + " start: " + hex (startAddr) + " end: "
+    #        + hex (endAddr))
+    if (callee >= startAddr and callee < endAddr
+        and (not callee in call_addresses)):
+      converted_calls.append((secs, msecs, fields[2], (callee - startAddr)))
+      call_addresses.append(callee)
+  return converted_calls
+
+def Timestamp(trace_entry):
+  return int (trace_entry[0]) * 1000000 + int(trace_entry[1])
+
+def AddTrace (tracemap, trace):
+  """Adds a trace to the tracemap.
+
+  Adds entries in the trace to the tracemap. All new calls will be added to
+  the tracemap. If the calls already exist in the tracemap then they will be
+  replaced if they happened sooner in the new trace.
+
+  Args:
+    tracemap: the tracemap
+    trace: the trace
+
+  """
+  for trace_entry in trace:
+    call = trace_entry[3]
+    if (not call in tracemap) or (
+        Timestamp(tracemap[call]) > Timestamp(trace_entry)):
+      tracemap[call] = trace_entry
+
+def main():
+  """Merge traces for code in the specified library and write to stdout.
+
+  Merges the traces and converts the virtual addresses to offsets in the
+  library. The first line of the merged trace has a dummy virtual address of
+  0-ffffffff so that symbolizing the addresses uses the addresses in the log,
+  since the addresses have already been converted to static offsets.
+  """
+  parser = optparse.OptionParser('usage: %prog trace1 ... traceN')
+  (_, args) = parser.parse_args()
+  if len(args) <= 1:
+    parser.error('expected at least the following args: trace1 trace2')
+
+  step = 0
+  tracemap = dict()
+  for trace_file in args:
+    step += 1
+    sys.stderr.write(" " + str(step) + "/" + str(len(args))
+                     + ": " + trace_file + ":\n")
+
+    trace_lines = map(string.rstrip, open(trace_file).readlines())
+    (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines)
+    CheckTimestamps(trace_calls)
+    sys.stderr.write("Len: " + str(len(trace_calls))
+                     + ". Start: " + hex(trace_start)
+                     + ", end: " + hex(trace_end) + '\n')
+
+    trace_calls = Convert(trace_calls, trace_start, trace_end)
+    sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n")
+
+    AddTrace(tracemap, trace_calls)
+    sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n")
+
+  # Extract the resulting trace from the tracemap
+  merged_trace = []
+  for call in tracemap:
+    merged_trace.append(tracemap[call])
+  merged_trace.sort(key=Timestamp)
+
+  print "0-ffffffff r-xp 00000000 xx:00 00000 ./"
+  print "secs\tmsecs\tpid:threadid\tfunc"
+  for call in merged_trace:
+    print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t"
+           + hex(call[3]))
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/cygprofile/patch_orderfile.py b/tools/cygprofile/patch_orderfile.py
new file mode 100755
index 0000000..17cde76
--- /dev/null
+++ b/tools/cygprofile/patch_orderfile.py
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import commands
+import os
+import sys
+
+orderfile = sys.argv[1]
+uninstrumented_shlib = sys.argv[2]
+
+nmlines_uninstrumented = commands.getoutput ('nm -S -n ' +
+    uninstrumented_shlib + ' | egrep "( t )|( W )|( T )"').split('\n')
+
+nmlines = []
+for nmline in nmlines_uninstrumented:
+  if (len(nmline.split()) == 4):
+    nmlines.append(nmline)
+
+# Map addresses to list of functions at that address. There are multiple
+# functions at an address because of aliasing.
+nm_index = 0
+uniqueAddrs = []
+addressMap = {}
+while nm_index < len(nmlines):
+  if (len(nmlines[nm_index].split()) == 4):
+    nm_int = int (nmlines[nm_index].split()[0], 16)
+    size = int (nmlines[nm_index].split()[1], 16)
+    fnames = [nmlines[nm_index].split()[3]]
+    nm_index = nm_index + 1
+    while nm_index < len(nmlines) and nm_int == int (
+        nmlines[nm_index].split()[0], 16):
+      fnames.append(nmlines[nm_index].split()[3])
+      nm_index = nm_index + 1
+    addressMap[nm_int] = fnames
+    uniqueAddrs.append((nm_int, size))
+  else:
+    nm_index = nm_index + 1
+
+def binary_search (addr, start, end):
+  # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)
+  if start >= end or start == end - 1:
+    (nm_addr, size) = uniqueAddrs[start]
+    if not (addr >= nm_addr and addr < nm_addr + size):
+      sys.stderr.write ("ERROR: did not find function in binary: addr: "
+          + hex(addr) + " nm_addr: " + str(nm_addr) + " start: " + str(start)
+          + " end: " + str(end) + "\n")
+      raise Exception("error")
+    return (addressMap[nm_addr], size)
+  else:
+    halfway = start + ((end - start) / 2)
+    (nm_addr, size) = uniqueAddrs[halfway]
+    # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)
+    if (addr >= nm_addr and addr < nm_addr + size):
+      return (addressMap[nm_addr], size)
+    elif (addr < nm_addr):
+      return binary_search (addr, start, halfway)
+    elif (addr >= nm_addr + size):
+      return binary_search (addr, halfway, end)
+    else:
+      raise Exception("ERROR: did not expect this case")
+
+f = open (orderfile)
+lines = f.readlines()
+profiled_list = []
+for line in lines:
+  if (line.strip() == ''):
+    continue
+  functionName = line.replace('.text.', '').split('.clone.')[0].strip()
+  profiled_list.append (functionName)
+
+# Symbol names are not unique. Since the order file uses symbol names, the
+# patched order file pulls in all symbols with the same name. Multiple function
+# addresses for the same function name may also be due to ".clone" symbols,
+# since the substring is stripped.
+functions = []
+functionAddressMap = {}
+for line in nmlines:
+  try:
+    functionName = line.split()[3]
+  except:
+    functionName = line.split()[2]
+  functionName = functionName.split('.clone.')[0]
+  functionAddress = int (line.split()[0].strip(), 16)
+  try:
+    functionAddressMap[functionName].append(functionAddress)
+  except:
+    functionAddressMap[functionName] = [functionAddress]
+    functions.append(functionName)
+
+sys.stderr.write ("profiled list size: " + str(len(profiled_list)) + "\n")
+addresses = []
+symbols_found = 0
+for function in profiled_list:
+  try:
+    addrs = functionAddressMap[function]
+    symbols_found = symbols_found + 1
+  except:
+    addrs = []
+    # sys.stderr.write ("WARNING: could not find symbol " + function + "\n")
+  for addr in addrs:
+    if not (addr in addresses):
+      addresses.append(addr)
+sys.stderr.write ("symbols found: " + str(symbols_found) + "\n")
+
+sys.stderr.write ("number of addresses: " + str(len(addresses)) + "\n")
+total_size = 0
+for addr in addresses:
+  # if (count % 500 == 0):
+  #   print "current count: " + str(count)
+  (functions, size) = binary_search (addr, 0, len(uniqueAddrs))
+  total_size = total_size + size
+  for function in functions:
+    print ".text." + function
+  print ""
+sys.stderr.write ("total_size: " + str(total_size) + "\n")
diff --git a/tools/cygprofile/symbolize.py b/tools/cygprofile/symbolize.py
new file mode 100755
index 0000000..694ae54
--- /dev/null
+++ b/tools/cygprofile/symbolize.py
@@ -0,0 +1,251 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Symbolize a log file produced by cygprofile instrumentation.
+
+Given a log file and the binary being profiled (e.g. executable, shared
+library), the script can produce three different outputs: 1) symbols for the
+addresses, 2) function and line numbers for the addresses, or 3) an order file.
+"""
+
+import optparse
+import os
+import string
+import subprocess
+import sys
+
+
+def ParseLogLines(log_file_lines):
+  """Parse a log file produced by the profiled run of clank.
+
+  Args:
+    log_file_lines: array of lines in log file produced by profiled run
+    lib_name: library or executable containing symbols
+
+  Below is an example of a small log file:
+    5086e000-52e92000 r-xp 00000000 b3:02 51276    libchromeview.so
+    secs       msecs      pid:threadid    func
+    START
+    1314897086 795828     3587:1074648168 0x509e105c
+    1314897086 795874     3587:1074648168 0x509e0eb4
+    1314897086 796326     3587:1074648168 0x509e0e3c
+    1314897086 796552     3587:1074648168 0x509e07bc
+    END
+
+  Returns:
+    call_info list with list of tuples of the format (sec, msec, call id,
+    function address called)
+  """
+  call_lines = []
+  has_started = False
+  vm_start = 0
+  line = log_file_lines[0]
+  assert("r-xp" in line)
+  end_index = line.find('-')
+  vm_start = int(line[:end_index], 16)
+  for line in log_file_lines[2:]:
+    # print hex(vm_start)
+    fields = line.split()
+    if len(fields) == 4:
+      call_lines.append(fields)
+
+  # Convert strings to int in fields.
+  call_info = []
+  for call_line in call_lines:
+    (sec_timestamp, msec_timestamp) = map(int, call_line[0:2])
+    callee_id = call_line[2]
+    addr = int(call_line[3], 16)
+    if vm_start < addr:
+      addr -= vm_start
+    call_info.append((sec_timestamp, msec_timestamp, callee_id, addr))
+
+  return call_info
+
+
+def ParseLibSymbols(lib_file):
+  """Get output from running nm and grepping for text symbols.
+
+  Args:
+    lib_file: the library or executable that contains the profiled code
+
+  Returns:
+    list of sorted unique addresses and corresponding size of function symbols
+    in lib_file and map of addresses to all symbols at a particular address
+  """
+  cmd = ['nm', '-S', '-n', lib_file]
+  nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+  output = nm_p.communicate()[0]
+  nm_lines = output.split('\n')
+
+  nm_symbols = []
+  for nm_line in nm_lines:
+    if any(str in nm_line for str in (' t ', ' W ', ' T ')):
+      nm_symbols.append(nm_line)
+
+  nm_index = 0
+  unique_addrs = []
+  address_map = {}
+  while nm_index < len(nm_symbols):
+
+    # If the length of the split line is not 4, then it does not contain all the
+    # information needed to symbolize (i.e. address, size and symbol name).
+    if len(nm_symbols[nm_index].split()) == 4:
+      (addr, size) = [int(x, 16) for x in nm_symbols[nm_index].split()[0:2]]
+
+      # Multiple symbols may be at the same address. This is due to aliasing
+      # done by the compiler. Since there is no way to be sure which one was
+      # called in the profiled run, we symbolize to include all symbol names at
+      # a particular address.
+      fnames = []
+      while (nm_index < len(nm_symbols) and
+             addr == int(nm_symbols[nm_index].split()[0], 16)):
+        if len(nm_symbols[nm_index].split()) == 4:
+          fnames.append(nm_symbols[nm_index].split()[3])
+        nm_index += 1
+      address_map[addr] = fnames
+      unique_addrs.append((addr, size))
+    else:
+      nm_index += 1
+
+  return (unique_addrs, address_map)
+
+class SymbolNotFoundException(Exception):
+  def __init__(self, value):
+    self.value = value
+  def __str__(self):
+    return repr(self.value)
+
+def BinarySearchAddresses(addr, start, end, arr):
+  """Find the starting address of a symbol at a particular address.
+
+  The reason we can not directly use the address provided by the log file is
+  that the log file may give an address after the start of the symbol. The
+  logged address is often one byte after the start. Using this search
+  function, rather than just subtracting one from the logged address, allows
+  the logging instrumentation to log any address in a function.
+
+  Args:
+    addr: the address being searched for
+    start: the starting index for the binary search
+    end: the ending index for the binary search
+    arr: the list being searched containing tuple of address and size
+
+  Returns:
+    the starting address of the symbol at address addr
+
+  Raises:
+    Exception: if address not found. Function expects all logged addresses
+    to be found
+  """
+  # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)
+  if start >= end or start == end - 1:
+    # arr[i] is a tuple of address and size. Check if addr is inside the range
+    if addr >= arr[start][0] and addr < arr[start][0] + arr[start][1]:
+      return arr[start][0]
+    elif addr >= arr[end][0] and addr < arr[end][0] + arr[end][1]:
+      return arr[end][0]
+    else:
+      raise SymbolNotFoundException(addr)
+  else:
+    halfway = (start + end) / 2
+    (nm_addr, size) = arr[halfway]
+    # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)
+    if addr >= nm_addr and addr < nm_addr + size:
+      return nm_addr
+    elif addr < nm_addr:
+      return BinarySearchAddresses(addr, start, halfway-1, arr)
+    else:
+      # Condition (addr >= nm_addr + size) must be true.
+      return BinarySearchAddresses(addr, halfway+1, end, arr)
+
+
+def FindFunctions(addr, unique_addrs, address_map):
+  """Find function symbol names at address addr."""
+  return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1,
+                                           unique_addrs)]
+
+
+def AddrToLine(addr, lib_file):
+  """Use addr2line to determine line info of a particular address."""
+  cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
+  p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+  output = (p.communicate()[0]).split('\n')
+  line = output[0]
+  index = 1
+  while index < len(output):
+    line = line + ':' + output[index]
+    index += 1
+  return line
+
+
+def main():
+  """Write output for profiled run to standard out.
+
+  The format of the output depends on the output type specified as the third
+  command line argument. The default output type is to symbolize the addresses
+  of the functions called.
+  """
+  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
+  parser.add_option('-t', '--outputType', dest='output_type',
+                    default='symbolize', type='string',
+                    help='lineize or symbolize or orderfile')
+
+  # Option for output type. The log file and lib file arguments are required
+  # by the script and therefore are not options.
+  (options, args) = parser.parse_args()
+  if len(args) != 2:
+    parser.error('expected 2 args: log_file lib_file')
+
+  (log_file, lib_file) = args
+  output_type = options.output_type
+
+  lib_name = lib_file.split('/')[-1].strip()
+  log_file_lines = map(string.rstrip, open(log_file).readlines())
+  call_info = ParseLogLines(log_file_lines)
+  (unique_addrs, address_map) = ParseLibSymbols(lib_file)
+
+  # Check for duplicate addresses in the log file, and print a warning if
+  # duplicates are found. The instrumentation that produces the log file
+  # should only print the first time a function is entered.
+  addr_list = []
+  for call in call_info:
+    addr = call[3]
+    if addr not in addr_list:
+      addr_list.append(addr)
+    else:
+      print('WARNING: Address ' + hex(addr) + ' (line= ' +
+            AddrToLine(addr, lib_file) + ') already profiled.')
+
+  for call in call_info:
+    if output_type == 'lineize':
+      symbol = AddrToLine(call[3], lib_file)
+      print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' +
+            symbol)
+    elif output_type == 'orderfile':
+      try:
+        symbols = FindFunctions(call[3], unique_addrs, address_map)
+        for symbol in symbols:
+          print '.text.' + symbol
+        print ''
+      except SymbolNotFoundException as e:
+        sys.stderr.write('WARNING: Did not find function in binary. addr: ' +
+                         hex(call[3]) + '\n')
+    else:
+      try:
+        symbols = FindFunctions(call[3], unique_addrs, address_map)
+        print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' +
+              symbols[0])
+        first_symbol = True
+        for symbol in symbols:
+          if not first_symbol:
+            print '\t\t\t\t\t' + symbol
+          else:
+            first_symbol = False
+      except SymbolNotFoundException as e:
+        sys.stderr.write('WARNING: Did not find function in binary. addr: ' +
+                         hex(call[3]) + '\n')
+
+if __name__ == '__main__':
+  main()