author     aberent@chromium.org <aberent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2013-05-30 14:51:56 +0000
committer  aberent@chromium.org <aberent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2013-05-30 14:51:56 +0000
commit     da0ac0fd0562d882d90232e563ff3a213d56591c (patch)
tree       f44f553388626c332ccb43baf43d6d7a6c712f8f /tools/cygprofile
parent     adbed2251169cd091cb594cb75eafe7b2dc5e077 (diff)
Add scripts for using cygprofile to the repository
These scripts are used to create a link order file for Linux-based builds
(Linux, Android, ChromeOS). This commit simply moves them from a Google-private
web page where they have lived for the last couple of years. The scripts are:
mergetraces.py - merge multiple cygprofile traces into a single trace
symbolize.py - convert traces into an order file with symbols
patch_orderfile.py - complete the order file with symbols from Chrome
I will separately add a page on creating an order file to the Chromium web site.
Review URL: https://chromiumcodereview.appspot.com/16151006
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@203143 0039d316-1c4b-4281-b951-d872f2087c98
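For context, the three scripts are meant to run as a pipeline: merge the per-process traces, symbolize the merged trace into a partial order file, then patch that order file against the uninstrumented library. Below is a minimal sketch of that pipeline; every path and library name (cyglog.*, libchromeview.so, the output file names) is a hypothetical example and not part of this commit.

#!/usr/bin/python
# Hypothetical end-to-end driver; all file and library names are examples only.
import glob
import subprocess

trace_files = sorted(glob.glob('cyglog.*'))  # per-process cygprofile traces

# 1. Merge the traces into a single, timestamp-ordered trace.
with open('merged_cyglog', 'w') as merged:
  subprocess.check_call(['tools/cygprofile/mergetraces.py'] + trace_files,
                        stdout=merged)

# 2. Turn the merged trace into a partial order file of .text.* symbols.
with open('partial_orderfile', 'w') as partial:
  subprocess.check_call(['tools/cygprofile/symbolize.py', '-t', 'orderfile',
                         'merged_cyglog', 'libchromeview.so'],
                        stdout=partial)

# 3. Complete the order file against the uninstrumented library.
with open('orderfile', 'w') as final:
  subprocess.check_call(['tools/cygprofile/patch_orderfile.py',
                         'partial_orderfile', 'libchromeview_uninstrumented.so'],
                        stdout=final)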
Diffstat (limited to 'tools/cygprofile')
-rwxr-xr-x  tools/cygprofile/mergetraces.py      186
-rwxr-xr-x  tools/cygprofile/patch_orderfile.py  117
-rwxr-xr-x  tools/cygprofile/symbolize.py        251
3 files changed, 554 insertions, 0 deletions
diff --git a/tools/cygprofile/mergetraces.py b/tools/cygprofile/mergetraces.py
new file mode 100755
index 0000000..5d85920
--- /dev/null
+++ b/tools/cygprofile/mergetraces.py
@@ -0,0 +1,186 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog
+
+""""Merge multiple logs files from different processes into a single log.
+
+Given two log files of execution traces, merge the traces into a single trace.
+Merging will use timestamps (i.e. the first two columns of logged calls) to
+create a single log that is an ordered trace of calls by both processes.
+"""
+
+import optparse
+import os
+import string
+import subprocess
+import sys
+
+def ParseLogLines(lines):
+ """Parse log file lines.
+
+ Args:
+ lines: lines from log file produced by profiled run
+
+ Below is an example of a small log file:
+ 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
+ secs msecs pid:threadid func
+ START
+ 1314897086 795828 3587:1074648168 0x509e105c
+ 1314897086 795874 3587:1074648168 0x509e0eb4
+ 1314897086 796326 3587:1074648168 0x509e0e3c
+ 1314897086 796552 3587:1074648168 0x509e07bc
+ END
+
+ Returns:
+    tuple consisting of 1) an ordered list of the logged calls, as an array of
+ fields, 2) the virtual start address of the library, used to compute the
+ offset of the symbol in the library and 3) the virtual end address
+ """
+ call_lines = []
+ vm_start = 0
+ vm_end = 0
+ dash_index = lines[0].find ('-')
+ space_index = lines[0].find (' ')
+ vm_start = int (lines[0][:dash_index], 16)
+ vm_end = int (lines[0][dash_index+1:space_index], 16)
+ for line in lines[2:]:
+ line = line.strip()
+ # print hex (vm_start)
+ fields = line.split()
+ call_lines.append (fields)
+
+ return (call_lines, vm_start, vm_end)
+
+def HasDuplicates(calls):
+ """Funcition is a sanity check to make sure that calls are only logged once.
+
+ Args:
+ calls: list of calls logged
+
+ Returns:
+ boolean indicating if calls has duplicate calls
+ """
+ seen = []
+ for call in calls:
+ if call[3] in seen:
+      return True
+ else:
+ seen.append(call[3])
+
+def CheckTimestamps(calls):
+ """Prints warning to stderr if the call timestamps are not in order.
+
+ Args:
+ calls: list of calls logged
+ """
+ index = 0
+ last_timestamp_secs = -1
+ last_timestamp_ms = -1
+ while (index < len (calls)):
+ timestamp_secs = int (calls[index][0])
+ timestamp_ms = int (calls[index][1])
+ timestamp = (timestamp_secs * 1000000) + timestamp_ms
+ last_timestamp = (last_timestamp_secs * 1000000) + last_timestamp_ms
+ if (timestamp < last_timestamp):
+ sys.stderr.write("WARNING: last_timestamp: " + str(last_timestamp_secs)
+ + " " + str(last_timestamp_ms) + " timestamp: "
+ + str(timestamp_secs) + " " + str(timestamp_ms) + "\n")
+ last_timestamp_secs = timestamp_secs
+ last_timestamp_ms = timestamp_ms
+ index = index + 1
+
+def Convert (call_lines, startAddr, endAddr):
+ """Converts the call addresses to static offsets and removes invalid calls.
+
+  Removes profiled calls not in the shared library using the start and end
+  virtual addresses, converts strings to integer values, and converts virtual
+  addresses to offsets in the shared library.
+
+ Returns:
+ list of calls as tuples (sec, msec, pid:tid, callee)
+ """
+ converted_calls = []
+ call_addresses = []
+ for fields in call_lines:
+ secs = int (fields[0])
+ msecs = int (fields[1])
+ callee = int (fields[3], 16)
+ # print ("callee: " + hex (callee) + " start: " + hex (startAddr) + " end: "
+ # + hex (endAddr))
+ if (callee >= startAddr and callee < endAddr
+ and (not callee in call_addresses)):
+ converted_calls.append((secs, msecs, fields[2], (callee - startAddr)))
+ call_addresses.append(callee)
+ return converted_calls
+
+def Timestamp(trace_entry):
+ return int (trace_entry[0]) * 1000000 + int(trace_entry[1])
+
+def AddTrace (tracemap, trace):
+ """Adds a trace to the tracemap.
+
+ Adds entries in the trace to the tracemap. All new calls will be added to
+ the tracemap. If the calls already exist in the tracemap then they will be
+ replaced if they happened sooner in the new trace.
+
+ Args:
+ tracemap: the tracemap
+ trace: the trace
+
+ """
+ for trace_entry in trace:
+ call = trace_entry[3]
+ if (not call in tracemap) or (
+ Timestamp(tracemap[call]) > Timestamp(trace_entry)):
+ tracemap[call] = trace_entry
+
+def main():
+ """Merge two traces for code in specified library and write to stdout.
+
+ Merges the two traces and coverts the virtual addresses to the offsets in the
+ library. First line of merged trace has dummy virtual address of 0-ffffffff
+ so that symbolizing the addresses uses the addresses in the log, since the
+ addresses have already been converted to static offsets.
+ """
+ parser = optparse.OptionParser('usage: %prog trace1 ... traceN')
+ (_, args) = parser.parse_args()
+ if len(args) <= 1:
+ parser.error('expected at least the following args: trace1 trace2')
+
+ step = 0
+ tracemap = dict()
+ for trace_file in args:
+ step += 1
+ sys.stderr.write(" " + str(step) + "/" + str(len(args)) +
+ ": " + trace_file + ":\n")
+
+ trace_lines = map(string.rstrip, open(trace_file).readlines())
+ (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines)
+ CheckTimestamps(trace_calls)
+ sys.stderr.write("Len: " + str(len(trace_calls)) +
+ ". Start: " + hex(trace_start) +
+ ", end: " + hex(trace_end) + '\n')
+
+ trace_calls = Convert(trace_calls, trace_start, trace_end)
+ sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n")
+
+ AddTrace(tracemap, trace_calls)
+ sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n")
+
+ # Extract the resulting trace from the tracemap
+ merged_trace = []
+ for call in tracemap:
+ merged_trace.append(tracemap[call])
+ merged_trace.sort(key=Timestamp)
+
+ print "0-ffffffff r-xp 00000000 xx:00 00000 ./"
+ print "secs\tmsecs\tpid:threadid\tfunc"
+ for call in merged_trace:
+ print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" +
+ hex(call[3]))
+
+if __name__ == '__main__':
+ main()
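To make the merge rule in AddTrace concrete: when the same callee offset appears in more than one trace, the entry with the earlier (secs, msecs) timestamp wins, and the merged output is sorted by timestamp. A small self-contained sketch of that rule, using made-up timestamps and offsets:

# Illustration only: replays the AddTrace/Timestamp rule on invented entries.
def Timestamp(entry):
  return int(entry[0]) * 1000000 + int(entry[1])

trace_a = [(1314897086, 795828, '3587:1074648168', 0x105c),
           (1314897086, 796326, '3587:1074648168', 0x0e3c)]
trace_b = [(1314897086, 795500, '3601:1074648168', 0x105c),   # earlier 0x105c
           (1314897086, 796552, '3601:1074648168', 0x07bc)]

tracemap = {}
for trace in (trace_a, trace_b):
  for entry in trace:
    call = entry[3]
    if call not in tracemap or Timestamp(tracemap[call]) > Timestamp(entry):
      tracemap[call] = entry

# Three unique offsets survive; 0x105c keeps the earlier trace_b timestamp.
for secs, msecs, pid_tid, callee in sorted(tracemap.values(), key=Timestamp):
  print '%d\t%d\t%s\t%s' % (secs, msecs, pid_tid, hex(callee))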
diff --git a/tools/cygprofile/patch_orderfile.py b/tools/cygprofile/patch_orderfile.py
new file mode 100755
index 0000000..17cde76
--- /dev/null
+++ b/tools/cygprofile/patch_orderfile.py
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import commands
+import os
+import sys
+
+orderfile = sys.argv[1]
+uninstrumented_shlib = sys.argv[2]
+
+nmlines_uninstrumented = commands.getoutput ('nm -S -n ' +
+ uninstrumented_shlib + ' | egrep "( t )|( W )|( T )"').split('\n')
+
+nmlines = []
+for nmline in nmlines_uninstrumented:
+ if (len(nmline.split()) == 4):
+ nmlines.append(nmline)
+
+# Map addresses to list of functions at that address. There are multiple
+# functions at an address because of aliasing.
+nm_index = 0
+uniqueAddrs = []
+addressMap = {}
+while nm_index < len(nmlines):
+ if (len(nmlines[nm_index].split()) == 4):
+ nm_int = int (nmlines[nm_index].split()[0], 16)
+ size = int (nmlines[nm_index].split()[1], 16)
+ fnames = [nmlines[nm_index].split()[3]]
+ nm_index = nm_index + 1
+ while nm_index < len(nmlines) and nm_int == int (
+ nmlines[nm_index].split()[0], 16):
+ fnames.append(nmlines[nm_index].split()[3])
+ nm_index = nm_index + 1
+ addressMap[nm_int] = fnames
+ uniqueAddrs.append((nm_int, size))
+ else:
+ nm_index = nm_index + 1
+
+def binary_search (addr, start, end):
+ # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)
+ if start >= end or start == end - 1:
+ (nm_addr, size) = uniqueAddrs[start]
+ if not (addr >= nm_addr and addr < nm_addr + size):
+ sys.stderr.write ("ERROR: did not find function in binary: addr: " +
+ hex(addr) + " nm_addr: " + str(nm_addr) + " start: " + str(start) +
+ " end: " + str(end) + "\n")
+      raise Exception("error")
+ return (addressMap[nm_addr], size)
+ else:
+ halfway = start + ((end - start) / 2)
+ (nm_addr, size) = uniqueAddrs[halfway]
+ # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)
+ if (addr >= nm_addr and addr < nm_addr + size):
+ return (addressMap[nm_addr], size)
+ elif (addr < nm_addr):
+ return binary_search (addr, start, halfway)
+ elif (addr >= nm_addr + size):
+ return binary_search (addr, halfway, end)
+ else:
+ raise "ERROR: did not expect this case"
+
+f = open (orderfile)
+lines = f.readlines()
+profiled_list = []
+for line in lines:
+ if (line.strip() == ''):
+ continue
+ functionName = line.replace('.text.', '').split('.clone.')[0].strip()
+ profiled_list.append (functionName)
+
+# Symbol names are not unique. Since the order file uses symbol names, the
+# patched order file pulls in all symbols with the same name. Multiple function
+# addresses for the same function name may also be due to ".clone" symbols,
+# since the substring is stripped.
+functions = []
+functionAddressMap = {}
+for line in nmlines:
+ try:
+ functionName = line.split()[3]
+  except IndexError:
+ functionName = line.split()[2]
+ functionName = functionName.split('.clone.')[0]
+ functionAddress = int (line.split()[0].strip(), 16)
+ try:
+ functionAddressMap[functionName].append(functionAddress)
+  except KeyError:
+ functionAddressMap[functionName] = [functionAddress]
+ functions.append(functionName)
+
+sys.stderr.write ("profiled list size: " + str(len(profiled_list)) + "\n")
+addresses = []
+symbols_found = 0
+for function in profiled_list:
+ try:
+ addrs = functionAddressMap[function]
+ symbols_found = symbols_found + 1
+  except KeyError:
+ addrs = []
+ # sys.stderr.write ("WARNING: could not find symbol " + function + "\n")
+ for addr in addrs:
+ if not (addr in addresses):
+ addresses.append(addr)
+sys.stderr.write ("symbols found: " + str(symbols_found) + "\n")
+
+sys.stderr.write ("number of addresses: " + str(len(addresses)) + "\n")
+total_size = 0
+for addr in addresses:
+ # if (count % 500 == 0):
+ # print "current count: " + str(count)
+ (functions, size) = binary_search (addr, 0, len(uniqueAddrs))
+ total_size = total_size + size
+ for function in functions:
+ print ".text." + function
+ print ""
+sys.stderr.write ("total_size: " + str(total_size) + "\n")
diff --git a/tools/cygprofile/symbolize.py b/tools/cygprofile/symbolize.py
new file mode 100755
index 0000000..694ae54
--- /dev/null
+++ b/tools/cygprofile/symbolize.py
@@ -0,0 +1,251 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Symbolize log file produced by cypgofile instrumentation.
+
+Given a log file and the binary being profiled (e.g. executable, shared
+library), the script can produce three different outputs: 1) symbols for the
+addresses, 2) function and line numbers for the addresses, or 3) an order file.
+"""
+
+import optparse
+import os
+import string
+import subprocess
+import sys
+
+
+def ParseLogLines(log_file_lines):
+ """Parse a log file produced by the profiled run of clank.
+
+ Args:
+    log_file_lines: array of lines in the log file produced by the profiled
+      run
+
+ Below is an example of a small log file:
+ 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
+ secs msecs pid:threadid func
+ START
+ 1314897086 795828 3587:1074648168 0x509e105c
+ 1314897086 795874 3587:1074648168 0x509e0eb4
+ 1314897086 796326 3587:1074648168 0x509e0e3c
+ 1314897086 796552 3587:1074648168 0x509e07bc
+ END
+
+ Returns:
+ call_info list with list of tuples of the format (sec, msec, call id,
+ function address called)
+ """
+ call_lines = []
+ has_started = False
+ vm_start = 0
+ line = log_file_lines[0]
+ assert("r-xp" in line)
+ end_index = line.find('-')
+ vm_start = int(line[:end_index], 16)
+ for line in log_file_lines[2:]:
+ # print hex(vm_start)
+ fields = line.split()
+ if len(fields) == 4:
+ call_lines.append(fields)
+
+ # Convert strings to int in fields.
+ call_info = []
+ for call_line in call_lines:
+ (sec_timestamp, msec_timestamp) = map(int, call_line[0:2])
+ callee_id = call_line[2]
+ addr = int(call_line[3], 16)
+ if vm_start < addr:
+ addr -= vm_start
+ call_info.append((sec_timestamp, msec_timestamp, callee_id, addr))
+
+ return call_info
+
+
+def ParseLibSymbols(lib_file):
+ """Get output from running nm and greping for text symbols.
+
+ Args:
+ lib_file: the library or executable that contains the profiled code
+
+ Returns:
+ list of sorted unique addresses and corresponding size of function symbols
+ in lib_file and map of addresses to all symbols at a particular address
+ """
+ cmd = ['nm', '-S', '-n', lib_file]
+ nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+ output = nm_p.communicate()[0]
+ nm_lines = output.split('\n')
+
+ nm_symbols = []
+ for nm_line in nm_lines:
+    if any(tag in nm_line for tag in (' t ', ' W ', ' T ')):
+ nm_symbols.append(nm_line)
+
+ nm_index = 0
+ unique_addrs = []
+ address_map = {}
+ while nm_index < len(nm_symbols):
+
+ # If the length of the split line is not 4, then it does not contain all the
+ # information needed to symbolize (i.e. address, size and symbol name).
+ if len(nm_symbols[nm_index].split()) == 4:
+ (addr, size) = [int(x, 16) for x in nm_symbols[nm_index].split()[0:2]]
+
+      # Multiple symbols may be at the same address. This is due to aliasing
+      # done by the compiler. Since there is no way to be sure which one was
+      # called in the profiled run, we symbolize to include all symbol names
+      # at a particular address.
+ fnames = []
+ while (nm_index < len(nm_symbols) and
+ addr == int(nm_symbols[nm_index].split()[0], 16)):
+ if len(nm_symbols[nm_index].split()) == 4:
+ fnames.append(nm_symbols[nm_index].split()[3])
+ nm_index += 1
+ address_map[addr] = fnames
+ unique_addrs.append((addr, size))
+ else:
+ nm_index += 1
+
+ return (unique_addrs, address_map)
+
+class SymbolNotFoundException(Exception):
+  def __init__(self, value):
+ self.value = value
+ def __str__(self):
+ return repr(self.value)
+
+def BinarySearchAddresses(addr, start, end, arr):
+ """Find starting address of a symbol at a particular address.
+
+  We cannot directly use the address provided by the log file because the log
+  file may give an address after the start of the symbol; the logged address
+  is often one byte after the start. Using this search function, rather than
+  just subtracting one from the logged address, allows the logging
+  instrumentation to log any address in a function.
+
+ Args:
+ addr: the address being searched for
+ start: the starting index for the binary search
+ end: the ending index for the binary search
+ arr: the list being searched containing tuple of address and size
+
+ Returns:
+ the starting address of the symbol at address addr
+
+ Raises:
+    SymbolNotFoundException: if the address is not found. The function expects
+      all logged addresses to be found
+ """
+ # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)
+ if start >= end or start == end - 1:
+ # arr[i] is a tuple of address and size. Check if addr inside range
+ if addr >= arr[start][0] and addr < arr[start][0] + arr[start][1]:
+ return arr[start][0]
+ elif addr >= arr[end][0] and addr < arr[end][0] + arr[end][1]:
+ return arr[end][0]
+ else:
+ raise SymbolNotFoundException(addr)
+ else:
+ halfway = (start + end) / 2
+ (nm_addr, size) = arr[halfway]
+ # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)
+ if addr >= nm_addr and addr < nm_addr + size:
+ return nm_addr
+ elif addr < nm_addr:
+ return BinarySearchAddresses(addr, start, halfway-1, arr)
+ else:
+ # Condition (addr >= nm_addr + size) must be true.
+ return BinarySearchAddresses(addr, halfway+1, end, arr)
+
+
+def FindFunctions(addr, unique_addrs, address_map):
+ """Find function symbol names at address addr."""
+ return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1,
+ unique_addrs)]
+
+
+def AddrToLine(addr, lib_file):
+ """Use addr2line to determine line info of a particular address."""
+ cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+ output = (p.communicate()[0]).split('\n')
+ line = output[0]
+ index = 1
+ while index < len(output):
+ line = line + ':' + output[index]
+ index += 1
+ return line
+
+
+def main():
+ """Write output for profiled run to standard out.
+
+ The format of the output depends on the output type specified as the third
+ command line argument. The default output type is to symbolize the addresses
+ of the functions called.
+ """
+ parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
+ parser.add_option('-t', '--outputType', dest='output_type',
+ default='symbolize', type='string',
+ help='lineize or symbolize or orderfile')
+
+ # Option for output type. The log file and lib file arguments are required
+ # by the script and therefore are not options.
+ (options, args) = parser.parse_args()
+ if len(args) != 2:
+ parser.error('expected 2 args: log_file lib_file')
+
+ (log_file, lib_file) = args
+ output_type = options.output_type
+
+ lib_name = lib_file.split('/')[-1].strip()
+ log_file_lines = map(string.rstrip, open(log_file).readlines())
+ call_info = ParseLogLines(log_file_lines)
+ (unique_addrs, address_map) = ParseLibSymbols(lib_file)
+
+ # Check for duplicate addresses in the log file, and print a warning if
+ # duplicates are found. The instrumentation that produces the log file
+ # should only print the first time a function is entered.
+ addr_list = []
+ for call in call_info:
+ addr = call[3]
+ if addr not in addr_list:
+ addr_list.append(addr)
+ else:
+ print('WARNING: Address ' + hex(addr) + ' (line= ' +
+ AddrToLine(addr, lib_file) + ') already profiled.')
+
+ for call in call_info:
+ if output_type == 'lineize':
+ symbol = AddrToLine(call[3], lib_file)
+ print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'
+ + symbol)
+ elif output_type == 'orderfile':
+ try:
+ symbols = FindFunctions(call[3], unique_addrs, address_map)
+ for symbol in symbols:
+ print '.text.' + symbol
+ print ''
+ except SymbolNotFoundException as e:
+ sys.stderr.write('WARNING: Did not find function in binary. addr: '
+            + hex(call[3]) + '\n')
+ else:
+ try:
+ symbols = FindFunctions(call[3], unique_addrs, address_map)
+ print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'
+ + symbols[0])
+ first_symbol = True
+ for symbol in symbols:
+ if not first_symbol:
+ print '\t\t\t\t\t' + symbol
+ else:
+ first_symbol = False
+ except SymbolNotFoundException as e:
+ sys.stderr.write('WARNING: Did not find function in binary. addr: '
+            + hex(call[3]) + '\n')
+
+if __name__ == '__main__':
+ main()
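The dummy '0-ffffffff r-xp ...' header that mergetraces.py writes exists so that ParseLogLines above leaves already-rebased offsets untouched: the header parses to vm_start = 0 and the subtraction becomes a no-op, while a real mapping header causes the raw virtual addresses to be rebased. A small sketch of that parsing step, with made-up log lines:

# Minimal replay of the ParseLogLines rebasing step on invented log lines.
def RebaseAddresses(log_lines):
  header = log_lines[0]
  assert 'r-xp' in header
  vm_start = int(header[:header.find('-')], 16)
  rebased = []
  for line in log_lines[2:]:            # skip the mapping and column headers
    fields = line.split()
    if len(fields) == 4:
      addr = int(fields[3], 16)
      if vm_start < addr:               # same guard symbolize.py applies
        addr -= vm_start
      rebased.append((int(fields[0]), int(fields[1]), fields[2], addr))
  return rebased

raw_log = ['5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so',
           'secs msecs pid:threadid func',
           '1314897086 795828 3587:1074648168 0x509e105c']
merged_log = ['0-ffffffff r-xp 00000000 xx:00 00000 ./',
              'secs msecs pid:threadid func',
              '1314897086 795828 3587:1074648168 0x105c']

print [hex(a) for (_, _, _, a) in RebaseAddresses(raw_log)]     # ['0x17305c']
print [hex(a) for (_, _, _, a) in RebaseAddresses(merged_log)]  # ['0x105c']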