author    | aberent@chromium.org <aberent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-05-30 14:51:56 +0000
committer | aberent@chromium.org <aberent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-05-30 14:51:56 +0000
commit    | da0ac0fd0562d882d90232e563ff3a213d56591c (patch)
tree      | f44f553388626c332ccb43baf43d6d7a6c712f8f /tools/cygprofile
parent    | adbed2251169cd091cb594cb75eafe7b2dc5e077 (diff)
Add scripts for using cygprofile to repository
These scripts are used to create a link order file for Linux-based builds
(Linux, Android, ChromeOS). This commit simply moves them from a Google-private
web page where they have lived for the last couple of years.
The scripts are:
mergetraces.py - merge multiple cygprofile traces into a single trace
symbolize.py - convert traces into an order file with symbols
patch_orderfile.py - complete the order file with symbols from Chrome.
I will separately add a page on creating an order file to the Chromium
web site.
Review URL: https://chromiumcodereview.appspot.com/16151006
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@203143 0039d316-1c4b-4281-b951-d872f2087c98
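
As context for how these three scripts fit together, below is a sketch of a typical end-to-end run, inferred only from the usage strings embedded in the scripts themselves; the cyglog file names, library paths, and output file names are illustrative assumptions, not part of this change.

```sh
# Hypothetical pipeline; all paths and file names are placeholders.
# 1. Merge the per-process cyglog traces into a single trace
#    (usage taken from the mergetraces.py docstring).
tools/cygprofile/mergetraces.py `ls cyglog.* -Sr` > merged_cyglog

# 2. Turn the merged trace into an order file of .text.* sections; the
#    library here is assumed to be the instrumented, unstripped build.
tools/cygprofile/symbolize.py -t orderfile merged_cyglog libchromeview.so > chrome.orderfile

# 3. Complete the order file against the uninstrumented library so that
#    aliased and cloned symbols are also pulled in.
tools/cygprofile/patch_orderfile.py chrome.orderfile libchromeview_uninstrumented.so > chrome.orderfile.patched
```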
Diffstat (limited to 'tools/cygprofile')
-rwxr-xr-x | tools/cygprofile/mergetraces.py     | 186
-rwxr-xr-x | tools/cygprofile/patch_orderfile.py | 117
-rwxr-xr-x | tools/cygprofile/symbolize.py       | 251
3 files changed, 554 insertions, 0 deletions
diff --git a/tools/cygprofile/mergetraces.py b/tools/cygprofile/mergetraces.py
new file mode 100755
index 0000000..5d85920
--- /dev/null
+++ b/tools/cygprofile/mergetraces.py
@@ -0,0 +1,186 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog
+
+"""Merge multiple log files from different processes into a single log.
+
+Given two log files of execution traces, merge the traces into a single trace.
+Merging will use timestamps (i.e. the first two columns of logged calls) to
+create a single log that is an ordered trace of calls by both processes.
+"""
+
+import optparse
+import os
+import string
+import subprocess
+import sys
+
+def ParseLogLines(lines):
+  """Parse log file lines.
+
+  Args:
+    lines: lines from log file produced by profiled run
+
+  Below is an example of a small log file:
+    5086e000-52e92000 r-xp 00000000 b3:02 51276    libchromeview.so
+    secs       msecs      pid:threadid    func
+    START
+    1314897086 795828     3587:1074648168 0x509e105c
+    1314897086 795874     3587:1074648168 0x509e0eb4
+    1314897086 796326     3587:1074648168 0x509e0e3c
+    1314897086 796552     3587:1074648168 0x509e07bc
+    END
+
+  Returns:
+    tuple consisting of 1) an ordered list of the logged calls, as an array of
+    fields, 2) the virtual start address of the library, used to compute the
+    offset of the symbol in the library and 3) the virtual end address
+  """
+  call_lines = []
+  vm_start = 0
+  vm_end = 0
+  dash_index = lines[0].find ('-')
+  space_index = lines[0].find (' ')
+  vm_start = int (lines[0][:dash_index], 16)
+  vm_end = int (lines[0][dash_index+1:space_index], 16)
+  for line in lines[2:]:
+    line = line.strip()
+    # print hex (vm_start)
+    fields = line.split()
+    call_lines.append (fields)
+
+  return (call_lines, vm_start, vm_end)
+
+def HasDuplicates(calls):
+  """Function is a sanity check to make sure that calls are only logged once.
+
+  Args:
+    calls: list of calls logged
+
+  Returns:
+    boolean indicating if calls has duplicate calls
+  """
+  seen = []
+  for call in calls:
+    if call[3] in seen:
+      return True
+    else:
+      seen.append(call[3])
+
+def CheckTimestamps(calls):
+  """Prints warning to stderr if the call timestamps are not in order.
+
+  Args:
+    calls: list of calls logged
+  """
+  index = 0
+  last_timestamp_secs = -1
+  last_timestamp_ms = -1
+  while (index < len (calls)):
+    timestamp_secs = int (calls[index][0])
+    timestamp_ms = int (calls[index][1])
+    timestamp = (timestamp_secs * 1000000) + timestamp_ms
+    last_timestamp = (last_timestamp_secs * 1000000) + last_timestamp_ms
+    if (timestamp < last_timestamp):
+      sys.stderr.write("WARNING: last_timestamp: " + str(last_timestamp_secs)
+                       + " " + str(last_timestamp_ms) + " timestamp: "
+                       + str(timestamp_secs) + " " + str(timestamp_ms) + "\n")
+    last_timestamp_secs = timestamp_secs
+    last_timestamp_ms = timestamp_ms
+    index = index + 1
+
+def Convert (call_lines, startAddr, endAddr):
+  """Converts the call addresses to static offsets and removes invalid calls.
+
+  Removes profiled calls not in the shared library using the start and end
+  virtual addresses, converts strings to integer values, and converts virtual
+  addresses to offsets within the shared library.
+
+  Returns:
+    list of calls as tuples (sec, msec, pid:tid, callee)
+  """
+  converted_calls = []
+  call_addresses = []
+  for fields in call_lines:
+    secs = int (fields[0])
+    msecs = int (fields[1])
+    callee = int (fields[3], 16)
+    # print ("callee: " + hex (callee) + " start: " + hex (startAddr) + " end: "
+    #        + hex (endAddr))
+    if (callee >= startAddr and callee < endAddr
+        and (not callee in call_addresses)):
+      converted_calls.append((secs, msecs, fields[2], (callee - startAddr)))
+      call_addresses.append(callee)
+  return converted_calls
+
+def Timestamp(trace_entry):
+  return int (trace_entry[0]) * 1000000 + int(trace_entry[1])
+
+def AddTrace (tracemap, trace):
+  """Adds a trace to the tracemap.
+
+  Adds entries in the trace to the tracemap. All new calls will be added to
+  the tracemap. If the calls already exist in the tracemap then they will be
+  replaced if they happened sooner in the new trace.
+
+  Args:
+    tracemap: the tracemap
+    trace: the trace
+
+  """
+  for trace_entry in trace:
+    call = trace_entry[3]
+    if (not call in tracemap) or (
+        Timestamp(tracemap[call]) > Timestamp(trace_entry)):
+      tracemap[call] = trace_entry
+
+def main():
+  """Merge traces for code in the specified library and write to stdout.
+
+  Merges the traces and converts the virtual addresses to offsets in the
+  library. The first line of the merged trace has a dummy virtual address of
+  0-ffffffff so that symbolizing the addresses uses the addresses in the log,
+  since the addresses have already been converted to static offsets.
+  """
+  parser = optparse.OptionParser('usage: %prog trace1 ... traceN')
+  (_, args) = parser.parse_args()
+  if len(args) <= 1:
+    parser.error('expected at least the following args: trace1 trace2')
+
+  step = 0
+  tracemap = dict()
+  for trace_file in args:
+    step += 1
+    sys.stderr.write(" " + str(step) + "/" + str(len(args))
+                     + ": " + trace_file + ":\n")
+
+    trace_lines = map(string.rstrip, open(trace_file).readlines())
+    (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines)
+    CheckTimestamps(trace_calls)
+    sys.stderr.write("Len: " + str(len(trace_calls))
+                     + ". Start: " + hex(trace_start)
+                     + ", end: " + hex(trace_end) + '\n')
+
+    trace_calls = Convert(trace_calls, trace_start, trace_end)
+    sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n")
+
+    AddTrace(tracemap, trace_calls)
+    sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n")
+
+  # Extract the resulting trace from the tracemap
+  merged_trace = []
+  for call in tracemap:
+    merged_trace.append(tracemap[call])
+  merged_trace.sort(key=Timestamp)
+
+  print "0-ffffffff r-xp 00000000 xx:00 00000 ./"
+  print "secs\tmsecs\tpid:threadid\tfunc"
+  for call in merged_trace:
+    print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t"
+           + hex(call[3]))
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/cygprofile/patch_orderfile.py b/tools/cygprofile/patch_orderfile.py
new file mode 100755
index 0000000..17cde76
--- /dev/null
+++ b/tools/cygprofile/patch_orderfile.py
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import commands
+import os
+import sys
+
+orderfile = sys.argv[1]
+uninstrumented_shlib = sys.argv[2]
+
+nmlines_uninstrumented = commands.getoutput ('nm -S -n ' +
+    uninstrumented_shlib + ' | egrep "( t )|( W )|( T )"').split('\n')
+
+nmlines = []
+for nmline in nmlines_uninstrumented:
+  if (len(nmline.split()) == 4):
+    nmlines.append(nmline)
+
+# Map addresses to list of functions at that address. There are multiple
+# functions at an address because of aliasing.
+nm_index = 0
+uniqueAddrs = []
+addressMap = {}
+while nm_index < len(nmlines):
+  if (len(nmlines[nm_index].split()) == 4):
+    nm_int = int (nmlines[nm_index].split()[0], 16)
+    size = int (nmlines[nm_index].split()[1], 16)
+    fnames = [nmlines[nm_index].split()[3]]
+    nm_index = nm_index + 1
+    while nm_index < len(nmlines) and nm_int == int (
+        nmlines[nm_index].split()[0], 16):
+      fnames.append(nmlines[nm_index].split()[3])
+      nm_index = nm_index + 1
+    addressMap[nm_int] = fnames
+    uniqueAddrs.append((nm_int, size))
+  else:
+    nm_index = nm_index + 1
+
+def binary_search (addr, start, end):
+  # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)
+  if start >= end or start == end - 1:
+    (nm_addr, size) = uniqueAddrs[start]
+    if not (addr >= nm_addr and addr < nm_addr + size):
+      sys.stderr.write ("ERROR: did not find function in binary: addr: "
+          + hex(addr) + " nm_addr: " + str(nm_addr) + " start: " + str(start)
+          + " end: " + str(end) + "\n")
+      raise Exception("error")
+    return (addressMap[nm_addr], size)
+  else:
+    halfway = start + ((end - start) / 2)
+    (nm_addr, size) = uniqueAddrs[halfway]
+    # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)
+    if (addr >= nm_addr and addr < nm_addr + size):
+      return (addressMap[nm_addr], size)
+    elif (addr < nm_addr):
+      return binary_search (addr, start, halfway)
+    elif (addr >= nm_addr + size):
+      return binary_search (addr, halfway, end)
+    else:
+      raise Exception("ERROR: did not expect this case")
+
+f = open (orderfile)
+lines = f.readlines()
+profiled_list = []
+for line in lines:
+  if (line.strip() == ''):
+    continue
+  functionName = line.replace('.text.', '').split('.clone.')[0].strip()
+  profiled_list.append (functionName)
+
+# Symbol names are not unique. Since the order file uses symbol names, the
+# patched order file pulls in all symbols with the same name. Multiple function
+# addresses for the same function name may also be due to ".clone" symbols,
+# since the substring is stripped.
+functions = []
+functionAddressMap = {}
+for line in nmlines:
+  try:
+    functionName = line.split()[3]
+  except:
+    functionName = line.split()[2]
+  functionName = functionName.split('.clone.')[0]
+  functionAddress = int (line.split()[0].strip(), 16)
+  try:
+    functionAddressMap[functionName].append(functionAddress)
+  except:
+    functionAddressMap[functionName] = [functionAddress]
+    functions.append(functionName)
+
+sys.stderr.write ("profiled list size: " + str(len(profiled_list)) + "\n")
+addresses = []
+symbols_found = 0
+for function in profiled_list:
+  try:
+    addrs = functionAddressMap[function]
+    symbols_found = symbols_found + 1
+  except:
+    addrs = []
+    # sys.stderr.write ("WARNING: could not find symbol " + function + "\n")
+  for addr in addrs:
+    if not (addr in addresses):
+      addresses.append(addr)
+sys.stderr.write ("symbols found: " + str(symbols_found) + "\n")
+
+sys.stderr.write ("number of addresses: " + str(len(addresses)) + "\n")
+total_size = 0
+for addr in addresses:
+  # if (count % 500 == 0):
+  #   print "current count: " + str(count)
+  (functions, size) = binary_search (addr, 0, len(uniqueAddrs))
+  total_size = total_size + size
+  for function in functions:
+    print ".text." + function
+  print ""
+sys.stderr.write ("total_size: " + str(total_size) + "\n")
diff --git a/tools/cygprofile/symbolize.py b/tools/cygprofile/symbolize.py
new file mode 100755
index 0000000..694ae54
--- /dev/null
+++ b/tools/cygprofile/symbolize.py
@@ -0,0 +1,251 @@
+#!/usr/bin/python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Symbolize a log file produced by cygprofile instrumentation.
+
+Given a log file and the binary being profiled (e.g. executable, shared
+library), the script can produce three different outputs: 1) symbols for the
+addresses, 2) function and line numbers for the addresses, or 3) an order file.
+"""
+
+import optparse
+import os
+import string
+import subprocess
+import sys
+
+
+def ParseLogLines(log_file_lines):
+  """Parse a log file produced by the profiled run of clank.
+
+  Args:
+    log_file_lines: array of lines in log file produced by profiled run
+    lib_name: library or executable containing symbols
+
+  Below is an example of a small log file:
+    5086e000-52e92000 r-xp 00000000 b3:02 51276    libchromeview.so
+    secs       msecs      pid:threadid    func
+    START
+    1314897086 795828     3587:1074648168 0x509e105c
+    1314897086 795874     3587:1074648168 0x509e0eb4
+    1314897086 796326     3587:1074648168 0x509e0e3c
+    1314897086 796552     3587:1074648168 0x509e07bc
+    END
+
+  Returns:
+    call_info list with list of tuples of the format (sec, msec, call id,
+    function address called)
+  """
+  call_lines = []
+  has_started = False
+  vm_start = 0
+  line = log_file_lines[0]
+  assert("r-xp" in line)
+  end_index = line.find('-')
+  vm_start = int(line[:end_index], 16)
+  for line in log_file_lines[2:]:
+    # print hex(vm_start)
+    fields = line.split()
+    if len(fields) == 4:
+      call_lines.append(fields)
+
+  # Convert strings to int in fields.
+  call_info = []
+  for call_line in call_lines:
+    (sec_timestamp, msec_timestamp) = map(int, call_line[0:2])
+    callee_id = call_line[2]
+    addr = int(call_line[3], 16)
+    if vm_start < addr:
+      addr -= vm_start
+    call_info.append((sec_timestamp, msec_timestamp, callee_id, addr))
+
+  return call_info
+
+
+def ParseLibSymbols(lib_file):
+  """Get output from running nm and grepping for text symbols.
+
+  Args:
+    lib_file: the library or executable that contains the profiled code
+
+  Returns:
+    list of sorted unique addresses and corresponding size of function symbols
+    in lib_file and map of addresses to all symbols at a particular address
+  """
+  cmd = ['nm', '-S', '-n', lib_file]
+  nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+  output = nm_p.communicate()[0]
+  nm_lines = output.split('\n')
+
+  nm_symbols = []
+  for nm_line in nm_lines:
+    if any(str in nm_line for str in (' t ', ' W ', ' T ')):
+      nm_symbols.append(nm_line)
+
+  nm_index = 0
+  unique_addrs = []
+  address_map = {}
+  while nm_index < len(nm_symbols):
+
+    # If the length of the split line is not 4, then it does not contain all the
+    # information needed to symbolize (i.e. address, size and symbol name).
+    if len(nm_symbols[nm_index].split()) == 4:
+      (addr, size) = [int(x, 16) for x in nm_symbols[nm_index].split()[0:2]]
+
+      # Multiple symbols may be at the same address. This is due to aliasing
+      # done by the compiler. Since there is no way to be sure which one was
+      # called in the profiled run, we symbolize to include all symbol names at
+      # a particular address.
+      fnames = []
+      while (nm_index < len(nm_symbols) and
+             addr == int(nm_symbols[nm_index].split()[0], 16)):
+        if len(nm_symbols[nm_index].split()) == 4:
+          fnames.append(nm_symbols[nm_index].split()[3])
+        nm_index += 1
+      address_map[addr] = fnames
+      unique_addrs.append((addr, size))
+    else:
+      nm_index += 1
+
+  return (unique_addrs, address_map)
+
+class SymbolNotFoundException(Exception):
+  def __init__(self, value):
+    self.value = value
+  def __str__(self):
+    return repr(self.value)
+
+def BinarySearchAddresses(addr, start, end, arr):
+  """Find the starting address of a symbol at a particular address.
+
+  The reason we can not directly use the address provided by the log file is
+  that the log file may give an address after the start of the symbol. The
+  logged address is often one byte after the start. Using this search
+  function, rather than just subtracting one from the logged address, allows
+  the logging instrumentation to log any address in a function.
+
+  Args:
+    addr: the address being searched for
+    start: the starting index for the binary search
+    end: the ending index for the binary search
+    arr: the list being searched containing tuple of address and size
+
+  Returns:
+    the starting address of the symbol at address addr
+
+  Raises:
+    Exception: if address not found. Function expects all logged addresses
+    to be found
+  """
+  # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)
+  if start >= end or start == end - 1:
+    # arr[i] is a tuple of address and size. Check if addr is inside the range
+    if addr >= arr[start][0] and addr < arr[start][0] + arr[start][1]:
+      return arr[start][0]
+    elif addr >= arr[end][0] and addr < arr[end][0] + arr[end][1]:
+      return arr[end][0]
+    else:
+      raise SymbolNotFoundException(addr)
+  else:
+    halfway = (start + end) / 2
+    (nm_addr, size) = arr[halfway]
+    # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)
+    if addr >= nm_addr and addr < nm_addr + size:
+      return nm_addr
+    elif addr < nm_addr:
+      return BinarySearchAddresses(addr, start, halfway-1, arr)
+    else:
+      # Condition (addr >= nm_addr + size) must be true.
+      return BinarySearchAddresses(addr, halfway+1, end, arr)
+
+
+def FindFunctions(addr, unique_addrs, address_map):
+  """Find function symbol names at address addr."""
+  return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1,
+                                           unique_addrs)]
+
+
+def AddrToLine(addr, lib_file):
+  """Use addr2line to determine line info of a particular address."""
+  cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
+  p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+  output = (p.communicate()[0]).split('\n')
+  line = output[0]
+  index = 1
+  while index < len(output):
+    line = line + ':' + output[index]
+    index += 1
+  return line
+
+
+def main():
+  """Write output for profiled run to standard out.
+
+  The format of the output depends on the output type specified as the third
+  command line argument. The default output type is to symbolize the addresses
+  of the functions called.
+  """
+  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
+  parser.add_option('-t', '--outputType', dest='output_type',
+                    default='symbolize', type='string',
+                    help='lineize or symbolize or orderfile')
+
+  # Option for output type. The log file and lib file arguments are required
+  # by the script and therefore are not options.
+  (options, args) = parser.parse_args()
+  if len(args) != 2:
+    parser.error('expected 2 args: log_file lib_file')
+
+  (log_file, lib_file) = args
+  output_type = options.output_type
+
+  lib_name = lib_file.split('/')[-1].strip()
+  log_file_lines = map(string.rstrip, open(log_file).readlines())
+  call_info = ParseLogLines(log_file_lines)
+  (unique_addrs, address_map) = ParseLibSymbols(lib_file)
+
+  # Check for duplicate addresses in the log file, and print a warning if
+  # duplicates are found. The instrumentation that produces the log file
+  # should only print the first time a function is entered.
+  addr_list = []
+  for call in call_info:
+    addr = call[3]
+    if addr not in addr_list:
+      addr_list.append(addr)
+    else:
+      print('WARNING: Address ' + hex(addr) + ' (line= ' +
+            AddrToLine(addr, lib_file) + ') already profiled.')
+
+  for call in call_info:
+    if output_type == 'lineize':
+      symbol = AddrToLine(call[3], lib_file)
+      print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' +
+            symbol)
+    elif output_type == 'orderfile':
+      try:
+        symbols = FindFunctions(call[3], unique_addrs, address_map)
+        for symbol in symbols:
+          print '.text.' + symbol
+        print ''
+      except SymbolNotFoundException as e:
+        sys.stderr.write('WARNING: Did not find function in binary. addr: ' +
+                         hex(call[3]) + '\n')
+    else:
+      try:
+        symbols = FindFunctions(call[3], unique_addrs, address_map)
+        print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' +
+              symbols[0])
+        first_symbol = True
+        for symbol in symbols:
+          if not first_symbol:
+            print '\t\t\t\t\t' + symbol
+          else:
+            first_symbol = False
+      except SymbolNotFoundException as e:
+        sys.stderr.write('WARNING: Did not find function in binary. addr: ' +
+                         hex(call[3]) + '\n')
+
+if __name__ == '__main__':
+  main()