diff options
author | rsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-02 13:24:30 +0000 |
---|---|---|
committer | rsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-02 13:24:30 +0000 |
commit | 4e5d2bbf9770f19ecdbb3ac0f669f83849cf5091 (patch) | |
tree | 01fde9d12587b3e34ccb2c2666c177991ef4edb6 /tools/mac | |
parent | dfba8766fbe2b0d0ac7e538f18aaf6b92aa68c1e (diff) | |
download | chromium_src-4e5d2bbf9770f19ecdbb3ac0f669f83849cf5091.zip chromium_src-4e5d2bbf9770f19ecdbb3ac0f669f83849cf5091.tar.gz chromium_src-4e5d2bbf9770f19ecdbb3ac0f669f83849cf5091.tar.bz2 |
Move tools/mac/symbolicate_crash.py from src-internal to the public repo.
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/7825019
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@99356 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/mac')
-rwxr-xr-x | tools/mac/symbolicate_crash.py | 504 |
1 files changed, 504 insertions, 0 deletions
#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This script can take an Apple-style CrashReporter log and symbolicate it. This
is useful for when a user's reports aren't being uploaded, for example.

Only versions 6, 7, 8, and 9 reports are supported. For more information on the
file format, reference this document:
  TN2123 <http://developer.apple.com/library/mac/#technotes/tn2004/tn2123.html>

Information on symbolication was gleaned from:
  <http://developer.apple.com/tools/xcode/symbolizingcrashdumps.html>

The code is written to run unchanged on Python 2.6+ and Python 3: output uses
single-argument print(...) calls or explicit stream writes, and file
repositioning uses absolute tell()/seek() pairs.
"""

import optparse
import os.path
import re
import subprocess
import sys

# Maps binary image identifiers to binary names (minus the .dSYM portion) found
# in the archive. These are the only objects that will be looked up.
SYMBOL_IMAGE_MAP = {
  'com.google.Chrome': 'Google Chrome.app',
  'com.google.Chrome.framework': 'Google Chrome Framework.framework',
  'com.google.Chrome.helper': 'Google Chrome Helper.app'
}

# Report versions this script understands:
#   Version 6: 10.5 and 10.6 crash report
#   Version 7: 10.6 spindump report
#   Version 8: 10.7 spindump report
#   Version 9: 10.7 crash report
_VALID_VERSIONS = (6, 7, 8, 9)

# The subset of |_VALID_VERSIONS| that uses the spindump layout.
_SPINDUMP_VERSIONS = (7, 8)

# Maps the lower-cased first token of the report's "Code Type" header to the
# architecture name passed to |atos -arch|. Tokens that are not listed here are
# passed through unchanged.
_ATOS_ARCH_MAP = {
  'x86': 'i386',
  'x86-64': 'x86_64',
  'arm-64': 'arm64',
}


class CrashReport(object):
  """A parsed representation of an Apple CrashReport text file.

  Attributes:
    report_info: Dictionary of the "Key: value" lines found in the header.
    threads: List of CrashThread objects, in the order they appear.
    report_version: The integer value of the "Report Version" header.
  """

  def __init__(self, file_name):
    """Reads and parses the report stored at |file_name|.

    Raises:
      KeyError: The header has no "Report Version" entry.
      ValueError: The "Report Version" entry is not an integer.
      Exception: The report version is not one of the supported versions.
    """
    super(CrashReport, self).__init__()
    self.report_info = {}
    self.threads = []
    self._binary_images = {}

    # The with-block guarantees the file is closed even if parsing raises.
    with open(file_name, 'r') as fd:
      self._ParseHeader(fd)

      # Try and get the report version. If it's not a version we handle, abort.
      self.report_version = int(self.report_info['Report Version'])
      if self.report_version not in _VALID_VERSIONS:
        raise Exception("Only crash reports of versions %s are accepted." %
                        str(_VALID_VERSIONS))

      # If this is a spindump (version 7 or 8 report), use a special parser.
      # The format is undocumented, but is similar to version 6. However, the
      # spindump report contains user and kernel stacks for every process on
      # the system.
      if self.report_version in _SPINDUMP_VERSIONS:
        self._ParseSpindumpStack(fd)
      else:
        self._ParseStack(fd)

      self._ParseBinaryImages(fd)

  def Symbolicate(self, symbol_path):
    """Symbolicates a crash report stack trace.

    |symbol_path| is the directory that holds the .dSYM bundles named in
    SYMBOL_IMAGE_MAP. Frames are updated in place; images whose symbols cannot
    be loaded are reported on stdout and skipped.
    """
    # In order to be efficient, collect all the offsets that will be passed to
    # atos by the image name.
    offsets_by_image = self._CollectAddressesForImages(SYMBOL_IMAGE_MAP.keys())

    # For each image, run atos with the list of addresses.
    for image_name, addresses in offsets_by_image.items():
      # If this image was not loaded or is in no stacks, skip.
      if image_name not in self._binary_images or not addresses:
        continue

      # Combine the |image_name| and |symbol_path| into the path of the dSYM.
      dsym_file = self._GetDSymPath(symbol_path, image_name)

      # From the list of 2-Tuples of (frame, address), create a list of just
      # addresses.
      address_list = [address for (_, address) in addresses]

      # Look up the load address of the image.
      binary_base = self._binary_images[image_name][0]

      # This returns a list of just symbols. The indices will match up with the
      # list of |addresses|.
      symbol_names = self._RunAtos(binary_base, dsym_file, address_list)
      if not symbol_names:
        print('Error loading symbols for ' + image_name)
        continue

      # Attaches a list of symbol names to stack frames. This assumes that the
      # order of |addresses| has stayed the same as |symbol_names|.
      self._AddSymbolsToFrames(symbol_names, addresses)

  def _ParseHeader(self, fd):
    """Parses the header section of a crash report, which contains the OS and
    application version information.

    On return |fd| is positioned at the start of the first thread heading, or
    at end-of-file if the report contains no thread heading."""
    # The header is made up of different sections, depending on the type of
    # report and the report version. Almost all have a format of a key and
    # value separated by a colon. Accumulate all of these artifacts into a
    # dictionary until the first thread stack is reached.
    thread_re = re.compile(r'^[ \t]*Thread ([a-f0-9]+)')
    while True:
      # Remember where this line starts so the thread heading can be re-read by
      # the stack parser. An absolute seek to a tell() value works for text
      # files on both Python 2 and Python 3.
      line_start = fd.tell()
      line = fd.readline()
      if not line:
        # End of file: the report is truncated. Stop instead of spinning on the
        # empty string that readline() keeps returning.
        return
      if thread_re.match(line):
        fd.seek(line_start)
        return

      # Skip blank lines. There are typically three or four sections separated
      # by newlines in the header.
      line = line.strip()
      if not line:
        continue
      parts = line.split(':', 1)
      # Certain lines in different report versions don't follow the key-value
      # format, so skip them.
      if len(parts) == 2:
        # There's a varying amount of space padding after the ':' to align all
        # the values; strip that.
        self.report_info[parts[0]] = parts[1].lstrip()

  def _ParseStack(self, fd):
    """Parses the stack dump of a crash report and creates a list of threads
    and their stack traces."""
    # Compile a regex that matches the start of a thread stack. Note that this
    # must be specific to not include the thread state section, which comes
    # right after all the stack traces.
    line_re = re.compile(r'^Thread ([0-9]+)( Crashed)?:(.*)')

    # On entry into this function, the fd has been walked up to the "Thread 0"
    # line.
    in_stack = False
    thread = None
    while True:
      raw_line = fd.readline()
      if not raw_line:
        # End of file.
        break
      # Drop the line terminator so that frames which cannot be parsed do not
      # carry a stray newline into the printed report.
      line = raw_line.rstrip()

      matches = line_re.match(line)
      if matches:
        # If this is the start of a thread stack, create the CrashThread.
        in_stack = True
        thread = CrashThread(matches.group(1))
        thread.name = matches.group(3)
        thread.did_crash = matches.group(2) is not None
        self.threads.append(thread)
      elif not in_stack:
        # Anything other than a thread heading outside of a stack means the
        # stack section is over.
        break
      elif not line:
        # A blank line indicates a break in the thread stack.
        in_stack = False
      else:
        # All other lines are stack frames.
        thread.stack.append(self._ParseStackFrame(line))

  def _ParseStackFrame(self, line):
    """Takes in a single line of text and transforms it into a StackFrame.

    If the line does not look like a stack frame, the returned StackFrame only
    carries the original text in its |line| member."""
    frame = StackFrame(line)

    # A stack frame is in the format of:
    # |<frame-number> <binary-image> 0x<address> <symbol> <offset>|.
    regex = r'^([0-9]+) +(.+)[ \t]+(0x[0-9a-f]+) (.*) \+ ([0-9]+)$'
    matches = re.match(regex, line)
    if matches is None:
      return frame

    # Create a stack frame with the information extracted from the regex.
    frame.frame_id = matches.group(1)
    # The image column is padded for alignment; the greedy match keeps that
    # padding, so trim it.
    frame.image = matches.group(2).strip()
    frame.address = int(matches.group(3), 0)  # Convert HEX to an int.
    frame.original_symbol = matches.group(4)
    frame.offset = matches.group(5)
    frame.line = None
    return frame

  def _ParseSpindumpStack(self, fd):
    """Parses a spindump stack report. In this format, each thread stack has
    both a user and kernel trace. Only the user traces are symbolicated.

    On return |fd| is positioned at the start of the line that ended the stack
    section, or at end-of-file."""

    # The stack trace begins with the thread header, which is identified by a
    # HEX number. The thread names appear to be incorrect in spindumps.
    user_thread_re = re.compile(r'^ Thread ([0-9a-fx]{4})')

    # When this method is called, the fd has been walked right up to the first
    # line.
    in_user_stack = False
    in_kernel_stack = False
    thread = None
    while True:
      line_start = fd.tell()
      line = fd.readline()
      if not line:
        # End of file. Without this check a report that ends inside a user
        # stack would loop forever.
        return

      # Check for the start of a thread.
      matches = user_thread_re.match(line)
      if not (matches or in_user_stack or in_kernel_stack):
        # This line belongs to whatever follows the stacks. Rewind so that
        # _ParseBinaryImages() sees it.
        fd.seek(line_start)
        return

      if not line.strip():
        # A blank line indicates the start of a new thread. The blank line comes
        # after the kernel stack before a new thread header.
        in_kernel_stack = False
      elif matches:
        # This is the start of a thread header. The next line is the heading for
        # the user stack, followed by the actual trace.
        thread = CrashThread(matches.group(1))
        self.threads.append(thread)
        in_user_stack = True
        fd.readline()  # Read past the 'User stack:' header.
      elif line.startswith(' Kernel stack:'):
        # The kernel stack header comes immediately after the last frame (really
        # the top frame) in the user stack, without a blank line.
        in_user_stack = False
        in_kernel_stack = True
      elif in_user_stack:
        # If this is a line while in the user stack, parse it as a stack frame.
        thread.stack.append(self._ParseSpindumpStackFrame(line))

  def _ParseSpindumpStackFrame(self, line):
    """Parses a spindump-style stackframe."""
    frame = StackFrame(line)

    # The format of the frame is either:
    # A: |<space><steps> <symbol> + <offset> (in <image-name>) [<address>]|
    # B: |<space><steps> ??? (in <image-name> + <offset>) [<address>]|
    regex_a = r'^([ ]+[0-9]+) (.*) \+ ([0-9]+) \(in (.*)\) \[(0x[0-9a-f]+)\]'
    regex_b = (r'^([ ]+[0-9]+) \?\?\?( \(in (.*) \+ ([0-9]+)\))? '
               r'\[(0x[0-9a-f]+)\]')

    # Create the stack frame with the information extracted from the regex.
    matches = re.match(regex_a, line)
    if matches:
      frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
      frame.original_symbol = matches.group(2)
      frame.offset = matches.group(3)
      frame.image = matches.group(4)
      frame.address = int(matches.group(5), 0)
      frame.line = None
      return frame

    # If pattern A didn't match (which it will most of the time), try B.
    matches = re.match(regex_b, line)
    if matches:
      frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
      frame.image = matches.group(3)
      frame.offset = matches.group(4)
      frame.address = int(matches.group(5), 0)
      frame.line = None
      return frame

    # Otherwise, this frame could not be matched and just use the raw input.
    frame.line = frame.line.strip()
    return frame

  def _ParseBinaryImages(self, fd):
    """Parses out the binary images section in order to get the load offset.

    Fills |self._binary_images| with image name -> (start, end) address
    tuples. If the report has no "Binary Images:" section, nothing is added."""
    # The parser skips some sections, so advance until the "Binary Images"
    # header is reached.
    while True:
      line = fd.readline()
      if not line:
        # End of file without a binary images section.
        return
      if line.lstrip().startswith('Binary Images:'):
        break

    # Create a regex to match the lines of format:
    # |0x<start> - 0x<end> <binary-image> <version> (<version>) <<UUID>> <path>|
    image_re = re.compile(
        r'[ ]*(0x[0-9a-f]+) -[ \t]+(0x[0-9a-f]+) [+ ]([a-zA-Z0-9._\-]+)')

    # This section is in this format:
    # |<start address> - <end address> <image name>|.
    while True:
      line = fd.readline()
      if not line.strip():
        # End when a blank line (or the end of the file) is hit.
        return
      # Match the line to the regex.
      match = image_re.match(line)
      if match:
        # Store the offsets by image name so it can be referenced during
        # symbolication. These are hex numbers with leading '0x', so int() can
        # convert them to decimal if base=0.
        address_range = (int(match.group(1), 0), int(match.group(2), 0))
        self._binary_images[match.group(3)] = address_range

  def _CollectAddressesForImages(self, images):
    """Iterates all the threads and stack frames and collects the stack frames
    that are in a list of binary |images|. The result is a dictionary, keyed by
    the image name that maps to a list of tuples. Each is a 2-Tuple of
    (stack_frame, address)"""
    # Create the collection and initialize it with empty lists for each image.
    collection = dict((image, []) for image in images)

    # Perform the iteration.
    for thread in self.threads:
      for frame in thread.stack:
        image_name = self._ImageForAddress(frame.address)
        if image_name in collection:
          # Replace the image name in the frame in case it was elided.
          frame.image = image_name
          collection[image_name].append((frame, frame.address))

    # Return the result.
    return collection

  def _ImageForAddress(self, address):
    """Given a PC address, returns the bundle identifier of the image in which
    the address resides, or None if no parsed image contains it."""
    for image_name, address_range in self._binary_images.items():
      if address_range[0] <= address <= address_range[1]:
        return image_name
    return None

  def _GetDSymPath(self, base_path, image_name):
    """Takes a base path for the symbols and an image name. It looks the name up
    in SYMBOL_IMAGE_MAP and creates a full path to the dSYM in the bundle."""
    image_file = SYMBOL_IMAGE_MAP[image_name]
    return os.path.join(base_path, image_file + '.dSYM', 'Contents',
        'Resources', 'DWARF',
        os.path.splitext(image_file)[0])  # Chop off the extension.

  def _AtosArch(self):
    """Returns the value to pass to |atos -arch|, or None if the report has no
    usable "Code Type" header."""
    # The header is of the format |X86 (Native)|; only the first token names
    # the architecture.
    tokens = self.report_info.get('Code Type', '').lower().split()
    if not tokens:
      return None
    # The crash report spells architectures differently than atos does (for
    # example x86 instead of i386), so translate the ones that are known.
    return _ATOS_ARCH_MAP.get(tokens[0], tokens[0])

  def _RunAtos(self, load_address, dsym_file, addresses):
    """Runs the atos with the provided arguments. |addresses| is used as stdin.
    Returns a list of symbol information in the same order as |addresses|, or
    None if atos could not be started or exited with a failure code."""
    # Pass the load address as 0x-prefixed hex so that it cannot be misread as
    # a number in a different base.
    args = ['atos', '-l', '0x%x' % load_address, '-o', dsym_file]

    arch = self._AtosArch()
    if arch:
      args.extend(['-arch', arch])

    try:
      # universal_newlines makes the pipes text-mode on Python 3, so the same
      # str values can be written and read on both major versions.
      proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE, universal_newlines=True)
    except OSError:
      # atos is not installed or is not executable.
      return None

    # '%x' formatting avoids the trailing 'L' that hex() adds to Python 2 longs.
    stdin_data = ' '.join(['0x%x' % address for address in addresses])
    stdout = proc.communicate(stdin_data)[0]
    if proc.returncode:
      return None
    return stdout.rstrip().split('\n')

  def _AddSymbolsToFrames(self, symbols, address_tuples):
    """Takes a single value (the list) from _CollectAddressesForImages and does
    a smart-zip with the data returned by atos in |symbols|. Note that the
    indices must match for this to succeed."""
    if len(symbols) != len(address_tuples):
      print('symbols do not match')

    # Each line of output from atos is in this format:
    # |<symbol> (in <image>) (<file>:<line>)|.
    line_regex = re.compile(r'(.+) \(in (.+)\) (\((.+):([0-9]+)\))?')

    # Zip the two data sets together. zip() stops at the shorter input, so a
    # length mismatch cannot index past the end of either list.
    for symbol, address_tuple in zip(symbols, address_tuples):
      symbol_parts = line_regex.match(symbol)
      if not symbol_parts:
        continue  # Error.
      frame = address_tuple[0]
      frame.symbol = symbol_parts.group(1)
      frame.image = symbol_parts.group(2)
      frame.file_name = symbol_parts.group(4)
      frame.line_number = symbol_parts.group(5)


class CrashThread(object):
  """A CrashThread represents a stacktrace of a single thread """
  def __init__(self, thread_id):
    super(CrashThread, self).__init__()
    # The identifier exactly as it was written in the report (a string).
    self.thread_id = thread_id
    self.name = None
    self.did_crash = False
    # List of StackFrame objects.
    self.stack = []

  def __repr__(self):
    name = ''
    if self.name:
      name = ': ' + self.name
    return 'Thread ' + self.thread_id + name + '\n' + \
        '\n'.join(map(str, self.stack))


class StackFrame(object):
  """A StackFrame is owned by a CrashThread."""
  def __init__(self, line):
    super(StackFrame, self).__init__()
    # The original line. This will be set to None if parsing the line was
    # successful.
    self.line = line

    self.frame_id = 0
    self.image = None
    self.address = 0x0
    self.original_symbol = None
    self.offset = 0x0
    # The following members are set after symbolication.
    self.symbol = None
    self.file_name = None
    self.line_number = 0

  def __repr__(self):
    # If the line could not be parsed, just use the original line.
    if self.line:
      return ' %s' % self.line

    # Use different location information depending on symbolicated data.
    if self.file_name:
      location = ' - %s:%s' % (self.file_name, self.line_number)
    else:
      location = ' + %s' % self.offset

    # Same with the symbol information.
    symbol = self.symbol or self.original_symbol

    return ' %s\t0x%x\t[%s\t%s]\t%s' % (self.frame_id, self.address,
                                         self.image, location, symbol)


def PrettyPrintReport(report):
  """Takes a crash report and prints it like the crash server would.

  Header fields that are missing from the report are printed as 'unknown'."""
  info = report.report_info
  print('Process : ' + info.get('Process', 'unknown'))
  print('Version : ' + info.get('Version', 'unknown'))
  print('Date : ' + info.get('Date/Time', 'unknown'))
  print('OS Version : ' + info.get('OS Version', 'unknown'))
  print('')
  if 'Crashed Thread' in info:
    print('Crashed Thread : ' + info['Crashed Thread'])
    print('')
  if 'Event' in info:
    print('Event : ' + info['Event'])
    print('')

  for thread in report.threads:
    print('')
    if thread.did_crash:
      exc_type = info.get('Exception Type', 'unknown').split(' ')[0]
      # Only rewrite the separator word; a bare replace of 'at' would also
      # alter any code name that happens to contain those letters.
      exc_code = info.get('Exception Codes', 'unknown').replace(' at ', ' @ ')
      print('*CRASHED* ( ' + exc_type + ' / ' + exc_code + ' )')
    # Spindump reports (versions 7 and 8) have a stepped stack trace, so remove
    # the first tab to get better alignment.
    if report.report_version in _SPINDUMP_VERSIONS:
      for line in repr(thread).split('\n'):
        print(line.replace('\t', ' ', 1))
    else:
      print(repr(thread))


def Main(args):
  """Program main.

  |args| is the full argument vector, including the program name. Returns the
  process exit code: 0 on success, 1 on a usage error and 2 if the symbol path
  is not a directory."""
  parser = optparse.OptionParser(
      usage='%prog [options] symbol_path crash_report',
      description='This will parse and symbolicate an Apple CrashReporter v6-9 '
                  'file.')
  parser.add_option('-s', '--std-path', action='store_true', dest='std_path',
                    help='With this flag, the symbol_path is a containing '
                         'directory, in which the dSYM files are stored in a '
                         'directory named by the version. Example: '
                         '[symbolicate_crash.py -s ./symbols/ report.crash] '
                         'will look for dSYMs in ./symbols/15.0.666.0/ if the '
                         'report is from that version.')
  (options, args) = parser.parse_args(args[1:])

  # Check that we have something to symbolicate.
  if len(args) != 2:
    parser.print_usage()
    return 1

  report = CrashReport(args[1])

  # If not using the standard layout, this is a full path to the symbols.
  if not options.std_path:
    symbol_path = args[0]
  # Otherwise, use the report version to locate symbols in a directory.
  else:
    # This is in the format of |M.N.B.P (B.P)|. Get just the part before the
    # space.
    chrome_version = report.report_info['Version'].split(' ')[0]
    symbol_path = os.path.join(args[0], chrome_version)

  # Check that the symbols exist.
  if not os.path.isdir(symbol_path):
    sys.stderr.write('Symbol path %s is not a directory\n' % symbol_path)
    return 2

  sys.stderr.write('Using symbols from ' + symbol_path + '\n')
  sys.stderr.write('=' * 80 + '\n')

  report.Symbolicate(symbol_path)
  PrettyPrintReport(report)
  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv))