# From de7feec08a3e4723510a3b5b87cca6ff95a8ef11 Mon Sep 17 00:00:00 2001
# From: "jeun@chromium.org"
# Date: Fri, 15 Aug 2014 18:36:25 +0000
# Subject: [Findit] Plain objects to represent the returned result from
#   running the algorithm, NOTRY=true
#
# Review URL: https://codereview.chromium.org/421223003
# Cr-Commit-Position: refs/heads/master@{#289949}
# git-svn-id: svn://svn.chromium.org/chrome/trunk/src@289949
#   0039d316-1c4b-4281-b951-d872f2087c98
#
# Files created by this change (3 files changed, 307 insertions(+)):
#   tools/findit/blame.py     | 162
#   tools/findit/match_set.py | 128
#   tools/findit/result.py    |  17

# -----------------------------------------------------------------------------
# tools/findit/blame.py (new file, index 0000000..7546262)
# -----------------------------------------------------------------------------
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from threading import Lock, Thread

import utils


class Blame(object):
  """Represents a blame object.

  The object contains blame information for one line of stack, and this
  information is shown when there are no CLs that change the crashing files.

  Attributes:
    line_content: The content of the line to find the blame for.
    component_name: The name of the component for this line.
    stack_frame_index: The stack frame index of this file.
    file: The name of the file (the 'file_name' constructor argument).
    file_name: Same value as 'file', under the name of the argument.
    line_number: The line that caused a crash.
    author: The author of this line on the latest revision.
    revision: The latest revision of this line before the crash revision.
    url: The url of the change for the revision.
    range_start: The starting range of the regression for this component.
    range_end: The ending range of the regression.
  """

  def __init__(self, line_content, component_name, stack_frame_index,
               file_name, line_number, author, revision,
               url, range_start, range_end):
    # Set all the variables from the arguments.
    self.line_content = line_content
    self.component_name = component_name
    self.stack_frame_index = stack_frame_index
    # 'file' is the attribute name this class has always exposed; 'file_name'
    # is provided as well so the attribute matches the argument name.
    self.file = file_name
    self.file_name = file_name
    self.line_number = line_number
    self.author = author
    self.revision = revision
    self.url = url
    self.range_start = range_start
    self.range_end = range_end


class BlameList(object):
  """Represents a list of blame objects.

  Entries are appended under blame_list_lock, so FindBlame may populate the
  list from several worker threads at once.

  Attributes:
    blame_list: The list of Blame objects collected so far.
    blame_list_lock: A lock guarding appends to blame_list.
  """

  def __init__(self):
    self.blame_list = []
    self.blame_list_lock = Lock()

  def __getitem__(self, index):
    """Returns the blame entry (or slice of entries) at 'index'."""
    return self.blame_list[index]

  def FindBlame(self, callstack, crash_revision_dict, regression_dict, parsers,
                top_n_frames=10):
    """Given a stack within a stacktrace, retrieves blame information.

    Only either first 'top_n_frames' or the length of stack, whichever is
    shorter, results are returned. The default value of 'top_n_frames' is 10.
    Results are appended to self.blame_list; nothing is returned.

    Args:
      callstack: The callstack object; its GetTopNFrames() supplies the
                 stack frames to process.
      crash_revision_dict: A dictionary that maps component to its crash
                           revision.
      regression_dict: A dictionary that maps component to its revision
                       range. May be empty or None.
      parsers: A mapping with the keys 'svn' and 'git', each holding the
               repository parser to use for that kind of revision.
      top_n_frames: A number of stack frames to show the blame result for.
    """
    # Only return blame information for first 'top_n_frames' frames.
    stack_frames = callstack.GetTopNFrames(top_n_frames)

    threads = []
    # Iterate through frames in stack.
    for stack_frame in stack_frames:
      # If the component this line is from does not have a crash revision,
      # it is not possible to get blame information, so ignore this line.
      component_path = stack_frame.component_path
      if component_path not in crash_revision_dict:
        continue

      crash_revision = crash_revision_dict[component_path]['revision']
      range_start = None
      range_end = None
      is_git = utils.IsGitHash(crash_revision)
      if is_git:
        repository_parser = parsers['git']
      else:
        repository_parser = parsers['svn']

      # If the revision is in SVN, and if regression information is available,
      # get it. For Git, we cannot know the ordering between hash numbers.
      if not is_git:
        if regression_dict and component_path in regression_dict:
          component_object = regression_dict[component_path]
          range_start = int(component_object['old_revision'])
          range_end = int(component_object['new_revision'])

      # Generate blame entry, one thread for one entry.
      blame_thread = Thread(
          target=self.__GenerateBlameEntry,
          args=[repository_parser, stack_frame, crash_revision,
                range_start, range_end])
      threads.append(blame_thread)
      blame_thread.start()

    # Join the results before returning.
    for blame_thread in threads:
      blame_thread.join()

  def __GenerateBlameEntry(self, repository_parser, stack_frame,
                           crash_revision, range_start, range_end):
    """Generates one blame entry for a stack frame and appends it to the list.

    Runs on a worker thread started by FindBlame.

    Args:
      repository_parser: The parser whose ParseBlameInfo() is queried.
      stack_frame: The stack frame to find the blame for.
      crash_revision: The crash revision of the frame's component.
      range_start: The start of the regression range, or None.
      range_end: The end of the regression range, or None.
    """
    stack_frame_index = stack_frame.index
    component_path = stack_frame.component_path
    component_name = stack_frame.component_name
    file_name = stack_frame.file_name
    file_path = stack_frame.file_path
    crashed_line_number = stack_frame.crashed_line_number

    # Parse blame information.
    parsed_blame_info = repository_parser.ParseBlameInfo(
        component_path, file_path, crashed_line_number, crash_revision)

    # If it fails to retrieve information, do not do anything.
    if not parsed_blame_info or len(parsed_blame_info) != 4:
      return

    # Create blame object from the parsed info and add it to the list.
    (line_content, revision, author, url) = parsed_blame_info
    blame = Blame(line_content, component_name, stack_frame_index, file_name,
                  crashed_line_number, author, revision, url,
                  range_start, range_end)

    with self.blame_list_lock:
      self.blame_list.append(blame)

  def FilterAndSortBlameList(self):
    """Filters and sorts the blame list.

    Sorts entries by their position in the stack, then drops SVN entries
    whose revision is after the end of the regression range. Entries without
    regression information, and entries with a git hash revision, are kept.
    """
    # Sort the blame list by its position in stack.
    self.blame_list.sort(key=lambda blame: blame.stack_frame_index)

    filtered_blame_list = []

    for blame in self.blame_list:
      # If regression information is available, check if it needs to be
      # filtered.
      if blame.range_start and blame.range_end:

        # Discards results that are after the end of regression. Git hashes
        # have no ordering, so only numeric (SVN) revisions are compared.
        if not utils.IsGitHash(blame.revision) and (
            int(blame.revision) > int(blame.range_end)):
          continue

      filtered_blame_list.append(blame)

    self.blame_list = filtered_blame_list


# -----------------------------------------------------------------------------
# tools/findit/match_set.py (new file, index 0000000..c4fe99f)
# -----------------------------------------------------------------------------
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import re

from threading import Lock

import crash_utils


REVIEW_URL_PATTERN = re.compile(r'Review URL:( *)(.*)')


class Match(object):
  """Represents a match entry.

  A match is a CL that is suspected to have caused the crash. A match object
  contains information about files it changes, their authors, etc.

  Attributes:
    is_reverted: True if this CL is reverted by other CL.
    revert_of: If this CL is a revert of some other CL, a revision number/
               git hash of that CL.
    crashed_line_numbers: The list of lines that caused crash for this CL.
    function_list: The list of functions that caused the crash.
    min_distance: The minimum distance between the lines that CL changed and
                  lines that caused the crash.
    changed_files: The list of files that the CL changed.
    changed_file_urls: The list of URLs for the file.
    author: The author of the CL.
    component_name: The name of the component that this CL belongs to.
    stack_frame_indices: For files that caused crash, list of where in the
                         stackframe they occur.
    rank: The highest priority among the files the CL changes. Priority = 1
          if it changes the crashed line, and priority = 2 if it is a simple
          file change.
    priorities: A list of priorities for each of the changed file.
    revision_url: The revision URL of the CL.
    review_url: The codereview URL that reviews this CL.
    reviewers: The list of people that reviewed this CL.
    reason: The reason why this CL is suspected.
  """
  REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)

  def __init__(self, revision, component_name):
    """Initializes an empty match.

    Args:
      revision: A mapping describing the CL; its 'author' and 'url' entries
                are read.
      component_name: The name of the component that this CL belongs to.
    """
    self.is_reverted = False
    self.revert_of = None
    self.crashed_line_numbers = []
    self.function_list = []
    self.min_distance = crash_utils.INFINITY
    self.changed_files = []
    self.changed_file_urls = []
    self.author = revision['author']
    self.component_name = component_name
    self.stack_frame_indices = []
    self.rank = crash_utils.INFINITY
    self.priorities = []
    self.revision_url = revision['url']
    self.review_url = ''
    self.reviewers = []
    self.reason = None

  def ParseMessage(self, message, codereview_api_url):
    """Parses the message.

    It checks the message to extract the list of reviewers from the code
    review site, and it also checks if the CL is a revert of another CL.
    Parsing stops at the first line that starts with 'revert'.

    Args:
      message: The message to parse.
      codereview_api_url: URL to retrieve codereview data from. It is used as
                          a %-format string that receives the text following
                          'Review URL:'.
    """
    for line in message.splitlines():
      line = line.strip()
      review_url_line_match = REVIEW_URL_PATTERN.match(line)

      # Check if the line has the code review information.
      if review_url_line_match:

        # Get review number for the code review site from the line.
        # NOTE(review): group(2) is everything after 'Review URL:'; confirm
        # that codereview_api_url expects that form.
        issue_number = review_url_line_match.group(2)

        # Get JSON from the code review site, ignore the line if it fails.
        url = codereview_api_url % issue_number
        json_string = crash_utils.GetDataFromURL(url)
        if not json_string:
          logging.warning('Failed to retrieve code review information from %s',
                          url)
          continue

        # Load the JSON from the string, and get the list of reviewers.
        code_review = crash_utils.LoadJSON(json_string)
        if code_review:
          self.reviewers = code_review['reviewers']

      # Check if this CL is a revert of other CL.
      if line.lower().startswith('revert'):
        self.is_reverted = True

        # Check if the line says what CL this CL is a revert of.
        revert = self.REVERT_PATTERN.match(line)
        if revert:
          self.revert_of = revert.group(2)

        # Nothing after the revert line is parsed.
        return


class MatchSet(object):
  """Represents a set of matches.

  Attributes:
    codereview_api_url: URL to retrieve codereview data from.
    matches: A map from CL to a match object.
    cls_to_ignore: A set of CLs to ignore.
    matches_lock: A lock guarding matches dictionary.
  """

  def __init__(self, codereview_api_url):
    self.codereview_api_url = codereview_api_url
    self.matches = {}
    self.cls_to_ignore = set()
    self.matches_lock = Lock()

  def RemoveRevertedCLs(self):
    """Removes every CL listed in cls_to_ignore from matches.

    The matches dictionary is modified in place. This method does not
    acquire matches_lock.
    """
    # Iterate over a snapshot of the keys: deleting entries from a dictionary
    # while iterating over it directly raises RuntimeError.
    for cl in list(self.matches):
      if cl in self.cls_to_ignore:
        del self.matches[cl]


# -----------------------------------------------------------------------------
# tools/findit/result.py (new file, index 0000000..cd32333)
# -----------------------------------------------------------------------------
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


class Result(object):
  """Plain object holding one result returned from running the algorithm.

  Every constructor argument is stored unchanged under the attribute of the
  same name.

  Attributes:
    suspected_cl: The CL suspected of causing the crash.
    revision_url: The revision URL of the CL.
    component_name: The name of the component the CL belongs to.
    author: The author of the CL.
    reason: The reason why the CL is suspected.
    review_url: The codereview URL of the CL.
    reviewers: The people that reviewed the CL.
    line_content: The content of the line associated with this result.
  """

  def __init__(self, suspected_cl, revision_url, component_name, author,
               reason, review_url, reviewers, line_content):
    self.suspected_cl = suspected_cl
    self.revision_url = revision_url
    self.component_name = component_name
    self.author = author
    self.reason = reason
    self.review_url = review_url
    self.reviewers = reviewers
    self.line_content = line_content

  def __repr__(self):
    """Returns a short description of the result, for debugging and logs."""
    return '%s(suspected_cl=%r, component_name=%r, author=%r)' % (
        type(self).__name__, self.suspected_cl, self.component_name,
        self.author)