[Findit] Plain objects to represent the returned result from running the algorithm.

NOTRY=true Review URL: https://codereview.chromium.org/421223003 Cr-Commit-Position: refs/heads/master@{#289949} git-svn-id: svn://svn.chromium.org/chrome/trunk/src@289949 0039d316-1c4b-4281-b951-d872f2087c98
author: jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-08-15 18:36:25 +0000
committer: jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-08-15 18:38:22 +0000
commit: de7feec08a3e4723510a3b5b87cca6ff95a8ef11 (patch)
tree: 1fc218102803cd7b2739e36172a8f4d3775fe923 /tools/findit
parent: e2a75a5196134d37d4df8f7f6f97524d03443ab0 (diff)
download: chromium_src-de7feec08a3e4723510a3b5b87cca6ff95a8ef11.zip
chromium_src-de7feec08a3e4723510a3b5b87cca6ff95a8ef11.tar.gz
chromium_src-de7feec08a3e4723510a3b5b87cca6ff95a8ef11.tar.bz2
3 files changed, 307 insertions, 0 deletions
diff --git a/tools/findit/blame.py b/tools/findit/blame.py
new file mode 100644
index 0000000..7546262
--- /dev/null
+++ b/tools/findit/blame.py
@@ -0,0 +1,162 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from threading import Lock, Thread
+
+import utils
+
+
+class Blame(object):
+  """Represents a blame object.
+
+  The object contains blame information for one line of stack, and this
+  information is shown when there are no CLs that change the crashing files.
+
+  Attributes:
+    line_content: The content of the line to find the blame for.
+    component_name: The name of the component for this line.
+    stack_frame_index: The stack frame index of this file.
+    file: The name of the file. The constructor argument is called
+          'file_name', but the value is stored as 'file'.
+    line_number: The line that caused a crash.
+    author: The author of this line on the latest revision.
+    revision: The latest revision of this line before the crash revision.
+    url: The url of the change for the revision.
+    range_start: The starting range of the regression for this component.
+    range_end: The ending range of the regression.
+  """
+
+  def __init__(self, line_content, component_name, stack_frame_index,
+               file_name, line_number, author, revision,
+               url, range_start, range_end):
+    # Store every argument as-is; only 'file_name' changes name (to 'file').
+    self.line_content = line_content
+    self.component_name = component_name
+    self.stack_frame_index = stack_frame_index
+    self.file = file_name
+    self.line_number = line_number
+    self.author = author
+    self.revision = revision
+    self.url = url
+    self.range_start = range_start
+    self.range_end = range_end
+
+
+class BlameList(object):
+  """Represents a list of blame objects.
+
+  Appends from the worker threads are guarded by blame_list_lock.
+  """
+
+  def __init__(self):
+    self.blame_list = []
+    self.blame_list_lock = Lock()
+
+  def __getitem__(self, index):
+    return self.blame_list[index]
+
+  def FindBlame(self, callstack, crash_revision_dict, regression_dict, parsers,
+                top_n_frames=10):
+    """Given a stack within a stacktrace, retrieves blame information.
+
+    Entries for the frames given by callstack.GetTopNFrames(top_n_frames) are
+    appended to self.blame_list; nothing is returned. The default is 10.
+
+    Args:
+      callstack: The list of stack frames.
+      crash_revision_dict: A dictionary that maps component to its crash
+                          revision.
+      regression_dict: A dictionary that maps component to its revision
+                          range.
+      parsers: A dictionary of repository parsers, keyed by 'svn' and 'git'.
+      top_n_frames: A number of stack frames to show the blame result for.
+    """
+    # Only return blame information for first 'top_n_frames' frames.
+    stack_frames = callstack.GetTopNFrames(top_n_frames)
+
+    threads = []
+    # Iterate through frames in stack.
+    for stack_frame in stack_frames:
+      # If the component this line is from does not have a crash revision,
+      # it is not possible to get blame information, so ignore this line.
+      component_path = stack_frame.component_path
+      if component_path not in crash_revision_dict:
+        continue
+
+      crash_revision = crash_revision_dict[component_path]['revision']
+      range_start = None
+      range_end = None
+      is_git = utils.IsGitHash(crash_revision)
+      if is_git:
+        repository_parser = parsers['git']
+      else:
+        repository_parser = parsers['svn']
+
+      # If the revision is in SVN, and if regression information is available,
+      # get it. For Git, we cannot know the ordering between hash numbers.
+      if not is_git:
+        if regression_dict and component_path in regression_dict:
+          component_object = regression_dict[component_path]
+          range_start = int(component_object['old_revision'])
+          range_end = int(component_object['new_revision'])
+
+      # Generate blame entry, one thread for one entry.
+      blame_thread = Thread(
+          target=self.__GenerateBlameEntry,
+          args=[repository_parser, stack_frame, crash_revision,
+                range_start, range_end])
+      threads.append(blame_thread)
+      blame_thread.start()
+
+    # Join the results before returning.
+    for blame_thread in threads:
+      blame_thread.join()
+
+  def __GenerateBlameEntry(self, repository_parser, stack_frame,
+                           crash_revision, range_start, range_end):
+    """Builds one Blame from the parsed blame info and appends it."""
+    stack_frame_index = stack_frame.index
+    component_path = stack_frame.component_path
+    component_name = stack_frame.component_name
+    file_name = stack_frame.file_name
+    file_path = stack_frame.file_path
+    crashed_line_number = stack_frame.crashed_line_number
+
+    # Parse blame information.
+    parsed_blame_info = repository_parser.ParseBlameInfo(
+        component_path, file_path, crashed_line_number, crash_revision)
+
+    # If it fails to retrieve information, do not do anything.
+    if not parsed_blame_info or len(parsed_blame_info) != 4:
+      return
+
+    # Create blame object from the parsed info and add it to the list.
+    (line_content, revision, author, url) = parsed_blame_info
+    blame = Blame(line_content, component_name, stack_frame_index, file_name,
+                  crashed_line_number, author, revision, url,
+                  range_start, range_end)
+
+    with self.blame_list_lock:
+      self.blame_list.append(blame)
+
+  def FilterAndSortBlameList(self):
+    """Sorts blames by stack position and drops ones past the regression."""
+    # Sort the blame list by its position in stack.
+    self.blame_list.sort(key=lambda blame: blame.stack_frame_index)
+
+    filtered_blame_list = []
+
+    for blame in self.blame_list:
+      # If regression information is available, check if it needs to be
+      # filtered.
+      if blame.range_start and blame.range_end:
+
+        # Discard SVN results that come after the end of the regression range.
+        if not utils.IsGitHash(blame.revision) and (
+            int(blame.revision) > int(blame.range_end)):
+          continue
+
+      filtered_blame_list.append(blame)
+
+    self.blame_list = filtered_blame_list
diff --git a/tools/findit/match_set.py b/tools/findit/match_set.py
new file mode 100644
index 0000000..c4fe99f
--- /dev/null
+++ b/tools/findit/match_set.py
@@ -0,0 +1,128 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import logging
+import re
+
+from threading import Lock
+
+import crash_utils
+
+
+REVIEW_URL_PATTERN = re.compile(r'Review URL:( *)(.*)')
+
+
+class Match(object):
+  """Represents a match entry.
+
+  A match is a CL that is suspected to have caused the crash. A match object
+  contains information about files it changes, their authors, etc.
+
+  Attributes:
+    is_reverted: True if this CL is a revert of another CL (see ParseMessage).
+    revert_of: If this CL is a revert of some other CL, a revision number/
+               git hash of that CL.
+    crashed_line_numbers: The list of lines that caused crash for this CL.
+    function_list: The list of functions that caused the crash.
+    min_distance: The minimum distance between the lines that CL changed and
+                  lines that caused the crash.
+    changed_files: The list of files that the CL changed.
+    changed_file_urls: The list of URLs for the file.
+    author: The author of the CL.
+    component_name: The name of the component that this CL belongs to.
+    stack_frame_indices: For files that caused crash, list of where in the
+                         stackframe they occur.
+    rank: The highest priority among the files the CL changes. Priority = 1
+          if it changes the crashed line, and priority = 2 if it is a simple
+          file change.
+    priorities: A list of priorities for each of the changed file.
+    revision_url: The revision URL of the CL.
+    review_url: The codereview URL that reviews this CL.
+    reviewers: The list of people that reviewed this CL.
+    reason: The reason why this CL is suspected.
+  """
+  REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)
+
+  def __init__(self, revision, component_name):
+    self.is_reverted = False
+    self.revert_of = None
+    self.crashed_line_numbers = []
+    self.function_list = []
+    self.min_distance = crash_utils.INFINITY
+    self.changed_files = []
+    self.changed_file_urls = []
+    self.author = revision['author']
+    self.component_name = component_name
+    self.stack_frame_indices = []
+    self.rank = crash_utils.INFINITY
+    self.priorities = []
+    self.revision_url = revision['url']
+    self.review_url = ''
+    self.reviewers = []
+    self.reason = None
+
+  def ParseMessage(self, message, codereview_api_url):
+    """Parses the message.
+
+    It checks the message to extract the code review website and list of
+    reviewers, and it also checks if the CL is a revert of another CL.
+
+    Args:
+      message: The message to parse.
+      codereview_api_url: URL to retrieve codereview data from.
+    """
+    for line in message.splitlines():
+      line = line.strip()
+      review_url_line_match = REVIEW_URL_PATTERN.match(line)
+
+      # Check if the line has the code review information.
+      if review_url_line_match:
+
+        # Group 2 is the text following 'Review URL:', used as the issue id.
+        issue_number = review_url_line_match.group(2)
+
+        # Get JSON from the code review site, ignore the line if it fails.
+        url = codereview_api_url % issue_number
+        json_string = crash_utils.GetDataFromURL(url)
+        if not json_string:
+          logging.warning('Failed to retrieve code review information from %s',
+                          url)
+          continue
+
+        # Keep the current reviewers if the JSON has no 'reviewers' entry.
+        code_review = crash_utils.LoadJSON(json_string)
+        if code_review and 'reviewers' in code_review:
+          self.reviewers = code_review['reviewers']
+
+      # Check if this CL is a revert of other CL.
+      if line.lower().startswith('revert'):
+        self.is_reverted = True
+
+        # Check if the line says what CL this CL is a revert of.
+        revert = self.REVERT_PATTERN.match(line)
+        if revert:
+          self.revert_of = revert.group(2)
+        return
+
+
+class MatchSet(object):
+  """Represents a set of matches.
+
+  Attributes:
+    codereview_api_url: The URL passed in at construction, stored as-is.
+    matches: A map from CL to a match object.
+    cls_to_ignore: A set of CLs to ignore.
+    matches_lock: A lock guarding matches dictionary.
+  """
+
+  def __init__(self, codereview_api_url):
+    self.codereview_api_url = codereview_api_url
+    self.matches = {}
+    self.cls_to_ignore = set()
+    self.matches_lock = Lock()
+
+  def RemoveRevertedCLs(self):
+    """Removes the CLs in cls_to_ignore from matches, via a key snapshot."""
+    for cl in [key for key in self.matches if key in self.cls_to_ignore]:
+      del self.matches[cl]
diff --git a/tools/findit/result.py b/tools/findit/result.py
new file mode 100644
index 0000000..cd32333
--- /dev/null
+++ b/tools/findit/result.py
@@ -0,0 +1,17 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+class Result(object):
+  """Plain value object holding the fields of one result entry.
+
+  Each constructor argument is stored on the attribute of the same name.
+  """
+
+  def __init__(self, suspected_cl, revision_url, component_name, author,
+               reason, review_url, reviewers, line_content):
+    self.suspected_cl, self.revision_url = suspected_cl, revision_url
+    self.component_name, self.author = component_name, author
+    self.reason, self.review_url = reason, review_url
+    self.reviewers, self.line_content = reviewers, line_content
author	jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2014-08-15 18:36:25 +0000
committer	jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2014-08-15 18:38:22 +0000
commit	de7feec08a3e4723510a3b5b87cca6ff95a8ef11 (patch)
tree	1fc218102803cd7b2739e36172a8f4d3775fe923 /tools/findit
parent	e2a75a5196134d37d4df8f7f6f97524d03443ab0 (diff)
download	chromium_src-de7feec08a3e4723510a3b5b87cca6ff95a8ef11.zip chromium_src-de7feec08a3e4723510a3b5b87cca6ff95a8ef11.tar.gz chromium_src-de7feec08a3e4723510a3b5b87cca6ff95a8ef11.tar.bz2