summaryrefslogtreecommitdiffstats
path: root/tools/findit/match_set.py
blob: 52114b8c2bb4c8d7a494325d83cd054d7a8cd100 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re

from threading import Lock

import crash_utils


# Matches text of the form 'Review URL: <prefix>/<digits>'.
#   group(1): the run of spaces (possibly empty) right after 'Review URL:'.
#   group(2): the shortest text leading up to a '/' that is followed by digits.
#   group(3): those digits.
REVIEW_URL_PATTERN = re.compile(r'Review URL:( *)(.*?)/(\d+)')


class Match(object):
  """A single match entry, i.e. one CL suspected of causing a crash.

  The object bundles what is known about the CL: the files it touches, who
  wrote and reviewed it, and how it relates to the crashing lines.

  Attributes:
    is_revert: Set to True by ParseMessage when a line of the commit message
               starts with 'revert' (case-insensitive).
    revert_of: If this CL is a revert of some other CL, the revision number
               of that CL as captured by REVERT_PATTERN; otherwise None.
    message: The message most recently handed to ParseMessage, or None.
    crashed_line_numbers: The list of lines that caused crash for this CL.
    function_list: The list of functions that caused the crash.
    min_distance: The minimum distance between the lines that CL changed and
                  lines that caused the crash. Starts at crash_utils.INFINITY.
    min_distance_info: Starts as None; it is not populated by this class.
    changed_files: The list of files that the CL changed.
    changed_file_urls: The list of URLs for the file.
    author: The author of the CL, taken from revision['author'].
    component_name: The name of the component that this CL belongs to.
    stack_frame_indices: For files that caused crash, list of where in the
                         stackframe they occur.
    priorities: A list of priorities for each of the changed file. A priority
                is 1 if the file changes a crashed line, and 2 if it changes
                the file but not the crashed line.
    revision_url: The revision URL of the CL, taken from revision['url'].
    review_url: The codereview URL that reviews this CL. Starts as an empty
                string; it is not populated by this class.
    reviewers: The list of people that reviewed this CL.
    reason: The reason why this CL is suspected.
    time: When this CL was committed, taken from revision['time'].
  """
  REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)

  def __init__(self, revision, component_name):
    """Creates an empty match for the given revision.

    Args:
      revision: A mapping that provides 'author', 'url' and 'time' entries.
      component_name: The name of the component that this CL belongs to.
    """
    # Fields filled in later by ParseMessage.
    self.message = None
    self.is_revert = False
    self.revert_of = None
    self.reviewers = []
    self.review_url = ''

    # Crash-related bookkeeping; everything starts out empty.
    self.reason = None
    self.crashed_line_numbers = []
    self.function_list = []
    self.stack_frame_indices = []
    self.priorities = []
    self.changed_files = []
    self.changed_file_urls = []
    self.min_distance = crash_utils.INFINITY
    self.min_distance_info = None

    # Facts copied straight from the caller's arguments.
    self.component_name = component_name
    self.author = revision['author']
    self.revision_url = revision['url']
    self.time = revision['time']

  def ParseMessage(self, message, codereview_api_url):
    """Scans a commit message for reviewers and revert information.

    Each line is stripped and inspected in order. A line that matches
    REVIEW_URL_PATTERN triggers a lookup of the reviewers through the code
    review API. The first line that starts with 'revert' (any case) marks
    this CL as a revert and ends the scan.

    Args:
      message: The message to parse.
      codereview_api_url: URL to retrieve codereview data from; it is
                          %-formatted with the issue number.
    """
    self.message = message
    for raw_line in message.splitlines():
      stripped = raw_line.strip()
      review_match = REVIEW_URL_PATTERN.match(stripped)

      if review_match:
        # The third group of the pattern holds the issue number.
        api_url = codereview_api_url % review_match.group(3)

        # A failed fetch means this line carries nothing usable; move on.
        response = crash_utils.GetDataFromURL(api_url)
        if not response:
          continue

        review_data = crash_utils.LoadJSON(response)
        if review_data:
          self.reviewers = review_data['reviewers']

      # Only lines that start with 'revert' matter from here on.
      if not stripped.lower().startswith('revert'):
        continue

      self.is_revert = True

      # Record the reverted CL when the line names one.
      revert_match = self.REVERT_PATTERN.match(stripped)
      if revert_match:
        self.revert_of = revert_match.group(2)
      return


class MatchSet(object):
  """Represents a set of matches.

  Attributes:
    codereview_api_url: The code review API URL, stored as passed to the
                        constructor.
    matches: A map from CL to a match object.
    cls_to_ignore: A set of CLs to ignore.
    matches_lock: A lock guarding matches dictionary.
  """

  def __init__(self, codereview_api_url):
    """Creates an empty match set.

    Args:
      codereview_api_url: URL to retrieve codereview data from.
    """
    self.codereview_api_url = codereview_api_url
    self.matches = {}
    self.cls_to_ignore = set()
    self.matches_lock = Lock()

  def RemoveRevertedCLs(self):
    """Removes from matches every CL that is listed in cls_to_ignore.

    CLs in cls_to_ignore that are not present in matches are skipped. This
    method does not acquire matches_lock itself.
    """
    # Walk a snapshot of the keys: deleting entries while iterating over the
    # dictionary itself raises "dictionary changed size during iteration".
    for cl in list(self.matches):
      if cl in self.cls_to_ignore:
        del self.matches[cl]