[Findit] changed stacktrace, crash_util and component dictionary to support any Git repository in DEPS file.

BUG= Review URL: https://codereview.chromium.org/472033002 Cr-Commit-Position: refs/heads/master@{#289778} git-svn-id: svn://svn.chromium.org/chrome/trunk/src@289778 0039d316-1c4b-4281-b951-d872f2087c98
author: jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-08-15 04:19:26 +0000
committer: jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-08-15 04:20:36 +0000
commit: dd128b9b633f3f4098afd3df0481cd2cbb25acdb (patch)
tree: 2fcb7a543ebed7a892d6233447cfaca72fcd29cc /tools/findit
parent: f62b3777106ee31c9a08e8077c7f02706492e376 (diff)
download: chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.zip
chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.tar.gz
chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.tar.bz2
4 files changed, 306 insertions, 71 deletions
diff --git a/tools/findit/chromium_deps.py b/tools/findit/chromium_deps.py
index 0de6721..6bad2c4 100644
--- a/tools/findit/chromium_deps.py
+++ b/tools/findit/chromium_deps.py
@@ -2,14 +2,18 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+import base64
 import https
 import utils
 
 
-DEPS_FILE_URL = 'https://src.chromium.org/chrome/trunk/src/DEPS?p=%s'
+DEPS_FILE_URL_SVN = 'https://src.chromium.org/chrome/trunk/src/DEPS?p=%s'
+DEPS_FILE_URL_GIT = (
+    'https://chromium.googlesource.com/chromium/src/+/%s/DEPS?format=text')
 
 
 class _VarImpl(object):
+
   def __init__(self, local_scope):
     self._local_scope = local_scope
 
@@ -70,7 +74,11 @@ def _GetComponentName(path):
 
 
 def _GetContentOfDEPS(chromium_revision):
-  return https.SendRequest(DEPS_FILE_URL % chromium_revision)
+  if utils.IsGitHash(chromium_revision):
+    url = DEPS_FILE_URL_GIT
+  else:
+    url = DEPS_FILE_URL_SVN
+  return https.SendRequest(url % chromium_revision)
 
 
 def GetChromiumComponents(chromium_revision,
@@ -85,13 +93,23 @@ def GetChromiumComponents(chromium_revision,
                           and returns the content of the DEPS file. The returned
                           content is assumed to be trusted input and will be
                           evaluated as python code.
+
+  Returns:
+    A map from component path to parsed component name, repository URL,
+    repository type and revision.
   """
   if os_platform.lower() == 'linux':
     os_platform = 'unix'
 
+  is_git_hash = utils.IsGitHash(chromium_revision)
+
   # Download the content of DEPS file in chromium.
   deps_content = deps_file_downloader(chromium_revision)
 
+  # Googlesource git returns text file encoded in base64, so decode it.
+  if is_git_hash:
+    deps_content = base64.b64decode(deps_content)
+
   all_deps = {}
 
   # Parse the content of DEPS file.
@@ -102,7 +120,7 @@ def GetChromiumComponents(chromium_revision,
 
   # Figure out components based on the dependencies.
   components = {}
-  for component_path in all_deps.keys():
+  for component_path in all_deps:
     name = _GetComponentName(component_path)
     repository, revision = all_deps[component_path].split('@')
     is_git_hash = utils.IsGitHash(revision)
@@ -123,47 +141,51 @@ def GetChromiumComponents(chromium_revision,
         'revision': revision
     }
 
-  # Add chromium as a component.
-  # TODO(stgao): Move to git.
+  # Add chromium as a component, depending on the repository type.
+  if is_git_hash:
+    repository = 'https://chromium.googlesource.com/chromium/src/'
+    repository_type = 'git'
+  else:
+    repository = 'https://src.chromium.org/chrome/trunk'
+    repository_type = 'svn'
+
   components['src/'] = {
       'path': 'src/',
       'name': 'chromium',
-      'repository': 'https://src.chromium.org/chrome/trunk',
-      'repository_type': 'svn',
+      'repository': repository,
+      'repository_type': repository_type,
       'revision': chromium_revision
   }
 
   return components
 
 
-def GetChromiumComponentRange(chromium_revision1,
-                              chromium_revision2,
+def GetChromiumComponentRange(old_revision,
+                              new_revision,
                               os_platform='unix',
                               deps_file_downloader=_GetContentOfDEPS):
   """Return a list of components with their revision ranges.
 
   Args:
-    chromium_revision1: The revision of a Chrome build.
-    chromium_revision2: The revision of another Chrome build.
+    old_revision: The old revision of a Chrome build.
+    new_revision: The new revision of a Chrome build.
     os_platform: The target platform of the Chrome build, eg. win, mac, etc.
     deps_file_downloader: A function that takes the chromium_revision as input,
                           and returns the content of the DEPS file. The returned
                           content is assumed to be trusted input and will be
                           evaluated as python code.
-  """
-  # TODO(stgao): support git.
-  chromium_revision1 = int(chromium_revision1)
-  chromium_revision2 = int(chromium_revision2)
-  old_revision = str(min(chromium_revision1, chromium_revision2))
-  new_revision = str(max(chromium_revision1, chromium_revision2))
 
+  Returns:
+    A map from component path to its parsed regression and other information.
+  """
+  # Assume first revision is the old revision.
   old_components = GetChromiumComponents(old_revision, os_platform,
                                          deps_file_downloader)
   new_components = GetChromiumComponents(new_revision, os_platform,
                                          deps_file_downloader)
 
   components = {}
-  for path in new_components.keys():
+  for path in new_components:
     new_component = new_components[path]
     old_revision = None
     if path in old_components:
@@ -184,4 +206,5 @@ def GetChromiumComponentRange(chromium_revision1,
 
 if __name__ == '__main__':
   import json
-  print json.dumps(GetChromiumComponents(284750), sort_keys=True, indent=2)
+  print json.dumps(GetChromiumComponents(
+      'b4b1aea80b25a3b2f7952c9d95585e880421ef2b'), sort_keys=True, indent=2)
diff --git a/tools/findit/component_dictionary.py b/tools/findit/component_dictionary.py
index 93ea19e..4149b75 100644
--- a/tools/findit/component_dictionary.py
+++ b/tools/findit/component_dictionary.py
@@ -1,4 +1,4 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
+# Copyright (c) 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
@@ -70,10 +70,10 @@ class FileDictionary(object):
 class ComponentDictionary(object):
   """Represents a file dictionary.
 
-  It maps each component (blink, chrome, etc) to a file dictionary.
+  It maps each component path to a file dictionary.
   """
 
-  def __init__(self, components):
+  def __init__(self, callstack, components):
     """Initializes the dictionary with given components."""
     self.component_dict = {}
 
@@ -81,21 +81,21 @@ class ComponentDictionary(object):
     for component in components:
       self.component_dict[component] = FileDictionary()
 
-  def __iter__(self):
-    return iter(self.component_dict)
+    # Create file dict from callstack.
+    self.__CreateFileDictFromCallstack(callstack)
 
   def GetFileDict(self, component):
     """Returns a file dictionary for a given component."""
-    return self.component_dict[component]
+    return self.component_dict.get(component)
 
-  def GenerateFileDict(self, stack_frame_list):
+  def __GenerateFileDict(self, stack_frame_list):
     """Generates file dictionary, given an instance of StackFrame list."""
     # Iterate through the list of stackframe objects.
     for stack_frame in stack_frame_list:
       # If the component of this line is not in the list of components to
       # look for, ignore this line.
-      component = stack_frame.component
-      if component not in self.component_dict:
+      component_path = stack_frame.component_path
+      if component_path not in self.component_dict:
         continue
 
       # Get values of the variables
@@ -106,6 +106,31 @@ class ComponentDictionary(object):
       function = stack_frame.function
 
       # Add the file to this component's dictionary of files.
-      file_dict = self.component_dict[component]
+      file_dict = self.component_dict[component_path]
       file_dict.AddFile(file_name, file_path, crashed_line_number,
-                       stack_frame_index, function)
+                        stack_frame_index, function)
+
+  def __CreateFileDictFromCallstack(self, callstack, top_n_frames=15):
+    """Creates a file dict that maps a file to the occurrence in the stack.
+
+    Args:
+      callstack: A list containing parsed result from a single stack
+                  within a stacktrace. For example, a stacktrace from
+                  previously-allocated thread in release build stacktrace.
+      top_n_frames: The number of frames to look for.
+
+    Returns:
+      None. The result is stored in self.component_dict: each component's
+      file dictionary is filled with the files seen in the stack, keyed by
+      file name and then by file path (because there can be multiple files
+      with the same name in different directories), recording each file's
+      place in the stack, its crashed lines, and the function name.
+    """
+
+    # Only look at the first top_n_frames of the stacktrace (lower frames are
+    # likely noisy). Parse the stacktrace into the component dictionary.
+    stack_list = callstack.GetTopNFrames(top_n_frames)
+    self.__GenerateFileDict(stack_list)
+
+  def __iter__(self):
+    return iter(self.component_dict)
diff --git a/tools/findit/crash_utils.py b/tools/findit/crash_utils.py
index 7d6ac96..a1bafe6 100644
--- a/tools/findit/crash_utils.py
+++ b/tools/findit/crash_utils.py
@@ -1,48 +1,144 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
+# Copyright (c) 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 import cgi
+import ConfigParser
 import json
+import logging
 import os
 import time
-import urllib
+import urllib2
+
+from result import Result
 
 
 INFINITY = float('inf')
 
 
-def NormalizePathLinux(path):
+def ParseURLsFromConfig(file_name):
+  """Parses URLS from the config file.
+
+  The file should be in python config format, where svn section is in the
+  format "svn:component_path", except for git URLs and codereview URL.
+  Each of the sections for svn should contain changelog_url, revision_url,
+  diff_url and blame_url.
+
+  Args:
+    file_name: The name of the file that contains URL information.
+
+  Returns:
+    A dictionary that maps repository type to list of URLs. For svn, it maps
+    key 'svn' to another dictionary, which maps component path to the URLs
+    as explained above. For git, it maps to the URLs as explained above.
+    Codereview maps to codereview API url.
+  """
+  config = ConfigParser.ConfigParser()
+
+  # Get the absolute path of the config file, and read the file. If it fails,
+  # return none.
+  config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
+                                  file_name)
+  config.read(config_file_path)
+  if not config:
+    logging.error('Config file with URLs does not exist.')
+    return None
+
+  # Iterate through the config file, check for sections.
+  repository_type_to_url_map = {}
+  for section in config.sections():
+    # These two do not need another layer of dictionary, so add it and go
+    # to next section.
+    if section == 'git' or section == 'codereview':
+      for option in config.options(section):
+        if section not in repository_type_to_url_map:
+          repository_type_to_url_map[section] = {}
+
+        url = config.get(section, option)
+        repository_type_to_url_map[section][option] = url
+
+      continue
+
+    # Get repository type and component name from the section name.
+    repository_type_and_component = section.split(':')
+    repository_type = repository_type_and_component[0]
+    component_path = repository_type_and_component[1]
+
+    # Add 'svn' as the key, if it is not already there.
+    if repository_type not in repository_type_to_url_map:
+      repository_type_to_url_map[repository_type] = {}
+    url_map_for_repository = repository_type_to_url_map[repository_type]
+
+    # Add the path to the 'svn', if it is not already there.
+    if component_path not in url_map_for_repository:
+      url_map_for_repository[component_path] = {}
+    type_to_url = url_map_for_repository[component_path]
+
+    # Add all URLs to this map.
+    for option in config.options(section):
+      url = config.get(section, option)
+      type_to_url[option] = url
+
+  return repository_type_to_url_map
+
+
+def NormalizePathLinux(path, parsed_deps):
   """Normalizes linux path.
 
   Args:
     path: A string representing a path.
+    parsed_deps: A map from component path to its component name, repository,
+                 etc.
 
   Returns:
     A tuple containing a component this path is in (e.g blink, skia, etc)
     and a path in that component's repository.
   """
+  # First normalize the path by retrieving the absolute path.
   normalized_path = os.path.abspath(path)
 
-  if 'src/v8/' in normalized_path:
-    component = 'v8'
-    normalized_path = normalized_path.split('src/v8/')[1]
+  # Iterate through all component paths in the parsed DEPS, in the decreasing
+  # order of the length of the file path.
+  for component_path in sorted(parsed_deps,
+                               key=(lambda path: -len(path))):
+    # New_path is the component path with 'src/' removed.
+    new_path = component_path
+    if new_path.startswith('src/') and new_path != 'src/':
+      new_path = new_path[len('src/'):]
 
-  # TODO(jeun): Integrate with parsing DEPS file.
-  if 'WebKit/' in normalized_path:
-    component = 'blink'
-    normalized_path = ''.join(path.split('WebKit/')[1:])
-  else:
-    component = 'chromium'
+    # If this component path is part of the file path, the file must be from
+    # this component.
+    if new_path in normalized_path:
+
+      # Currently does not support googlecode.
+      if 'googlecode' in parsed_deps[component_path]['repository']:
+        return (None, '', '')
+
+      # Normalize the path by stripping everything off the component's relative
+      # path.
+      normalized_path = normalized_path.split(new_path,1)[1]
 
-  if '/build/' in normalized_path:
-    normalized_path = normalized_path.split('/build/')[-1]
+      # Add 'src/' or 'Source/' at the front of the normalized path, depending
+      # on what prefix the component path uses. For example, blink uses
+      # 'Source' but chromium uses 'src/', and blink component path is
+      # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
+      # normalized path.
+      if not normalized_path.startswith('src/') or \
+          normalized_path.startswith('Source/'):
 
-  if not (normalized_path.startswith('src/') or
-      normalized_path.startswith('Source/')):
-    normalized_path = 'src/' + normalized_path
+        if (new_path.lower().endswith('src/') or
+            new_path.lower().endswith('source/')):
+          normalized_path = new_path.split('/')[-2] + '/' + normalized_path
 
-  return (component, normalized_path)
+        else:
+          normalized_path = 'src/' + normalized_path
+
+      component_name = parsed_deps[component_path]['name']
+
+      return (component_path, component_name, normalized_path)
+
+  # If the path does not match any component, default to chromium.
+  return ('src/', 'chromium', normalized_path)
 
 
 def SplitRange(regression):
@@ -55,6 +151,9 @@ def SplitRange(regression):
     A list containing two numbers represented in string, for example
     ['1234','5678'].
   """
+  if not regression:
+    return None
+
   revisions = regression.split(':')
 
   # If regression information is not available, return none.
@@ -62,10 +161,10 @@ def SplitRange(regression):
     return None
 
   # Strip 'r' from both start and end range.
-  start_range = revisions[0].lstrip('r')
-  end_range = revisions[1].lstrip('r')
+  range_start = revisions[0].lstrip('r')
+  range_end = revisions[1].lstrip('r')
 
-  return [start_range, end_range]
+  return [range_start, range_end]
 
 
 def LoadJSON(json_string):
@@ -85,13 +184,14 @@ def LoadJSON(json_string):
   return data
 
 
-def GetDataFromURL(url, retries=10, sleep_time=0.1):
+def GetDataFromURL(url, retries=10, sleep_time=0.1, timeout=10):
   """Retrieves raw data from URL, tries 10 times.
 
   Args:
     url: URL to get data from.
     retries: Number of times to retry connection.
     sleep_time: Time in seconds to wait before retrying connection.
+    timeout: Time in seconds to wait before timing out.
 
   Returns:
     None if the data retrieval fails, or the raw data.
@@ -100,13 +200,16 @@ def GetDataFromURL(url, retries=10, sleep_time=0.1):
   for i in range(retries):
     # Retrieves data from URL.
     try:
-      data = urllib.urlopen(url)
+      data = urllib2.urlopen(url, timeout=timeout)
 
       # If retrieval is successful, return the data.
       if data:
         return data.read()
 
     # If retrieval fails, try after sleep_time second.
+    except urllib2.URLError:
+      time.sleep(sleep_time)
+      continue
     except IOError:
       time.sleep(sleep_time)
       continue
@@ -205,7 +308,7 @@ def AddHyperlink(text, link):
     A string with hyperlink added.
   """
   sanitized_link = cgi.escape(link, quote=True)
-  sanitized_text = cgi.escape(text)
+  sanitized_text = cgi.escape(str(text))
   return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
 
 
@@ -283,3 +386,77 @@ def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
       break
 
   return (line_intersection, stack_frame_index_intersection)
+
+
+def MatchListToResultList(matches):
+  """Convert list of matches to the list of result objects.
+
+  Args:
+    matches: A list of match objects, each along with its stack priority and
+             revision number/git hash.
+  Returns:
+    A list of result objects.
+
+  """
+  result_list = []
+
+  for _, cl, match in matches:
+    suspected_cl = cl
+    revision_url = match.revision_url
+    component_name = match.component_name
+    author = match.author
+    reason = match.reason
+    review_url = match.review_url
+    reviewers = match.reviewers
+    # For matches, line content does not exist.
+    line_content = None
+
+    result = Result(suspected_cl, revision_url, component_name, author, reason,
+                    review_url, reviewers, line_content)
+    result_list.append(result)
+
+  return result_list
+
+
+def BlameListToResultList(blame_list):
+  """Convert blame list to the list of result objects.
+
+  Args:
+    blame_list: A list of blame objects.
+
+  Returns:
+    A list of result objects.
+  """
+  result_list = []
+
+  for blame in blame_list:
+    suspected_cl = blame.revision
+    revision_url = blame.url
+    component_name = blame.component_name
+    author = blame.author
+    reason = (
+        'The CL changes line %s of file %s from stack %d.' %
+        (blame.line_number, blame.file, blame.stack_frame_index))
+    # Blame object does not have review url and reviewers.
+    review_url = None
+    reviewers = None
+    line_content = blame.content
+
+    result = Result(suspected_cl, revision_url, component_name, author, reason,
+                    review_url, reviewers, line_content)
+    result_list.append(result)
+
+  return result_list
+
+
+def ResultListToJSON(result_list):
+  """Converts result list to JSON format.
+
+  Args:
+    result_list: A list of result objects
+
+  Returns:
+    A string, JSON format of the result_list.
+
+  """
+  return json.dumps([result.ToDictionary() for result in result_list])
diff --git a/tools/findit/stacktrace.py b/tools/findit/stacktrace.py
index 5b7f935..1048991 100644
--- a/tools/findit/stacktrace.py
+++ b/tools/findit/stacktrace.py
@@ -1,4 +1,4 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
+# Copyright (c) 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
@@ -13,17 +13,19 @@ class StackFrame(object):
 
   Attributes:
     index: An index of the stack frame.
-    component: A component this line represents, such as blink, chrome, etc.
+    component_path: The path of the component this frame represents.
+    component_name: The name of the component this frame represents.
     file_name: The name of the file that crashed.
     function: The function that caused the crash.
     file_path: The path of the crashed file.
     crashed_line_number: The line of the file that caused the crash.
   """
 
-  def __init__(self, stack_frame_index, component, file_name,
-               function, file_path, crashed_line_number):
+  def __init__(self, stack_frame_index, component_path, component_name,
+               file_name, function, file_path, crashed_line_number):
     self.index = stack_frame_index
-    self.component = component
+    self.component_path = component_path
+    self.component_name = component_name
     self.file_name = file_name
     self.function = function
     self.file_path = file_path
@@ -55,8 +57,9 @@ class Stacktrace(object):
   one callstacks.
   """
 
-  def __init__(self, stacktrace, build_type):
-    self.stack_list = []
+  def __init__(self, stacktrace, build_type, parsed_deps):
+    self.stack_list = None
+    self.parsed_deps = parsed_deps
     self.ParseStacktrace(stacktrace, build_type)
 
   def ParseStacktrace(self, stacktrace, build_type):
@@ -72,7 +75,6 @@ class Stacktrace(object):
     """
     # If the passed in string is empty, the object does not represent anything.
     if not stacktrace:
-      self.stack_list = None
       return
 
     # Reset the stack list.
@@ -256,21 +258,29 @@ class Stacktrace(object):
 
     # Normalize the file path so that it can be compared to repository path.
     file_name = os.path.basename(file_path)
-    (component, file_path) = crash_utils.NormalizePathLinux(file_path)
+    (component_path, component_name, file_path) = (
+        crash_utils.NormalizePathLinux(file_path, self.parsed_deps))
 
-    # FIXME(jeun): Add other components.
-    if not (component == 'blink' or component == 'chromium'):
+    # If this component is not supported, ignore this line.
+    if not component_path:
       return None
 
     # Return a new stack frame object with the parsed information.
-    return StackFrame(stack_frame_index, component, file_name, function,
-                      file_path, crashed_line_number)
+    return StackFrame(stack_frame_index, component_path, component_name,
+                      file_name, function, file_path, crashed_line_number)
 
   def __getitem__(self, index):
     return self.stack_list[index]
 
   def GetCrashStack(self):
-    for callstack in self.stack_list:
-      # Only the crash stack has the priority 0.
-      if callstack.priority == 0:
-        return callstack
+    """Returns the callstack with the highest priority.
+
+    Crash stack has priority 0, and allocation/freed/other thread stacks
+    get priority 1.
+
+    Returns:
+      The highest priority callstack in the stacktrace.
+    """
+    sorted_stacklist = sorted(self.stack_list,
+                              key=lambda callstack: callstack.priority)
+    return sorted_stacklist[0]
author	jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2014-08-15 04:19:26 +0000
committer	jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2014-08-15 04:20:36 +0000
commit	dd128b9b633f3f4098afd3df0481cd2cbb25acdb (patch)
tree	2fcb7a543ebed7a892d6233447cfaca72fcd29cc /tools/findit
parent	f62b3777106ee31c9a08e8077c7f02706492e376 (diff)
download	chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.zip chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.tar.gz chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.tar.bz2