diff options
author | jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-08-15 04:19:26 +0000 |
---|---|---|
committer | jeun@chromium.org <jeun@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-08-15 04:20:36 +0000 |
commit | dd128b9b633f3f4098afd3df0481cd2cbb25acdb (patch) | |
tree | 2fcb7a543ebed7a892d6233447cfaca72fcd29cc /tools/findit | |
parent | f62b3777106ee31c9a08e8077c7f02706492e376 (diff) | |
download | chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.zip chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.tar.gz chromium_src-dd128b9b633f3f4098afd3df0481cd2cbb25acdb.tar.bz2 |
[Findit] Changed stacktrace, crash_utils and component dictionary to support any Git repository in the DEPS file.
BUG=
Review URL: https://codereview.chromium.org/472033002
Cr-Commit-Position: refs/heads/master@{#289778}
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@289778 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/findit')
-rw-r--r-- | tools/findit/chromium_deps.py | 61 | ||||
-rw-r--r-- | tools/findit/component_dictionary.py | 47 | ||||
-rw-r--r-- | tools/findit/crash_utils.py | 225 | ||||
-rw-r--r-- | tools/findit/stacktrace.py | 44 |
4 files changed, 306 insertions, 71 deletions
diff --git a/tools/findit/chromium_deps.py b/tools/findit/chromium_deps.py index 0de6721..6bad2c4 100644 --- a/tools/findit/chromium_deps.py +++ b/tools/findit/chromium_deps.py @@ -2,14 +2,18 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. +import base64 import https import utils -DEPS_FILE_URL = 'https://src.chromium.org/chrome/trunk/src/DEPS?p=%s' +DEPS_FILE_URL_SVN = 'https://src.chromium.org/chrome/trunk/src/DEPS?p=%s' +DEPS_FILE_URL_GIT = ( + 'https://chromium.googlesource.com/chromium/src/+/%s/DEPS?format=text') class _VarImpl(object): + def __init__(self, local_scope): self._local_scope = local_scope @@ -70,7 +74,11 @@ def _GetComponentName(path): def _GetContentOfDEPS(chromium_revision): - return https.SendRequest(DEPS_FILE_URL % chromium_revision) + if utils.IsGitHash(chromium_revision): + url = DEPS_FILE_URL_GIT + else: + url = DEPS_FILE_URL_SVN + return https.SendRequest(url % chromium_revision) def GetChromiumComponents(chromium_revision, @@ -85,13 +93,23 @@ def GetChromiumComponents(chromium_revision, and returns the content of the DEPS file. The returned content is assumed to be trusted input and will be evaluated as python code. + + Returns: + A map from component path to parsed component name, repository URL, + repository type and revision. """ if os_platform.lower() == 'linux': os_platform = 'unix' + is_git_hash = utils.IsGitHash(chromium_revision) + # Download the content of DEPS file in chromium. deps_content = deps_file_downloader(chromium_revision) + # Googlesource git returns text file encoded in base64, so decode it. + if is_git_hash: + deps_content = base64.b64decode(deps_content) + all_deps = {} # Parse the content of DEPS file. @@ -102,7 +120,7 @@ def GetChromiumComponents(chromium_revision, # Figure out components based on the dependencies. 
components = {} - for component_path in all_deps.keys(): + for component_path in all_deps: name = _GetComponentName(component_path) repository, revision = all_deps[component_path].split('@') is_git_hash = utils.IsGitHash(revision) @@ -123,47 +141,51 @@ def GetChromiumComponents(chromium_revision, 'revision': revision } - # Add chromium as a component. - # TODO(stgao): Move to git. + # Add chromium as a component, depending on the repository type. + if is_git_hash: + repository = 'https://chromium.googlesource.com/chromium/src/' + repository_type = 'git' + else: + repository = 'https://src.chromium.org/chrome/trunk' + repository_type = 'svn' + components['src/'] = { 'path': 'src/', 'name': 'chromium', - 'repository': 'https://src.chromium.org/chrome/trunk', - 'repository_type': 'svn', + 'repository': repository, + 'repository_type': repository_type, 'revision': chromium_revision } return components -def GetChromiumComponentRange(chromium_revision1, - chromium_revision2, +def GetChromiumComponentRange(old_revision, + new_revision, os_platform='unix', deps_file_downloader=_GetContentOfDEPS): """Return a list of components with their revision ranges. Args: - chromium_revision1: The revision of a Chrome build. - chromium_revision2: The revision of another Chrome build. + old_revision: The old revision of a Chrome build. + new_revision: The new revision of a Chrome build. os_platform: The target platform of the Chrome build, eg. win, mac, etc. deps_file_downloader: A function that takes the chromium_revision as input, and returns the content of the DEPS file. The returned content is assumed to be trusted input and will be evaluated as python code. - """ - # TODO(stgao): support git. 
- chromium_revision1 = int(chromium_revision1) - chromium_revision2 = int(chromium_revision2) - old_revision = str(min(chromium_revision1, chromium_revision2)) - new_revision = str(max(chromium_revision1, chromium_revision2)) + Returns: + A map from component path to its parsed regression and other information. + """ + # Assume first revision is the old revision. old_components = GetChromiumComponents(old_revision, os_platform, deps_file_downloader) new_components = GetChromiumComponents(new_revision, os_platform, deps_file_downloader) components = {} - for path in new_components.keys(): + for path in new_components: new_component = new_components[path] old_revision = None if path in old_components: @@ -184,4 +206,5 @@ def GetChromiumComponentRange(chromium_revision1, if __name__ == '__main__': import json - print json.dumps(GetChromiumComponents(284750), sort_keys=True, indent=2) + print json.dumps(GetChromiumComponents( + 'b4b1aea80b25a3b2f7952c9d95585e880421ef2b'), sort_keys=True, indent=2) diff --git a/tools/findit/component_dictionary.py b/tools/findit/component_dictionary.py index 93ea19e..4149b75 100644 --- a/tools/findit/component_dictionary.py +++ b/tools/findit/component_dictionary.py @@ -1,4 +1,4 @@ -# Copyright 2014 The Chromium Authors. All rights reserved. +# Copyright (c) 2014 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -70,10 +70,10 @@ class FileDictionary(object): class ComponentDictionary(object): """Represents a file dictionary. - It maps each component (blink, chrome, etc) to a file dictionary. + It maps each component path to a file dictionary. 
""" - def __init__(self, components): + def __init__(self, callstack, components): """Initializes the dictionary with given components.""" self.component_dict = {} @@ -81,21 +81,21 @@ class ComponentDictionary(object): for component in components: self.component_dict[component] = FileDictionary() - def __iter__(self): - return iter(self.component_dict) + # Create file dict from callstack. + self.__CreateFileDictFromCallstack(callstack) def GetFileDict(self, component): """Returns a file dictionary for a given component.""" - return self.component_dict[component] + return self.component_dict.get(component) - def GenerateFileDict(self, stack_frame_list): + def __GenerateFileDict(self, stack_frame_list): """Generates file dictionary, given an instance of StackFrame list.""" # Iterate through the list of stackframe objects. for stack_frame in stack_frame_list: # If the component of this line is not in the list of components to # look for, ignore this line. - component = stack_frame.component - if component not in self.component_dict: + component_path = stack_frame.component_path + if component_path not in self.component_dict: continue # Get values of the variables @@ -106,6 +106,31 @@ class ComponentDictionary(object): function = stack_frame.function # Add the file to this component's dictionary of files. - file_dict = self.component_dict[component] + file_dict = self.component_dict[component_path] file_dict.AddFile(file_name, file_path, crashed_line_number, - stack_frame_index, function) + stack_frame_index, function) + + def __CreateFileDictFromCallstack(self, callstack, top_n_frames=15): + """Creates a file dict that maps a file to the occurrence in the stack. + + Args: + callstack: A list containing parsed result from a single stack + within a stacktrace. For example, a stacktrace from + previously-allocated thread in release build stacktrace. + top_n_frames: The number of frames to look for. 
+ + Returns: + Component_dict, a dictionary with key as a file name and value as another + dictionary, which maps the file's path (because there can be multiple + files with same name but in different directory) to the list of this + file's place in stack, lines that this file caused a crash, and the name + of the function. + """ + + # Only look at first top_n_frames of the stacktrace, below those are likely + # to be noisy. Parse the stacktrace into the component dictionary. + stack_list = callstack.GetTopNFrames(top_n_frames) + self.__GenerateFileDict(stack_list) + + def __iter__(self): + return iter(self.component_dict) diff --git a/tools/findit/crash_utils.py b/tools/findit/crash_utils.py index 7d6ac96..a1bafe6 100644 --- a/tools/findit/crash_utils.py +++ b/tools/findit/crash_utils.py @@ -1,48 +1,144 @@ -# Copyright 2014 The Chromium Authors. All rights reserved. +# Copyright (c) 2014 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. import cgi +import ConfigParser import json +import logging import os import time -import urllib +import urllib2 + +from result import Result INFINITY = float('inf') -def NormalizePathLinux(path): +def ParseURLsFromConfig(file_name): + """Parses URLS from the config file. + + The file should be in python config format, where svn section is in the + format "svn:component_path", except for git URLs and codereview URL. + Each of the section for svn should contain changelog_url, revision_url, + diff_url and blame_url. + + Args: + file_name: The name of the file that contains URL information. + + Returns: + A dictionary that maps repository type to list of URLs. For svn, it maps + key 'svn' to another dictionary, which maps component path to the URLs + as explained above. For git, it maps to the URLs as explained above. + Codereview maps to codereview API url. 
+ """ + config = ConfigParser.ConfigParser() + + # Get the absolute path of the config file, and read the file. If it fails, + # return none. + config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + file_name) + config.read(config_file_path) + if not config: + logging.error('Config file with URLs does not exist.') + return None + + # Iterate through the config file, check for sections. + repository_type_to_url_map = {} + for section in config.sections(): + # These two do not need another layer of dictionary, so add it and go + # to next section. + if section == 'git' or section == 'codereview': + for option in config.options(section): + if section not in repository_type_to_url_map: + repository_type_to_url_map[section] = {} + + url = config.get(section, option) + repository_type_to_url_map[section][option] = url + + continue + + # Get repository type and component name from the section name. + repository_type_and_component = section.split(':') + repository_type = repository_type_and_component[0] + component_path = repository_type_and_component[1] + + # Add 'svn' as the key, if it is not already there. + if repository_type not in repository_type_to_url_map: + repository_type_to_url_map[repository_type] = {} + url_map_for_repository = repository_type_to_url_map[repository_type] + + # Add the path to the 'svn', if it is not already there. + if component_path not in url_map_for_repository: + url_map_for_repository[component_path] = {} + type_to_url = url_map_for_repository[component_path] + + # Add all URLs to this map. + for option in config.options(section): + url = config.get(section, option) + type_to_url[option] = url + + return repository_type_to_url_map + + +def NormalizePathLinux(path, parsed_deps): """Normalizes linux path. Args: path: A string representing a path. + parsed_deps: A map from component path to its component name, repository, + etc. 
Returns: A tuple containing a component this path is in (e.g blink, skia, etc) and a path in that component's repository. """ + # First normalize the path by retrieving the absolute path. normalized_path = os.path.abspath(path) - if 'src/v8/' in normalized_path: - component = 'v8' - normalized_path = normalized_path.split('src/v8/')[1] + # Iterate through all component paths in the parsed DEPS, in the decreasing + # order of the length of the file path. + for component_path in sorted(parsed_deps, + key=(lambda path: -len(path))): + # New_path is the component path with 'src/' removed. + new_path = component_path + if new_path.startswith('src/') and new_path != 'src/': + new_path = new_path[len('src/'):] - # TODO(jeun): Integrate with parsing DEPS file. - if 'WebKit/' in normalized_path: - component = 'blink' - normalized_path = ''.join(path.split('WebKit/')[1:]) - else: - component = 'chromium' + # If this path is the part of file path, this file must be from this + # component. + if new_path in normalized_path: + + # Currently does not support googlecode. + if 'googlecode' in parsed_deps[component_path]['repository']: + return (None, '', '') + + # Normalize the path by stripping everything off the component's relative + # path. + normalized_path = normalized_path.split(new_path,1)[1] - if '/build/' in normalized_path: - normalized_path = normalized_path.split('/build/')[-1] + # Add 'src/' or 'Source/' at the front of the normalized path, depending + # on what prefix the component path uses. For example, blink uses + # 'Source' but chromium uses 'src/', and blink component path is + # 'src/third_party/WebKit/Source', so add 'Source/' in front of the + # normalized path. 
+ if not normalized_path.startswith('src/') or \ + normalized_path.startswith('Source/'): - if not (normalized_path.startswith('src/') or - normalized_path.startswith('Source/')): - normalized_path = 'src/' + normalized_path + if (new_path.lower().endswith('src/') or + new_path.lower().endswith('source/')): + normalized_path = new_path.split('/')[-2] + '/' + normalized_path - return (component, normalized_path) + else: + normalized_path = 'src/' + normalized_path + + component_name = parsed_deps[component_path]['name'] + + return (component_path, component_name, normalized_path) + + # If the path does not match any component, default to chromium. + return ('src/', 'chromium', normalized_path) def SplitRange(regression): @@ -55,6 +151,9 @@ def SplitRange(regression): A list containing two numbers represented in string, for example ['1234','5678']. """ + if not regression: + return None + revisions = regression.split(':') # If regression information is not available, return none. @@ -62,10 +161,10 @@ def SplitRange(regression): return None # Strip 'r' from both start and end range. - start_range = revisions[0].lstrip('r') - end_range = revisions[1].lstrip('r') + range_start = revisions[0].lstrip('r') + range_end = revisions[1].lstrip('r') - return [start_range, end_range] + return [range_start, range_end] def LoadJSON(json_string): @@ -85,13 +184,14 @@ def LoadJSON(json_string): return data -def GetDataFromURL(url, retries=10, sleep_time=0.1): +def GetDataFromURL(url, retries=10, sleep_time=0.1, timeout=10): """Retrieves raw data from URL, tries 10 times. Args: url: URL to get data from. retries: Number of times to retry connection. sleep_time: Time in seconds to wait before retrying connection. + timeout: Time in seconds to wait before time out. Returns: None if the data retrieval fails, or the raw data. @@ -100,13 +200,16 @@ def GetDataFromURL(url, retries=10, sleep_time=0.1): for i in range(retries): # Retrieves data from URL. 
try: - data = urllib.urlopen(url) + data = urllib2.urlopen(url, timeout=timeout) # If retrieval is successful, return the data. if data: return data.read() # If retrieval fails, try after sleep_time second. + except urllib2.URLError: + time.sleep(sleep_time) + continue except IOError: time.sleep(sleep_time) continue @@ -205,7 +308,7 @@ def AddHyperlink(text, link): A string with hyperlink added. """ sanitized_link = cgi.escape(link, quote=True) - sanitized_text = cgi.escape(text) + sanitized_text = cgi.escape(str(text)) return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text) @@ -283,3 +386,77 @@ def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers, break return (line_intersection, stack_frame_index_intersection) + + +def MatchListToResultList(matches): + """Convert list of matches to the list of result objects. + + Args: + matches: A list of match objects along with its stack priority and revision + number/git hash + Returns: + A list of result object. + + """ + result_list = [] + + for _, cl, match in matches: + suspected_cl = cl + revision_url = match.revision_url + component_name = match.component_name + author = match.author + reason = match.reason + review_url = match.review_url + reviewers = match.reviewers + # For matches, line content do not exist. + line_content = None + + result = Result(suspected_cl, revision_url, component_name, author, reason, + review_url, reviewers, line_content) + result_list.append(result) + + return result_list + + +def BlameListToResultList(blame_list): + """Convert blame list to the list of result objects. + + Args: + blame_list: A list of blame objects. + + Returns: + A list of result objects. + """ + result_list = [] + + for blame in blame_list: + suspected_cl = blame.revision + revision_url = blame.url + component_name = blame.component_name + author = blame.author + reason = ( + 'The CL changes line %s of file %s from stack %d.' 
% + (blame.line_number, blame.file, blame.stack_frame_index)) + # Blame object does not have review url and reviewers. + review_url = None + reviewers = None + line_content = blame.content + + result = Result(suspected_cl, revision_url, component_name, author, reason, + review_url, reviewers, line_content) + result_list.append(result) + + return result_list + + +def ResultListToJSON(result_list): + """Converts result list to JSON format. + + Args: + result_list: A list of result objects + + Returns: + A string, JSON format of the result_list. + + """ + return json.dumps([result.ToDictionary() for result in result_list]) diff --git a/tools/findit/stacktrace.py b/tools/findit/stacktrace.py index 5b7f935..1048991 100644 --- a/tools/findit/stacktrace.py +++ b/tools/findit/stacktrace.py @@ -1,4 +1,4 @@ -# Copyright 2014 The Chromium Authors. All rights reserved. +# Copyright (c) 2014 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -13,17 +13,19 @@ class StackFrame(object): Attributes: index: An index of the stack frame. - component: A component this line represents, such as blink, chrome, etc. + component_path: The path of the component this frame represents. + component_name: The name of the component this frame represents. file_name: The name of the file that crashed. function: The function that caused the crash. file_path: The path of the crashed file. crashed_line_number: The line of the file that caused the crash. 
""" - def __init__(self, stack_frame_index, component, file_name, - function, file_path, crashed_line_number): + def __init__(self, stack_frame_index, component_path, component_name, + file_name, function, file_path, crashed_line_number): self.index = stack_frame_index - self.component = component + self.component_path = component_path + self.component_name = component_name self.file_name = file_name self.function = function self.file_path = file_path @@ -55,8 +57,9 @@ class Stacktrace(object): one callstacks. """ - def __init__(self, stacktrace, build_type): - self.stack_list = [] + def __init__(self, stacktrace, build_type, parsed_deps): + self.stack_list = None + self.parsed_deps = parsed_deps self.ParseStacktrace(stacktrace, build_type) def ParseStacktrace(self, stacktrace, build_type): @@ -72,7 +75,6 @@ class Stacktrace(object): """ # If the passed in string is empty, the object does not represent anything. if not stacktrace: - self.stack_list = None return # Reset the stack list. @@ -256,21 +258,29 @@ class Stacktrace(object): # Normalize the file path so that it can be compared to repository path. file_name = os.path.basename(file_path) - (component, file_path) = crash_utils.NormalizePathLinux(file_path) + (component_path, component_name, file_path) = ( + crash_utils.NormalizePathLinux(file_path, self.parsed_deps)) - # FIXME(jeun): Add other components. - if not (component == 'blink' or component == 'chromium'): + # If this component is not supported, ignore this line. + if not component_path: return None # Return a new stack frame object with the parsed information. - return StackFrame(stack_frame_index, component, file_name, function, - file_path, crashed_line_number) + return StackFrame(stack_frame_index, component_path, component_name, + file_name, function, file_path, crashed_line_number) def __getitem__(self, index): return self.stack_list[index] def GetCrashStack(self): - for callstack in self.stack_list: - # Only the crash stack has the priority 0. 
- if callstack.priority == 0: - return callstack + """Returns the callstack with the highest priority. + + Crash stack has priority 0, and allocation/freed/other thread stacks + get priority 1. + + Returns: + The highest priority callstack in the stacktrace. + """ + sorted_stacklist = sorted(self.stack_list, + key=lambda callstack: callstack.priority) + return sorted_stacklist[0] |