# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import xml.dom.minidom as minidom
from xml.parsers.expat import ExpatError

import crash_utils
from repository_parser_interface import ParserInterface


# This number is 6 because each linediff page in src.chromium.org should
# contain the following tables: table with revision number, table with actual
# diff, table with dropdown menu, table with legend, a border table and a table
# containing page information.
NUM_TABLES_IN_LINEDIFF_PAGE = 6

# Each of the linediff info should contain 3 tds, one for changed line number,
# and two for line contents before/after.
NUM_TDS_IN_LINEDIFF_PAGE = 3


def _GetFirstChildText(parent, tag_name, default=''):
  """Returns the value of the first child of the first <tag_name> element.

  Args:
    parent: A minidom node whose descendants are searched.
    tag_name: Name of the element to look for.
    default: Value returned when no such element exists or when the element
        has no child node (e.g. an empty <msg></msg>).

  Returns:
    The nodeValue of the first child of the first matching element, or
    default if there is nothing to read.
  """
  elements = parent.getElementsByTagName(tag_name)
  if not elements or elements[0].firstChild is None:
    return default
  return elements[0].firstChild.nodeValue


class SVNParser(ParserInterface):
  """Parser for SVN repository using chromium.org, for components in config.

  Attributes:
    component_to_urls_map: A map from component to the urls, where urls are
        for changelog, revision, line diff and annotation.
  """

  def __init__(self, url_map):
    """Stores the component-to-urls map used by the parse methods.

    Args:
      url_map: A map from component to that component's url templates.
    """
    self.component_to_urls_map = url_map

  def ParseChangelog(self, component, range_start, range_end):
    """Parses the SVN changelog of a component for a revision range.

    Args:
      component: Key into component_to_urls_map identifying the component.
      range_start: First revision of the range; formatted into the url.
      range_end: Last revision of the range; formatted into the url.

    Returns:
      A tuple (revision_map, file_to_revision_map). revision_map maps a
      revision number (int) to a dict with keys 'author', 'message' and
      'url'. file_to_revision_map maps a file path to a list of
      (revision number, change action) tuples. Both maps are empty if the
      component is unknown, nothing could be retrieved from the url, or the
      response is not parseable XML.
    """
    file_to_revision_map = {}
    revision_map = {}

    # Check if the current component is supported by reading the components
    # parsed from config file. If it is not, fail.
    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return (revision_map, file_to_revision_map)

    # Retrieve data from the url, return empty map if fails.
    revision_range_str = '%s:%s' % (range_start, range_end)
    url = url_map['changelog_url'] % revision_range_str
    response = crash_utils.GetDataFromURL(url)
    if not response:
      return (revision_map, file_to_revision_map)

    # Parse xml out of the returned string. If it fails, return empty map.
    try:
      xml_revisions = minidom.parseString(response)
    except ExpatError:
      return (revision_map, file_to_revision_map)

    # Iterate through the returned XML object.
    for revision in xml_revisions.getElementsByTagName('logentry'):
      # Get the revision number from xml.
      revision_number = int(revision.getAttribute('revision'))

      # Iterate through the changed paths in the CL.
      paths = revision.getElementsByTagName('paths')
      if paths:
        for changed_path in paths[0].getElementsByTagName('path'):
          # A <path> element without text carries no file name; skip it
          # rather than dereferencing a missing child node.
          if changed_path.firstChild is None:
            continue

          # Get path and file change type from the xml.
          file_path = changed_path.firstChild.nodeValue
          file_change_type = changed_path.getAttribute('action')

          if file_path.startswith('/trunk/'):
            file_path = file_path[len('/trunk/'):]

          # Add file to the map.
          file_to_revision_map.setdefault(file_path, []).append(
              (revision_number, file_change_type))

      # Add this CL to the revision map. Author and commit message fall back
      # to an empty string when the element is absent or empty, so that one
      # CL with an empty description does not abort the whole changelog.
      revision_map[revision_number] = {
          'author': _GetFirstChildText(revision, 'author'),
          'message': _GetFirstChildText(revision, 'msg'),
          'url': url_map['revision_url'] % revision_number,
      }

    return (revision_map, file_to_revision_map)

  def ParseLineDiff(self, path, component, file_change_type, revision_number):
    changed_line_numbers = []
    changed_line_contents = []

    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return (None, None, None)

    # If the file is added (not modified), treat it as if it is not changed.
    backup_url = url_map['revision_url'] % revision_number
    if file_change_type == 'A':
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Retrieve data from the url. If no data is retrieved, return empty lists.
    url = url_map['diff_url'] % (path, revision_number - 1,
                                 revision_number, revision_number)
    data = crash_utils.GetDataFromURL(url)
    if not data:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # NOTE(review): unlike ParseChangelog, this parse is not guarded against
    # ExpatError, so a malformed page would raise here - confirm whether that
    # is intended.
    line_diff_html = minidom.parseString(data)
    tables = line_diff_html.getElementsByTagName('table')
    # If there are not NUM_TABLES tables in the html page, there should be an
    # error in the html page.
    if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Diff content is in the second table. Each line of the diff content
    # is in