diff options
-rw-r--r-- | media/tools/bug_hunter/bug_hunter.py | 372 | ||||
-rw-r--r-- | media/tools/bug_hunter/bug_hunter_test.py | 97 | ||||
-rw-r--r-- | media/tools/bug_hunter/bug_hunter_unittest.py | 183 |
3 files changed, 652 insertions, 0 deletions
diff --git a/media/tools/bug_hunter/bug_hunter.py b/media/tools/bug_hunter/bug_hunter.py new file mode 100644 index 0000000..96a4289 --- /dev/null +++ b/media/tools/bug_hunter/bug_hunter.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""This script queries the Chromium issue tracker and e-mails the results. + +It queries issue tracker using Issue Tracker API. The query +parameters can be specified by command-line arguments. For example, with the +following command: + + 'python bug_hunter.py -q video Status:Unconfirmed OR audio Status:Unconfirmed + -s sender@chromium.org -r receiver@chromium.org -v 100 -u days' + +You will find all 'Unconfirmed' issues created in the last 100 days containing +'video' or 'audio' in their content/comments. The content of these issues are +sent to receiver@chromium.org. + +TODO(imasaki): users can specify the interval as say: "100d" for "100 days". + +There are two limitations in the current implementation of issue tracker API +and UI: +* only outermost OR is valid. For example, the query + 'video OR audio Status:Unconfirmed' is translated into + 'video OR (audio AND Status:Unconfirmed)' +* brackets are not supported. For example, the query + '(video OR audio) Status:Unconfirmed' does not work. + +You need to install following to run this script + gdata-python-client (http://code.google.com/p/gdata-python-client/) + rfc3339.py (http://henry.precheur.org/projects/rfc3339) + +Links: +* Chromium issue tracker: http://code.google.com/p/chromium/issues/list +* Issue tracker API: http://code.google.com/p/support/wiki/IssueTrackerAPI +* Search tips for the issue tracker: + http://code.google.com/p/chromium/issues/searchtips +""" + +import csv +import datetime +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +import logging +from operator import itemgetter +import optparse +import re +import smtplib +import socket +import sys +import urllib + +try: + import gdata.data + import gdata.projecthosting.client +except ImportError: + logging.error('gdata-client needs to be installed. Please install\n' + 'and try again (http://code.google.com/p/gdata-python-client/)') + sys.exit(1) + +try: + import rfc3339 +except ImportError: + logging.error('rfc3339 needs to be installed. Please install\n' + 'and try again (http://henry.precheur.org/projects/rfc3339)') + sys.exit(1) + +# A list of default values. +_DEFAULT_INTERVAL_UNIT = 'hours' +_DEFAULT_ISSUE_ELEMENT_IN_EMAIL = ('author', 'status', 'state', 'content', + 'comments', 'labels', 'urls') +_DEFAULT_PROJECT_NAME = 'chromium' +_DEFAULT_QUERY_TITLE = 'potential media bugs' +_DEFAULT_QUERY = ('video -has:Feature -has:Owner -label:nomedia ' + 'status:Unconfirmed OR audio -has:Feature -has:Owner ' + '-label:nomedia status:Unconfirmed') +_DEFAULT_OUTPUT_FILENAME = 'output.csv' +_DETAULT_MAX_COMMENTS = 1000 + +_INTERVAL_UNIT_CHOICES = ('hours', 'days', 'weeks') + +# URLs in this list are excluded from URL extraction from bug +# content/comments. Each list element should not contain the url ending in +# '/'. For example, the element should be 'http://www.google.com' but not +# 'http://www.google.com/' +_URL_EXCLUSION_LIST = ('http://www.youtube.com/html5', + 'http://www.google.com') +_ISSUE_ELEMENT_IN_EMAIL_CHOICES = ('issue_id', 'author', 'status', 'state', + 'content', 'comments', 'labels', 'urls') + + +def ParseArgs(): + """Returns options dictionary from parsed command line arguments.""" + parser = optparse.OptionParser() + + parser.add_option('-e', '--email-entries', + help=('A comma-separated list of issue entries that are ' + 'sent in the email content. ' + 'Possible strings are %s. Default: %%default.' % + ', '.join(_ISSUE_ELEMENT_IN_EMAIL_CHOICES)), + default=','.join(_DEFAULT_ISSUE_ELEMENT_IN_EMAIL)) + parser.add_option('-l', '--max-comments', + help=('The maximum number of comments returned for each ' + 'issue in a reverse chronological order. ' + 'Default: %default.'), + type='int', default=_DETAULT_MAX_COMMENTS) + parser.add_option('-o', '--output-filename', + help=('Filename for result output in CSV format. ' + 'Default: %default.'), + default=_DEFAULT_OUTPUT_FILENAME, metavar='FILE') + parser.add_option('-p', '--project-name', default=_DEFAULT_PROJECT_NAME, + help='Project name string. Default: %default') + parser.add_option('-q', '--query', default=_DEFAULT_QUERY, + help=('Query to be used to find bugs. The detail can be ' + 'found in Chromium Issue tracker page ' + 'http://code.google.com/p/chromium/issues/searchtips.' + ' Default: "%default".')) + parser.add_option('-r', '--receiver-email-address', + help="Receiver's email address (Required).") + parser.add_option('-s', '--sender-email-address', + help="Sender's email address (Required).") + parser.add_option('-t', '--query-title', + default=_DEFAULT_QUERY_TITLE, dest='query_title', + help=('Query title string used in the subject of the ' + 'result email. Default: %default.')) + parser.add_option('-u', '--interval_unit', default=_DEFAULT_INTERVAL_UNIT, + choices=_INTERVAL_UNIT_CHOICES, + help=('Unit name for |interval_value|. Valid options are ' + '%s. Default: %%default' % ( + ', '.join(_INTERVAL_UNIT_CHOICES)))) + parser.add_option('-v', '--interval-value', type='int', + help=('Interval value to find bugs. ' + 'The script looks for bugs during ' + 'that interval (up to now). This option is used in ' + 'conjunction with |--interval_unit| option. ' + 'The script looks for all bugs if this is not ' + 'specified.')) + + options = parser.parse_args()[0] + + options.email_entries = options.email_entries.split(',') + options.email_entries = [entry for entry in options.email_entries + if entry in _ISSUE_ELEMENT_IN_EMAIL_CHOICES] + if not options.email_entries: + logging.warning('No issue elements in email in option. ' + 'Default email entries will be used.') + options.email_entries = _DEFAULT_ISSUE_ELEMENT_IN_EMAIL + logging.info('The following is the issue elements in email: %s ' + ( + ', '.join(options.email_entries))) + return options + + +class BugHunter(object): + """This class queries issue trackers and e-mails the results.""" + + _ISSUE_SEARCH_LINK_BASE = ('http://code.google.com/p/chromium/issues/list?' + 'can=2&colspec=ID+Pri+Mstone+ReleaseBlock+Area' + '+Feature+Status+Owner+Summary&cells=tiles' + '&sort=-id') + # TODO(imasaki): Convert these into template library. + _EMAIL_ISSUE_TEMPLATE = ('<li><a href="http://crbug.com/%(issue_id)s">' + '%(issue_id)s %(title)s</a> ') + _EMAIL_SUBJECT_TEMPLATE = ('BugHunter found %(n_issues)d %(query_title)s ' + 'bug%(plural)s%(time_msg)s!') + _EMAIL_MSG_TEMPLATE = ('<a href="%(link_base)s&q=%(unquote_query_text)s">' + 'Used Query</a>: %(query_text)s<br><br>' + 'The number of issues : %(n_issues)d<br>' + '<ul>%(issues)s</ul>') + + def __init__(self, options): + """Sets up initial state for Bug Hunter. + + Args: + options: Command-line options. + """ + self._client = gdata.projecthosting.client.ProjectHostingClient() + self._options = options + self._issue_template = BugHunter._EMAIL_ISSUE_TEMPLATE + for entry in options.email_entries: + self._issue_template += '%%(%s)s ' % entry + self._issue_template += '</li>' + + def GetComments(self, issue_id, max_comments): + """Get comments for a issue. + + Args: + issue_id: Issue id for each issue in the issue tracker. + max_comments: The maximum number of comments to be returned. The comments + are returned in a reverse chronological order. + + Returns: + A list of (author name, comments, updated time) tuples. + """ + comments_feed = self._client.get_comments(self._options.project_name, + issue_id) + comment_list = [(comment.content.text, comment.author[0].name.text, + comment.updated.text) + for comment + in list(reversed(comments_feed.entry))[0:max_comments]] + return comment_list + + def GetIssues(self): + """Get issues from issue tracker and return them. + + Returns: + A list of issues in descending order by issue_id. Each element in the + list is a dictionary where the keys are 'issue_id', 'title', 'author', + 'status', 'state', 'content', 'comments', 'labels', 'urls'. + Returns an empty list when there is no matching issue. + """ + min_time = None + if self._options.interval_value: + # Issue Tracker Data API uses RFC 3339 timestamp format, For example: + # 2005-08-09T10:57:00-08:00 + # (http://code.google.com/p/support/wiki/IssueTrackerAPIPython) + delta = datetime.timedelta( + **{self._options.interval_unit: self._options.interval_value}) + dt = datetime.datetime.now() - delta + min_time = rfc3339.rfc3339(dt) + + query = gdata.projecthosting.client.Query(text_query=self._options.query, + max_results=1000, + published_min=min_time) + + feed = self._client.get_issues(self._options.project_name, query=query) + if not feed.entry: + logging.info('No issues available to match query %s.', + self._options.query) + return [] + issues = [] + for entry in feed.entry: + # The fully qualified id is a URL. We just want the number. + issue_id = entry.id.text.split('/')[-1] + if not issue_id.isdigit(): + logging.warning('Issue_id is not correct: %s. Skipping.', issue_id) + continue + label_list = [label.text for label in entry.label] + comments = '' + if 'comments' in self._options.email_entries: + comments = ''.join( + [''.join(comment) if not comment else '' + for comment + in self.GetComments(issue_id, self._options.max_comments)]) + content = BugHunterUtils.StripHTML(entry.content.text) + url_list = list( + set(re.findall(r'(https?://\S+)', content + comments))) + url_list = [url for url in url_list + if not url.rstrip('/') in _URL_EXCLUSION_LIST] + issues.append({'issue_id': issue_id, 'title': entry.title.text, + 'author': entry.author[0].name.text, + 'status': entry.status.text, + 'state': entry.state.text, 'content': content, + 'comments': comments, 'labels': label_list, + 'urls': url_list}) + return sorted(issues, key=itemgetter('issue_id'), reverse=True) + + def _SetUpEmailSubjectMsg(self, issues): + """Set up email subject and its content. + + Args: + issues: Please refer to the return value in GetIssues(). + + Returns: + A tuple of two strings (email subject and email content). + """ + time_msg = '' + if self._options.interval_value: + time_msg = ' in the past %s %s%s' % ( + self._options.interval_value, self._options.interval_unit[:-1], + 's' if self._options.interval_value > 1 else '') + subject = BugHunter._EMAIL_SUBJECT_TEMPLATE % { + 'n_issues': len(issues), + 'query_title': self._options.query_title, + 'plural': 's' if len(issues) > 1 else '', + 'time_msg': time_msg} + content = BugHunter._EMAIL_MSG_TEMPLATE % { + 'link_base': BugHunter._ISSUE_SEARCH_LINK_BASE, + 'unquote_query_text': urllib.quote(self._options.query), + 'query_text': self._options.query, + 'n_issues': len(issues), + 'issues': ''.join( + [self._issue_template % issue for issue in issues])} + return (subject, content) + + def SendResultEmail(self, issues): + """Send result email. + + Args: + issues: Please refer to the return value in GetIssues(). + """ + subject, content = self._SetUpEmailSubjectMsg(issues) + BugHunterUtils.SendEmail( + content, self._options.sender_email_address, + self._options.receiver_email_address, subject) + + def WriteIssuesToFileInCSV(self, issues, filename): + """Write issues to a file in CSV format. + + Args: + issues: Please refer to the return value in GetIssues(). + filename: File name for CSV file. + """ + with open(filename, 'w') as f: + writer = csv.writer(f) + # Write header first. + writer.writerow(issues[0].keys()) + for issue in issues: + writer.writerow( + [unicode(value).encode('utf-8') for value in issue.values()]) + + +class BugHunterUtils(object): + """Utility class for Bug Hunter.""" + + @staticmethod + def StripHTML(string_with_html): + """Strip HTML tags from string. + + Args: + string_with_html: A string with HTML tags. + + Returns: + A string without HTML tags. + """ + return re.sub('<[^<]+?>', '', string_with_html) + + @staticmethod + def SendEmail(message, sender_email_address, receivers_email_address, + subject): + """Send email using localhost's mail server. + + Args: + message: Email message to be sent. + sender_email_address: Sender's email address. + receivers_email_address: Receiver's email address. + subject: Email subject. + + Returns: + True if successful; False, otherwise. + """ + try: + html = '<html><head></head><body>%s</body></html>' % message + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = sender_email_address + msg['To'] = receivers_email_address + msg.attach(MIMEText(html.encode('utf-8'), 'html', _charset='utf-8')) + smtp_obj = smtplib.SMTP('localhost') + smtp_obj.sendmail(sender_email_address, receivers_email_address, + msg.as_string()) + logging.info('Successfully sent email.') + smtp_obj.quit() + return True + except smtplib.SMTPException: + logging.exception('Authentication failed, unable to send email.') + except (socket.gaierror, socket.error, socket.herror): + logging.exception('Unable to send email.') + return False + + +def Main(): + ops = ParseArgs() + bh = BugHunter(ops) + issues = bh.GetIssues() + if issues and ops.sender_email_address and ops.receiver_email_address: + bh.SendResultEmail(issues) + if issues: + bh.WriteIssuesToFileInCSV(issues, ops.output_filename) + + +if __name__ == '__main__': + Main() diff --git a/media/tools/bug_hunter/bug_hunter_test.py b/media/tools/bug_hunter/bug_hunter_test.py new file mode 100644 index 0000000..1b969e5 --- /dev/null +++ b/media/tools/bug_hunter/bug_hunter_test.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Integration tests for bug hunter.""" + +import csv +from optparse import Values +import os +import unittest + +from bug_hunter import BugHunter + +try: + import gdata.data + import gdata.projecthosting.client +except ImportError: + logging.error('gdata-client needs to be installed. Please install\n' + 'and try again (http://code.google.com/p/gdata-python-client/)') + sys.exit(1) + + +class BugHunterTest(unittest.TestCase): + """Unit tests for the Bug Hunter class.""" + _TEST_FILENAME = 'test.csv' + + def _CleanTestFile(self): + if os.path.exists(self._TEST_FILENAME): + os.remove(self._TEST_FILENAME) + + def setUp(self): + self._CleanTestFile() + + def tearDown(self): + self._CleanTestFile() + + def _GetIssue(self): + return [{'issue_id': '0', 'title': 'title', 'author': 'author', + 'status': 'status', 'state': 'state', 'content': 'content', + 'comments': [], 'labels': [], 'urls': []}] + + def _GetDefaultOption(self, set_10_days_ago, query='steps'): + ops = Values() + ops.query = query + if set_10_days_ago: + ops.interval_value = 10 + ops.interval_unit = 'days' + else: + ops.interval_value = None + ops.email_entries = ['comments'] + ops.project_name = 'chromium' + ops.query_title = 'query title' + ops.max_comments = None + return ops + + def testGetIssueReturnedIssue(self): + bh = BugHunter( + self._GetDefaultOption(False, + query=('audio opened-after:2010/10/10' + ' opened-before:2010/10/20'))) + self.assertEquals(len(bh.GetIssues()), 18) + + def testGetIssueReturnedIssueWithStatus(self): + ops = self._GetDefaultOption(False) + ops.query = 'Feature:Media* Status:Unconfirmed' + issues = BugHunter(ops).GetIssues() + for issue in issues: + self.assertEquals(issue['status'], 'Unconfirmed') + + def testGetIssueReturnNoIssue(self): + ops = self._GetDefaultOption(True) + ops.query = 'thisshouldnotmatchpleaseignorethis*' + self.assertFalse(BugHunter(ops).GetIssues()) + + def testGetComments(self): + comments = BugHunter(self._GetDefaultOption(False)).GetComments(100000, 2) + self.assertEquals(len(comments), 2) + expected_comments = [(None, 'rby...@chromium.org', + '2011-10-31T19:54:40.000Z'), + (None, 'backer@chromium.org', + '2011-10-14T13:59:37.000Z')] + self.assertEquals(comments, expected_comments) + + def testWriteIssuesToFileInCSV(self): + ops = self._GetDefaultOption(False) + bh = BugHunter(ops) + bh.WriteIssuesToFileInCSV(self._GetIssue(), self._TEST_FILENAME) + + with open(self._TEST_FILENAME, 'r') as f: + reader = csv.reader(f) + self.assertEquals(reader.next(), ['status', 'content', 'state', + 'issue_id', 'urls', 'title', 'labels', + 'author', 'comments']) + self.assertEquals(reader.next(), ['status', 'content', 'state', '0', + '[]', 'title', '[]', 'author', '[]']) + self.assertRaises(StopIteration, reader.next) diff --git a/media/tools/bug_hunter/bug_hunter_unittest.py b/media/tools/bug_hunter/bug_hunter_unittest.py new file mode 100644 index 0000000..5dc8fd4 --- /dev/null +++ b/media/tools/bug_hunter/bug_hunter_unittest.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Unit Tests for bug hunter.""" + +import logging +from optparse import Values +import smtplib +import sys +import unittest + +from bug_hunter import BugHunter +from bug_hunter import BugHunterUtils + +try: + import atom.data + import gdata.data + import gdata.projecthosting.client +except ImportError: + logging.error('gdata-client needs to be installed. Please install\n' + 'and try again (http://code.google.com/p/gdata-python-client/)') + sys.exit(1) + + +class MockClient(object): + """A mock class for gdata.projecthosting.client.ProjectHostingClient. + + Mocking the very simple method invocations for get_issues() and + get_comments(). + """ + + def _CreateIssues(self, n_issues): + feed = gdata.projecthosting.data.IssuesFeed() + for i in xrange(n_issues): + feed.entry.append(gdata.projecthosting.data.IssueEntry( + title=atom.data.Title(text='title'), + content=atom.data.Content(text='http://www.content.com'), + id=atom.data.Id(text='/' + str(i)), + status=gdata.projecthosting.data.Status(text='Unconfirmed'), + state=gdata.projecthosting.data.State(text='open'), + label=[gdata.projecthosting.data.Label('label1')], + author=[atom.data.Author(name=atom.data.Name(text='author'))])) + return feed + + def get_issues(self, project_name, query): + """Get issues using mock object without calling the issue tracker API. + + Based on query argument, this returns the dummy issues. The number of + dummy issues are specified in query.text_query. + + Args: + project_name: A string for project name in the issue tracker. + query: A query object for querying the issue tracker. + + Returns: + A IssuesFeed object that contains a simple test issue. + """ + n_issues = 1 + if query.text_query.isdigit(): + n_issues = int(query.text_query) + return self._CreateIssues(n_issues) + + def get_comments(self, project_name, issue_id): + """Get comments using mock object without calling the issue tracker API. + + Args: + project_name: A string for project name in the issue tracker. + issue_id: Issue_id string. + + Returns: + A CommentsFeed object that contains a simple test comment. + """ + feed = gdata.projecthosting.data.CommentsFeed() + feed.entry = [gdata.projecthosting.data.CommentEntry( + id=atom.data.Id(text='/0'), + content=atom.data.Content(text='http://www.comments.com'), + updated=atom.data.Updated(text='Updated'), + author=[atom.data.Author(name=atom.data.Name(text='cauthor'))])] + return feed + + +class BugHunterUnitTest(unittest.TestCase): + """Unit tests for the Bug Hunter class.""" + + def setUp(self): + self._old_client = gdata.projecthosting.client.ProjectHostingClient + gdata.projecthosting.client.ProjectHostingClient = MockClient + + def tearDown(self): + gdata.projecthosting.client.ProjectHostingClient = self._old_client + + def _GetDefaultOption(self, set_10_days_ago, query='steps'): + ops = Values() + ops.query = query + if set_10_days_ago: + ops.interval_value = 10 + ops.interval_unit = 'days' + else: + ops.interval_value = None + ops.email_entries = ['comments'] + ops.project_name = 'chromium' + ops.query_title = 'query title' + ops.max_comments = None + return ops + + def _GetIssue(self, n_issues): + issues = [] + for i in xrange(n_issues): + issues.append({'issue_id': str(i), 'title': 'title', 'author': 'author', + 'status': 'status', 'state': 'state', + 'content': 'content', 'comments': [], + 'labels': [], 'urls': []}) + return issues + + def testSetUpEmailSubjectMsg(self): + bh = BugHunter(self._GetDefaultOption(False)) + subject, content = bh._SetUpEmailSubjectMsg(self._GetIssue(1)) + self.assertEquals(subject, + 'BugHunter found 1 query title bug!') + self.assertEquals(content, + ('<a href="http://code.google.com/p/chromium/issues/' + 'list?can=2&colspec=ID+Pri+Mstone+ReleaseBlock+Area+' + 'Feature+Status+Owner+Summary&cells=tiles&sort=-id&' + 'q=steps">Used Query</a>: steps<br><br>The number of ' + 'issues : 1<br><ul><li><a href="http://crbug.com/0">0 ' + 'title</a> [] </li></ul>')) + + def testSetUpEmailSubjectMsgMultipleIssues(self): + bh = BugHunter(self._GetDefaultOption(False)) + subject, content = bh._SetUpEmailSubjectMsg(self._GetIssue(2)) + self.assertEquals(subject, + 'BugHunter found 2 query title bugs!') + + def testSetUpEmailSubjectMsgWith10DaysAgoAndAssertSubject(self): + bh = BugHunter(self._GetDefaultOption(True)) + subject, _ = bh._SetUpEmailSubjectMsg(self._GetIssue(1)) + self.assertEquals(subject, + ('BugHunter found 1 query title bug in the past 10 ' + 'days!')) + + def testGetIssuesWithMockClient(self): + bh = BugHunter(self._GetDefaultOption(False, + query=('dummy'))) + expected_issues = [{'issue_id': '0', 'title': 'title', 'author': 'author', + 'status': 'Unconfirmed', 'state': 'open', + 'content': 'http://www.content.com', + 'comments': '', 'labels': ['label1'], + 'urls': ['http://www.content.com']}] + self.assertEquals(expected_issues, bh.GetIssues()) + + +class MockSmtp(object): + """A mock class for SMTP.""" + + def __init__(self, server): + pass + + def sendmail(self, sender_email_address, receivers_email_addresses, + msg): + # TODO(imasaki): Do something here. + return True + + def quit(self): + pass + + +class BugHunterUtilsTest(unittest.TestCase): + """Unit tests for the Bug Hunter utility.""" + + def testStripHTML(self): + self.assertEquals(BugHunterUtils.StripHTML('<p>X</p>'), 'X') + + def testStripHTMLEmpty(self): + self.assertEquals(BugHunterUtils.StripHTML(''), '') + + def testSendEmail(self): + smtplib.SMTP = MockSmtp + self.assertEqual(BugHunterUtils.SendEmail('message', 'sender_email_address', + 'receivers_email_addresses', + 'subject'), + True) |