summaryrefslogtreecommitdiffstats
path: root/tools/buildbot/pylibs/buildbot/changes/bonsaipoller.py
blob: 2e319bb4c4e40a3cfe7a19f4fd775f336541d055 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
import time
from xml.dom import minidom

from twisted.python import log, failure
from twisted.internet import reactor
from twisted.internet.task import LoopingCall
from twisted.web.client import getPage

from buildbot.changes import base, changes

class InvalidResultError(Exception):
    """Raised when the result of a Bonsai query is malformed.

    The reason is stored in the ``value`` attribute; the string form of the
    exception is the repr() of that value.
    """
    def __init__(self, value="InvalidResultError"):
        self.value = value

    def __str__(self):
        return "%r" % (self.value,)

class EmptyResult(Exception):
    """Raised when a Bonsai query result contains no <ci> nodes at all."""

class NoMoreCiNodes(Exception):
    """Raised once every <ci> node of a query result has been consumed."""

class NoMoreFileNodes(Exception):
    """Raised once every <f> node of the current <ci> node has been consumed."""

class BonsaiResult:
    """I hold a list of CiNodes"""
    def __init__(self, nodes=None):
        """
        @type   nodes:  list
        @param  nodes:  The CiNodes making up this result. When omitted, a
                        new empty list is created for this instance.
        """
        # A literal [] default would be created once and shared by every
        # instance constructed without an argument.
        if nodes is None:
            nodes = []
        self.nodes = nodes

    def __cmp__(self, other):
        """Compare two results check-in by check-in.

        Returns 0 when both results hold the same number of nodes and every
        pair of nodes agrees on log, who, date and on the revision and
        filename of each file (in order). Returns -1 otherwise.
        """
        # A differing number of check-ins means "not equal"; this must be
        # non-zero, because 0 (and therefore False) means "equal" to cmp().
        if len(self.nodes) != len(other.nodes):
            return -1

        for mine, theirs in zip(self.nodes, other.nodes):
            if mine.log != theirs.log \
              or mine.who != theirs.who \
              or mine.date != theirs.date \
              or len(mine.files) != len(theirs.files):
                return -1

            # the file lists have the same length here, so zip() pairs
            # every file of one node with its counterpart in the other
            for myFile, theirFile in zip(mine.files, theirs.files):
                if myFile.revision != theirFile.revision \
                  or myFile.filename != theirFile.filename:
                    return -1

        return 0

    def __eq__(self, other):
        """Equality in terms of __cmp__, for callers using == directly."""
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        """Inequality in terms of __cmp__, for callers using != directly."""
        return self.__cmp__(other) != 0

class CiNode:
    """I hold information about one <ci> node, including a list of files"""
    def __init__(self, log="", who="", date=0, files=None):
        """
        @type   log:    string
        @param  log:    The check-in comment
        @type   who:    string
        @param  who:    The value of the node's 'who' attribute
        @type   date:   int
        @param  date:   The value of the node's 'date' attribute
        @type   files:  list
        @param  files:  The FileNodes of this check-in. When omitted, a new
                        empty list is created for this instance.
        """
        self.log = log
        self.who = who
        self.date = date
        # A literal [] default would be one list shared by every CiNode built
        # without files, and the parser extends 'files' in place.
        if files is None:
            files = []
        self.files = files

class FileNode:
    """I hold the revision and the filename of one <f> node"""
    def __init__(self, revision="", filename=""):
        self.revision, self.filename = revision, filename

class BonsaiParser:
    """I parse the XML result from a bonsai cvsquery.

    All parsing happens in the constructor; the outcome is then available
    from getData() as a BonsaiResult.
    """

    def __init__(self, data):
        """
        @param  data:   The raw text of the query result

        @raise InvalidResultError: if the data cannot be parsed as XML, or
                                   a <ci> or <f> node is malformed
        @raise EmptyResult:        if the result holds no <ci> nodes
        """
        try:
            # this is a fix for non-ascii characters
            # because bonsai does not give us an encoding to work with
            # it is impossible to be 100% sure what to decode it as but
            # latin1 covers the broadest base
            data = data.decode("latin1")
            data = data.encode("ascii", "replace")
            self.dom = minidom.parseString(data)
            log.msg(data)
        except Exception:
            # Deliberately not a bare except: a KeyboardInterrupt or
            # SystemExit must not be reported as malformed XML.
            raise InvalidResultError("Malformed XML in result")

        self.ciNodes = self.dom.getElementsByTagName("ci")
        self.currentCiNode = None # filled in by _nextCiNode()
        self.fileNodes = None # filled in by _nextCiNode()
        self.currentFileNode = None # filled in by _nextFileNode()
        self.bonsaiResult = self._parseData()

    def getData(self):
        """Returns the BonsaiResult built when this parser was created"""
        return self.bonsaiResult

    def _parseData(self):
        """Returns data from a Bonsai cvsquery in a BonsaiResult object

        InvalidResultError and EmptyResult raised by the helpers are not
        handled here; they propagate to the caller.
        """
        nodes = []
        try:
            # _nextCiNode() and _nextFileNode() never return a false value;
            # both loops are ended by the exception raised on exhaustion
            while self._nextCiNode():
                files = []
                try:
                    while self._nextFileNode():
                        files.append(FileNode(self._getRevision(),
                                              self._getFilename()))
                except NoMoreFileNodes:
                    pass
                cinode = CiNode(self._getLog(), self._getWho(),
                                self._getDate(), files)
                # hack around bonsai xml output bug for empty check-in comments
                if not cinode.log and nodes and \
                        not nodes[-1].log and \
                        cinode.who == nodes[-1].who and \
                        cinode.date == nodes[-1].date:
                    nodes[-1].files += cinode.files
                else:
                    nodes.append(cinode)

        except NoMoreCiNodes:
            pass

        return BonsaiResult(nodes)


    def _nextCiNode(self):
        """Iterates to the next <ci> node and fills self.fileNodes with
           child <f> nodes

        Returns True on success.

        @raise EmptyResult:        if the result never had any <ci> node
        @raise NoMoreCiNodes:      if all <ci> nodes have been consumed
        @raise InvalidResultError: if the node has more than one <files>
        """
        try:
            self.currentCiNode = self.ciNodes.pop(0)
        except IndexError:
            # currentCiNode is still None only if no node was ever popped,
            # i.e. there were zero <ci> nodes in the result
            if self.currentCiNode is None:
                raise EmptyResult()
            raise NoMoreCiNodes()

        if len(self.currentCiNode.getElementsByTagName("files")) > 1:
            raise InvalidResultError("Multiple <files> for one <ci>")

        self.fileNodes = self.currentCiNode.getElementsByTagName("f")
        return True

    def _nextFileNode(self):
        """Iterates to the next <f> node

        Returns True on success.

        @raise NoMoreFileNodes: if all <f> nodes of the current <ci> node
                                have been consumed
        """
        try:
            self.currentFileNode = self.fileNodes.pop(0)
        except IndexError:
            raise NoMoreFileNodes()

        return True

    def _getLog(self):
        """Returns the log of the current <ci> node

        @raise InvalidResultError: unless the node has exactly one <log>
        """
        logs = self.currentCiNode.getElementsByTagName("log")
        if len(logs) < 1:
            raise InvalidResultError("No log present")
        elif len(logs) > 1:
            raise InvalidResultError("Multiple logs present")

        # catch empty check-in comments
        if logs[0].firstChild:
            return logs[0].firstChild.data
        return ''

    def _getWho(self):
        """Returns the e-mail address of the committer"""
        # convert unicode string to regular string
        return str(self.currentCiNode.getAttribute("who"))

    def _getDate(self):
        """Returns the date (unix time) of the commit

        @raise InvalidResultError: if the 'date' attribute is not an integer
        """
        # convert unicode number to regular one
        try:
            commitDate = int(self.currentCiNode.getAttribute("date"))
        except ValueError:
            # carry a reason, so that whoever logs the error's value sees
            # more than the name of the exception class
            raise InvalidResultError("Invalid or missing date")

        return commitDate

    def _getFilename(self):
        """Returns the filename of the current <f> node

        @raise InvalidResultError: if the <f> node has no content
        """
        try:
            filename = self.currentFileNode.firstChild.data
        except AttributeError:
            # firstChild is None for an empty <f> node
            raise InvalidResultError("Missing filename")

        return filename

    def _getRevision(self):
        """Returns the 'rev' attribute of the current <f> node"""
        return self.currentFileNode.getAttribute("rev")


class BonsaiPoller(base.ChangeSource):
    """This source will poll a bonsai server for changes and submit
    them to the change master."""

    compare_attrs = ["bonsaiURL", "pollInterval", "tree",
                     "module", "branch", "cvsroot"]

    parent = None # filled in when we're added
    loop = None # the LoopingCall running poll(), created by startService()
    volatile = ['loop']
    working = False # True while a poll is in progress, see poll()

    def __init__(self, bonsaiURL, module, branch, tree="default",
                 cvsroot="/cvsroot", pollInterval=30):
        """
        @type   bonsaiURL:      string
        @param  bonsaiURL:      The base URL of the Bonsai server
                                (ie. http://bonsai.mozilla.org)
        @type   module:         string
        @param  module:         The module to look for changes in. Commonly
                                this is 'all'
        @type   branch:         string
        @param  branch:         The branch to look for changes in. This must
                                match the
                                'branch' option for the Scheduler.
        @type   tree:           string
        @param  tree:           The tree to look for changes in. Commonly this
                                is 'all'
        @type   cvsroot:        string
        @param  cvsroot:        The cvsroot of the repository. Usually this is
                                '/cvsroot'
        @type   pollInterval:   int
        @param  pollInterval:   The time (in seconds) between queries for
                                changes
        """

        self.bonsaiURL = bonsaiURL
        self.module = module
        self.branch = branch
        self.tree = tree
        self.cvsroot = cvsroot
        self.pollInterval = pollInterval
        # lastChange is the lower bound ('mindate') of the next query,
        # lastPoll is the time the most recent query was issued
        self.lastChange = time.time()
        self.lastPoll = time.time()

    def startService(self):
        """Creates the polling loop and schedules it to be started"""
        self.loop = LoopingCall(self.poll)
        base.ChangeSource.startService(self)

        reactor.callLater(0, self.loop.start, self.pollInterval)

    def stopService(self):
        """Stops the polling loop, if it is running, and the service"""
        # LoopingCall.stop() asserts that the loop is running. That is not
        # the case when the service is stopped before the start scheduled
        # by startService() has fired, or when it was never started.
        if self.loop is not None and self.loop.running:
            self.loop.stop()
        return base.ChangeSource.stopService(self)

    def describe(self):
        """Returns an HTML fragment describing what is being polled"""
        description = "Getting changes from the Bonsai service running " \
                      "at %s " % self.bonsaiURL
        description += "<br>Using tree: %s, branch: %s, and module: %s" \
                       % (self.tree, self.branch, self.module)
        return description

    def poll(self):
        """Queries Bonsai and processes the result, unless the previous
        poll has not finished yet"""
        if self.working:
            log.msg("Not polling Bonsai because last poll is still working")
            return

        self.working = True
        d = self._get_changes()
        d.addCallback(self._process_changes)
        d.addCallbacks(self._finished_ok, self._finished_failure)

    def _finished_ok(self, res):
        """Marks the poll as finished after a successful run"""
        assert self.working
        self.working = False

        # check for failure -- this is probably never hit but the twisted docs
        # are not clear enough to be sure. it is being kept "just in case"
        if isinstance(res, failure.Failure):
            log.msg("Bonsai poll failed: %s" % res)
        return res

    def _finished_failure(self, res):
        """Logs a failed poll and marks it as finished"""
        log.msg("Bonsai poll failed: %s" % res)
        assert self.working
        self.working = False
        return None # eat the failure

    def _make_url(self):
        """Returns the cvsquery.cgi URL asking for the check-ins made
        between lastChange and now, as XML"""
        args = ["treeid=%s" % self.tree, "module=%s" % self.module,
                "branch=%s" % self.branch, "branchtype=match",
                "sortby=Date", "date=explicit",
                "mindate=%d" % self.lastChange,
                "maxdate=%d" % int(time.time()),
                "cvsroot=%s" % self.cvsroot, "xml=1"]
        # build the bonsai URL
        # NOTE(review): the values are inserted without URL quoting
        return self.bonsaiURL + "/cvsquery.cgi?" + "&".join(args)

    def _get_changes(self):
        """Starts fetching the query result and returns the Deferred from
        getPage(). pollInterval doubles as the timeout of the request."""
        url = self._make_url()
        log.msg("Polling Bonsai tree at %s" % url)

        self.lastPoll = time.time()
        # get the page, in XML format
        return getPage(url, timeout=self.pollInterval)

    def _process_changes(self, query):
        """Parses a query result and submits one Change per check-in to
        the parent. Unparsable results are logged and dropped, empty ones
        are dropped silently.

        @param  query:  The page fetched by _get_changes()
        """
        # function-scope import: only needed for the exc_info() call below
        import sys

        try:
            result = BonsaiParser(query).getData()
        except InvalidResultError:
            # sys.exc_info() rather than "except X, e", which only parses
            # on Python 2
            e = sys.exc_info()[1]
            log.msg("Could not process Bonsai query: " + e.value)
            return
        except EmptyResult:
            return

        for cinode in result.nodes:
            files = [fileNode.filename + ' (revision '+fileNode.revision+')'
                     for fileNode in cinode.files]
            c = changes.Change(who = cinode.who,
                               files = files,
                               comments = cinode.log,
                               when = cinode.date,
                               branch = self.branch)
            self.parent.addChange(c)
            # set inside the loop: a poll that yields no check-ins leaves
            # lastChange, and with it the next query's mindate, untouched
            self.lastChange = self.lastPoll