summaryrefslogtreecommitdiffstats
path: root/tools/code_coverage/process_coverage.py
blob: e42e95b96bbce56409c01be11bd9bb664b151bb9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
#!/usr/bin/python2.4
#
# Copyright 2008, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#        * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#        * Redistributions in binary form must reproduce the above
#     copyright notice, this list of conditions and the following disclaimer
#     in the documentation and/or other materials provided with the
#     distribution.
#        * Neither the name of Google Inc. nor the names of its
#     contributors may be used to endorse or promote products derived from
#     this software without specific prior written permission.
#
#     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#     LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Script to clean lcov files and convert them to HTML.

TODO(niranjan): Add usage information here
"""


import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import time
import urllib2


# These are source files that were generated during compile time. We want to
# remove references to these files from the lcov file otherwise genhtml will
# throw an error. CleanWin32Lcov treats each entry as a substring to look for
# in the 'SF:' lines of the lcov file.
win32_srcs_exclude = ['parse.y',
                      'xpathgrammar.cpp',
                      'cssgrammar.cpp',
                      'csspropertynames.gperf']

# Number of lines of a new coverage data set
# to send at a time to the dashboard.
POST_CHUNK_SIZE = 50

# Number of consecutive post request failures to allow before giving up
# (SendPost resets its failure counter after every successful request).
MAX_FAILURES = 5

def CleanPathNames(dir):
  """Clean the pathnames of the HTML generated by genhtml.

  This method is required only for code coverage on Win32. Due to a known issue
  with reading from CIFS shares mounted on Linux, genhtml appends a ^M to every
  file name it reads from the Windows share, causing corrupt filenames in
  genhtml's output folder.

  Only file names are cleaned; directory names are left untouched.

  Args:
    dir: Output folder of the genhtml output. It is walked recursively.

  Returns:
    None
  """
  # Strip off the ^M characters that get appended to the file name.
  for dirpath, _, filenames in os.walk(dir):
    for name in filenames:
      clean_name = name.replace('\r', '')
      if clean_name != name:
        # os.walk yields bare file names, so both ends of the rename have to
        # be joined with dirpath; otherwise they resolve against the current
        # working directory instead of the folder being walked.
        os.rename(os.path.join(dirpath, name),
                  os.path.join(dirpath, clean_name))


def GenerateHtml(lcov_path, dash_root):
  """Runs genhtml to convert lcov data to human readable HTML.

  This script expects the LCOV file name to be in the format:
  chrome_<platform>_<revision#>.lcov.
  This method parses the file name and then sets up the correct folder
  hierarchy (<dash_root>/<platform>/<revision>) for the coverage data and
  then runs genhtml to get the actual HTML formatted coverage data.

  Args:
    lcov_path: Path of the lcov data file.
    dash_root: Root location of the dashboard.

  Returns:
    A placeholder string on success (the real coverage percentage is not
    computed yet).
    None on failure: the file name is not in the expected format, genhtml
    could not be started, or genhtml exited with a non-zero status.
  """
  # Parse the LCOV file name.
  filename = os.path.basename(lcov_path).split('.')[0]
  name_parts = filename.split('_')
  # Remove trailing '/', but keep a root that consists only of '/'.
  dash_root = dash_root.rstrip('/') or dash_root

  # TODO(niranjan): Check the formatting using a regexp
  if len(name_parts) < 3: # Check if filename has right formatting
    # TODO(niranjan): Add failure logging here.
    return None # File not formatted correctly

  # Set up correct folder hierarchy in the dashboard root. The platform and
  # revision are the last two '_'-separated fields of the file name.
  platform = name_parts[-2]
  revision = name_parts[-1]
  output_dir = os.path.join(dash_root, platform, revision)
  # makedirs creates the platform level as well; the isdir check lets the
  # script be re-run for a revision whose folder already exists.
  if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

  # Run genhtml. An argument list (no shell) keeps paths containing spaces or
  # shell metacharacters intact.
  try:
    exit_status = subprocess.call(['/usr/bin/genhtml',
                                   '-o', output_dir,
                                   lcov_path])
  except OSError:
    # genhtml is missing or not executable.
    return None
  if exit_status != 0:
    return None

  # TODO(niranjan): Parse the stdout and return coverage percentage.
  CleanPathNames(output_dir)
  return 'dummy' # TODO(niranjan): Return actual percentage.


def _NormalizeWin32SourcePath(sf_line, src_root):
  r"""Translates the path of a Win32 'SF:' line to a path below src_root.

  Args:
    sf_line: An lcov source-file line such as 'SF:c:\foo\src\bar.cpp'.
    src_root: Source root without a trailing '/'.

  Returns:
    src_root followed by the path that comes after the drive letter, with
    '/' separators and without a line terminator.
  """
  # 'SF:c:\foo\bar.cpp' -> ['SF', 'c', '\foo\bar.cpp']. Splitting at most
  # twice keeps any further ':' inside the path, and taking the last field
  # also copes with a line that carries no drive letter.
  fields = sf_line.rstrip('\r\n').split(':', 2)
  return (src_root + fields[-1]).replace('\\', '/')


def _CountLinesOfCode(source_path):
  """Runs cloc.pl on one file and extracts the line count from its output.

  Args:
    source_path: Path of the source file to count.

  Returns:
    The count field as printed by cloc (a string), or None if the output of
    cloc contains 'error:'.
  """
  # We want an accurate count of the lines of code in a given file so that
  # we can estimate the code coverage percentage accurately. We use a
  # third party script cloc.pl which gives that count and then just parse
  # its command line output to filter out the other unnecessary data.
  # TODO(niranjan): Find out a better way of doing this.
  # TODO(niranjan): Add a check to see if cloc is present on the machine.
  output = subprocess.Popen(['perl', 'cloc.pl', source_path],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT).communicate()[0]
  # find() returns -1 (which is truthy) when there is no match, so the result
  # has to be compared explicitly.
  if output.find('error:') != -1:
    return None

  # NOTE(review): this relies on the layout of cloc's report ('=' rulers and
  # space separated columns) - confirm against the cloc.pl version in use.
  # The 'len(...) - 2' indexing is kept as is on purpose: for a one-element
  # list it selects that element instead of raising IndexError.
  tmp_buf1 = output.split('=')
  tmp_buf2 = tmp_buf1[len(tmp_buf1) - 2].split('x')[0].split(' ')
  return tmp_buf2[len(tmp_buf2) - 2]


def CleanWin32Lcov(lcov_path, src_root):
  r"""Cleanup the lcov data generated on Windows.

  This method fixes up the paths inside the lcov file from the Win32 specific
  paths to the actual paths of the mounted CIFS share. The lcov files generated
  on Windows have the following format:

  SF:c:\chrome_src\src\skia\sgl\skscan_antihair.cpp
  DA:97,0
  DA:106,0
  DA:107,0
  DA:109,0
  ...
  end_of_record

  This method changes the source-file (SF) lines to a format compatible with
  genhtml on Linux by fixing paths. This method also removes the records of
  certain dynamically generated files (see win32_srcs_exclude) to exclude
  them from the code coverage.

  For every record that is kept, a '<path>,<lines of code>' row is written to
  <lcov_path>.csv, in the same order as the records of the cleaned lcov file.
  Excluded records get no row, so the two files stay aligned.

  If cloc reports an error the method gives up: the lcov file is left
  unmodified and the csv file contains only the rows written so far.

  Args:
    lcov_path: Path of the Win32 lcov file to be cleaned.
    src_root: Location of the source and symbols dir.
  Returns:
    None
  """
  src_root = src_root.rstrip('/')       # Remove trailing '/'
  strip_flag = False  # True while inside a record that has to be removed
  completed = False
  lcov = open(lcov_path, 'r')
  try:
    loc_csv_file = open(lcov_path + '.csv', 'w')
    try:
      (tmpfile_id, tmpfile_name) = tempfile.mkstemp()
      # Wrap the descriptor returned by mkstemp instead of opening the file a
      # second time by name, so that the descriptor is not leaked.
      tmpfile = os.fdopen(tmpfile_id, 'w')
      try:
        for line in lcov:
          if line.startswith('SF:'):
            # We want to exclude certain auto-generated files otherwise
            # genhtml will fail to convert lcov to HTML.
            for exp in win32_srcs_exclude:
              if exp in line:
                strip_flag = True # We want to remove this section
            if not strip_flag:
              # Now we normalize the paths
              # e.g. Change SF:c:\foo\src\... to SF:/chrome_src/...
              file_for_loc = _NormalizeWin32SourcePath(line, src_root)
              loc = _CountLinesOfCode(file_for_loc)
              if loc is None:
                return None
              line = 'SF:%s\n' % file_for_loc
              loc_csv_file.write('%s,%s\r\n' % (file_for_loc, loc))

          # Write to the temp file if the section to write is valid
          if not strip_flag:
            tmpfile.write(line)

          # Reset the strip flag. Lines still carry their terminator here, so
          # compare the stripped text.
          if line.strip() == 'end_of_record':
            strip_flag = False
        completed = True
      finally:
        tmpfile.close()
        if not completed:
          # Do not leave a half-written temp file behind on error.
          os.remove(tmpfile_name)
    finally:
      loc_csv_file.close()
  finally:
    lcov.close()

  # Replace the lcov file by the 'clean' tmpfile
  shutil.move(tmpfile_name, lcov_path)


def ParseCoverageDataForDashboard(lcov_path):
  """Parse code coverage data into coverage results per source node.

  Use lcov and linecount data to create a map of source nodes to
  corresponding total and tested line counts. The n-th row of the linecount
  csv (<lcov_path>.csv, rows of '<path>,<line count>') is paired with the
  n-th record of the lcov file.

  Args:
    lcov_path: File path to lcov coverage data.

  Returns:
    List of strings with comma separated source node and coverage percentage
    (two decimals), sorted by node path. Empty if the lcov file has no
    records.

  Raises:
    IOError: If the linecount csv or the lcov file cannot be read.
    ValueError: If a DA: line or a linecount row is malformed, or if there
        are fewer linecount rows than lcov records.
  """
  results = {}
  linecount_path = lcov_path + '.csv'
  if not os.path.exists(linecount_path):
    raise IOError('linecount csv does not exist at: %s' % linecount_path)
  csv_file = open(linecount_path, 'r')
  try:
    linecounts = csv_file.readlines()
  finally:
    csv_file.close()

  srcfile_index = 0
  # Initialised up front so that a DA: line or end_of_record that shows up
  # before any SF: line cannot hit an undefined name.
  srcfile_name = ''
  instrumented_set = set()
  executed_set = set()
  lcov_file = open(lcov_path, 'r')
  try:
    for line in lcov_file:
      line = line.strip()

      if line.startswith('SF:'):
        # Set the current srcfile name for a new src file declaration.
        instrumented_set = set()
        executed_set = set()
        srcfile_name = line[len('SF:'):]

      elif line.startswith('DA:'):
        # Mark coverage data points for the current src file. The format is
        # DA:<line number>,<execution count>[,<checksum>].
        line_info = line[len('DA:'):].split(',')
        if len(line_info) not in (2, 3):
          raise ValueError('DA: line format unexpected - %s' % line)
        line_num = line_info[0]
        instrumented_set.add(line_num)
        if int(line_info[1]):
          executed_set.add(line_num)

      elif line == 'end_of_record':
        # Update results for the current src file at record end.
        instrumented = len(instrumented_set)
        executed = len(executed_set)
        parent_directory = srcfile_name[:srcfile_name.rfind('/') + 1]
        if srcfile_index >= len(linecounts):
          raise ValueError('no linecount entry for: %s' % srcfile_name)
        # Split from the right so that a ',' inside the path is tolerated.
        linecount_point = linecounts[srcfile_index].strip().rsplit(',', 1)
        if len(linecount_point) != 2:
          raise ValueError('linecount format unexpected - %s' %
                           linecounts[srcfile_index])
        (counted_path, counted_lines) = linecount_point
        srcfile_index += 1

        # Sanity check that path names in the lcov and linecount are lined up.
        if counted_path[-10:] != srcfile_name[-10:]:
          print('NAME MISMATCH: %s :: %s' % (srcfile_name, counted_path))
        # A file cannot have fewer lines than it has instrumented lines.
        total_lines = int(counted_lines)
        if instrumented > total_lines:
          total_lines = instrumented

        # Keep counts the same way that it is done in the genhtml utility.
        # Count the coverage of a file towards the file,
        # the parent directory, and the source root.
        # NOTE(review): the root is tallied with the instrumented count while
        # file and directory use the line count - confirm this is intended.
        AddResults(results, srcfile_name, total_lines, executed)
        AddResults(results, parent_directory, total_lines, executed)
        AddResults(results, '/', instrumented, executed)
  finally:
    lcov_file.close()

  keys = sorted(results)
  if len(keys) < 2:
    # Nothing (or only a single node) was collected; no prefix to strip.
    offset = 0
  else:
    # The first key (sorted) will be the base directory '/'
    # but its full path may be '/mnt/chrome_src/src/'
    # using this offset will ignore the part '/mnt/chrome_src/src'.
    # Offset is the last '/' that isn't the last character for the
    # first directory name in results (position 1 in keys).
    first_dir = keys[1]
    offset = len(first_dir[:first_dir[:-1].rfind('/')])

  lines = []
  for key in keys:
    if len(key) > offset:
      node_path = key[offset:]
    else:
      node_path = key
    (total, covered) = results[key]
    if total:
      percent = float(covered) * 100 / total
    else:
      # A node without any counted line has nothing to cover.
      percent = 0.0
    lines.append('%s,%.2f' % (node_path, percent))
  return lines


def AddResults(results, location, lines_total, lines_executed):
  """Add resulting line tallies to a location's total.

  The map is updated in place; a location that is not in the map yet starts
  from (0, 0).

  Args:
    results: Map of node location to a (total lines, executed lines) tuple.
    location: Source node string.
    lines_total: Number of lines to add to the total count for this node.
    lines_executed: Number of lines to add to the executed count for this node.

  Returns:
    None
  """
  # dict.get replaces the deprecated has_key() lookup and folds the
  # "first time seen" case into the same code path.
  (total, executed) = results.get(location, (0, 0))
  results[location] = (total + lines_total, executed + lines_executed)


def PostResultsToDashboard(lcov_path, results, post_url):
  """Post coverage results to coverage dashboard.

  The results are sent in chunks of POST_CHUNK_SIZE lines to
  <post_url>/newdata.do; the request carrying the final chunk is marked with
  last=True. At least one request is sent, even for an empty result list.

  Args:
    lcov_path: File path for lcov data whose file name is in the expected
        format: <project>_<platform>_<cl#>.coverage.lcov
    results: string list in the appropriate posting format.
    post_url: Base URL of the coverage dashboard.

  Raises:
    ValueError: If the lcov file name is not in the expected format.
    URLError: If a request keeps failing (see SendPost).
  """
  # Only the file name carries the information; directories in the path may
  # themselves contain '.' or '_' and must not take part in the parsing.
  lcov_name = os.path.basename(lcov_path)
  project_platform_cl = lcov_name.split('.')[0].split('_')
  if len(project_platform_cl) != 3:
    raise ValueError('lcov_path not in expected format: %s' % lcov_path)
  (project, platform, cl_string) = project_platform_cl
  project_name = '%s-%s' % (project, platform)
  url = '%s/newdata.do?project=%s&cl=%s' % (post_url, project_name, cl_string)

  # Send POSTs of POST_CHUNK_SIZE lines of the result set until
  # there is no more data and last_loop is set to True.
  last_loop = False
  cur_line = 0
  while not last_loop:
    body = '\n'.join(results[cur_line:cur_line + POST_CHUNK_SIZE])
    cur_line += POST_CHUNK_SIZE
    last_loop = (cur_line >= len(results))
    req = urllib2.Request('%s&last=%s' % (url, str(last_loop)), body)
    req.add_header('Content-Type', 'text/plain')
    SendPost(req)


# Number of request failures seen since the last successful request.
num_fails = 0

def SendPost(req):
  """Send a request, retrying until it succeeds or the retries run out.

  Every failure increments the module-level failure counter; a success resets
  it to zero. While the counter is below MAX_FAILURES the request is sent
  again after a pause of five seconds.

  Args:
    req: A urllib2 request object.

  Raises:
    URLError: If urlopen still fails once the failure limit is reached.
    HTTPError: If urlopen still fails once the failure limit is reached.
  """
  global num_fails
  while True:
    try:
      urllib2.urlopen(req)
    except (urllib2.URLError, urllib2.HTTPError):
      num_fails += 1
      if num_fails >= MAX_FAILURES:
        print('POST request exceeded allowed retries.')
        raise
      print('fail, retrying (%d)' % num_fails)
      time.sleep(5)
    else:
      # The request went through: start counting from scratch again.
      num_fails = 0
      return


def main():
  """Command line entry point.

  Parses the command line options, cleans up the lcov file, generates the
  HTML report and posts the per-node coverage to the dashboard. Only lcov
  files generated on win32 are supported at the moment.

  Exits with status 1 when not running on Linux, when the platform is not
  supported or when the HTML generation fails. Missing options are reported
  through the option parser.
  """
  if not sys.platform.startswith('linux'): # Run this only on Linux
    print('This script is supported only on Linux')
    sys.exit(1)

  # Command line parsing
  parser = optparse.OptionParser()
  parser.add_option('-p',
                    '--platform',
                    dest='platform',
                    default=None,
                    help=('Platform that the lcov file was generated on. Must '
                          'be one of {win32, linux2, macosx}'))
  parser.add_option('-s',
                    '--source',
                    dest='src_dir',
                    default=None,
                    help='Path to the source code and symbols')
  parser.add_option('-d',
                    '--dash_root',
                    dest='dash_root',
                    default=None,
                    help='Root directory for the dashboard')
  parser.add_option('-l',
                    '--lcov',
                    dest='lcov_path',
                    default=None,
                    help='Location of the LCOV file to process')
  parser.add_option('-u',
                    '--post_url',
                    dest='post_url',
                    default=None,
                    help='Base URL of the coverage dashboard')
  (options, args) = parser.parse_args()

  # All options are mandatory. parser.error() prints the message and exits.
  if options.platform is None:
    parser.error('Platform not specified')
  if options.lcov_path is None:
    parser.error('lcov file path not specified')
  if options.src_dir is None:
    parser.error('Source directory not specified')
  if options.dash_root is None:
    parser.error('Dashboard root not specified')
  if options.post_url is None:
    parser.error('Post URL not specified')

  if options.platform != 'win32':
    print('Unsupported platform')
    sys.exit(1)

  CleanWin32Lcov(options.lcov_path, options.src_dir)
  percent = GenerateHtml(options.lcov_path, options.dash_root)
  if percent is None:
    # TODO(niranjan): Add logging.
    print('Failed to generate code coverage')
    sys.exit(1)
  # TODO(niranjan): Do something with the code coverage numbers

  # Prep coverage results for dashboard and post new set.
  parsed_data = ParseCoverageDataForDashboard(options.lcov_path)
  PostResultsToDashboard(options.lcov_path, parsed_data, options.post_url)


if __name__ == '__main__':
  main()