summaryrefslogtreecommitdiffstats
path: root/tools/code_coverage/croc.py
blob: 1b9908a5f890b5a6210a486fd2af1e2d1b5de27f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Crocodile - compute coverage numbers for Chrome coverage dashboard."""

import optparse
import os
import platform
import re
import sys
import croc_html
import croc_scan


class CrocError(Exception):
  """Coverage error."""


class CrocStatError(CrocError):
  """Error evaluating coverage stat."""

#------------------------------------------------------------------------------


class CoverageStats(dict):
  """Coverage statistics."""

  # Default dictionary values for this stat.
  DEFAULTS = { 'files_covered': 0,
               'files_instrumented': 0,
               'files_executable': 0,
               'lines_covered': 0,
               'lines_instrumented': 0,
               'lines_executable': 0 }

  def Add(self, coverage_stats):
    """Adds a contribution from another coverage stats dict.

    Args:
      coverage_stats: Statistics to add to this one.
    """
    for k, v in coverage_stats.iteritems():
      if k in self:
        self[k] += v
      else:
        self[k] = v

  def AddDefaults(self):
    """Add some default stats which might be assumed present.

    Do not clobber if already present.  Adds resilience when evaling a
    croc file which expects certain stats to exist."""
    for k, v in self.DEFAULTS.iteritems():
      if not k in self:
        self[k] = v

#------------------------------------------------------------------------------


class CoveredFile(object):
  """Information about a single covered file."""

  def __init__(self, filename, **kwargs):
    """Constructor.

    Args:
      filename: Full path to file, '/'-delimited.
      kwargs: Keyword args are attributes for file.
    """
    self.filename = filename
    self.attrs = dict(kwargs)

    # Move these to attrs?
    self.local_path = None      # Local path to file
    self.in_lcov = False        # Is file instrumented?

    # No coverage data for file yet
    self.lines = {}     # line_no -> None=executable, 0=instrumented, 1=covered
    self.stats = CoverageStats()

  def UpdateCoverage(self):
    """Updates the coverage summary based on covered lines."""
    exe = instr = cov = 0
    for l in self.lines.itervalues():
      exe += 1
      if l is not None:
        instr += 1
        if l == 1:
          cov += 1

    # Add stats that always exist
    self.stats = CoverageStats(lines_executable=exe,
                               lines_instrumented=instr,
                               lines_covered=cov,
                               files_executable=1)

    # Add conditional stats
    if cov:
      self.stats['files_covered'] = 1
    if instr or self.in_lcov:
      self.stats['files_instrumented'] = 1

#------------------------------------------------------------------------------


class CoveredDir(object):
  """Information about a directory containing covered files."""

  def __init__(self, dirpath):
    """Constructor.

    Args:
      dirpath: Full path of directory, '/'-delimited.
    """
    self.dirpath = dirpath

    # List of covered files directly in this dir, indexed by filename (not
    # full path)
    self.files = {}

    # List of subdirs, indexed by filename (not full path)
    self.subdirs = {}

    # Dict of CoverageStats objects summarizing all children, indexed by group
    self.stats_by_group = {'all': CoverageStats()}
    # TODO: by language

  def GetTree(self, indent=''):
    """Recursively gets stats for the directory and its children.

    Args:
      indent: indent prefix string.

    Returns:
      The tree as a string.
    """
    dest = []

    # Compile all groupstats
    groupstats = []
    for group in sorted(self.stats_by_group):
      s = self.stats_by_group[group]
      if not s.get('lines_executable'):
        continue        # Skip groups with no executable lines
      groupstats.append('%s:%d/%d/%d' % (
          group, s.get('lines_covered', 0),
          s.get('lines_instrumented', 0),
          s.get('lines_executable', 0)))

    outline = '%s%-30s   %s' % (indent,
                                os.path.split(self.dirpath)[1] + '/',
                                '   '.join(groupstats))
    dest.append(outline.rstrip())

    for d in sorted(self.subdirs):
      dest.append(self.subdirs[d].GetTree(indent=indent + '  '))

    return '\n'.join(dest)

#------------------------------------------------------------------------------


class Coverage(object):
  """Code coverage for a group of files."""

  def __init__(self):
    """Constructor."""
    self.files = {}             # Map filename --> CoverageFile
    self.root_dirs = []         # (root, altname)
    self.rules = []             # (regexp, dict of RHS attrs)
    self.tree = CoveredDir('')
    self.print_stats = []       # Dicts of args to PrintStat()

    # Functions which need to be replaced for unit testing
    self.add_files_walk = os.walk         # Walk function for AddFiles()
    self.scan_file = croc_scan.ScanFile   # Source scanner for AddFiles()

  def CleanupFilename(self, filename):
    """Cleans up a filename.

    Args:
      filename: Input filename.

    Returns:
      The cleaned up filename.

    Changes all path separators to '/'.
    Makes relative paths (those starting with '../' or './' absolute.
    Replaces all instances of root dirs with alternate names.
    """
    # Change path separators
    filename = filename.replace('\\', '/')

    # Windows doesn't care about case sensitivity.
    if platform.system() in ['Windows', 'Microsoft']:
      filename = filename.lower()

    # If path is relative, make it absolute
    # TODO: Perhaps we should default to relative instead, and only understand
    # absolute to be files starting with '\', '/', or '[A-Za-z]:'?
    if filename.split('/')[0] in ('.', '..'):
      filename = os.path.abspath(filename).replace('\\', '/')

    # Replace alternate roots
    for root, alt_name in self.root_dirs:
      # Windows doesn't care about case sensitivity.
      if platform.system() in ['Windows', 'Microsoft']:
        root = root.lower()
      filename = re.sub('^' + re.escape(root) + '(?=(/|$))',
                        alt_name, filename)
    return filename

  def ClassifyFile(self, filename):
    """Applies rules to a filename, to see if we care about it.

    Args:
      filename: Input filename.

    Returns:
      A dict of attributes for the file, accumulated from the right hand sides
          of rules which fired.
    """
    attrs = {}

    # Process all rules
    for regexp, rhs_dict in self.rules:
      if regexp.match(filename):
        attrs.update(rhs_dict)

    return attrs
    # TODO: Files can belong to multiple groups?
    #   (test/source)
    #   (mac/pc/win)
    #   (media_test/all_tests)
    #   (small/med/large)
    # How to handle that?

  def AddRoot(self, root_path, alt_name='_'):
    """Adds a root directory.

    Args:
      root_path: Root directory to add.
      alt_name: If specified, name of root dir.  Otherwise, defaults to '_'.

    Raises:
      ValueError: alt_name was blank.
    """
    # Alt name must not be blank.  If it were, there wouldn't be a way to
    # reverse-resolve from a root-replaced path back to the local path, since
    # '' would always match the beginning of the candidate filename, resulting
    # in an infinite loop.
    if not alt_name:
      raise ValueError('AddRoot alt_name must not be blank.')

    # Clean up root path based on existing rules
    self.root_dirs.append([self.CleanupFilename(root_path), alt_name])

  def AddRule(self, path_regexp, **kwargs):
    """Adds a rule.

    Args:
      path_regexp: Regular expression to match for filenames.  These are
          matched after root directory replacement.
      kwargs: Keyword arguments are attributes to set if the rule applies.

    Keyword arguments currently supported:
      include: If True, includes matches; if False, excludes matches.  Ignored
          if None.
      group: If not None, sets group to apply to matches.
      language: If not None, sets file language to apply to matches.
    """

    # Compile regexp ahead of time
    self.rules.append([re.compile(path_regexp), dict(kwargs)])

  def GetCoveredFile(self, filename, add=False):
    """Gets the CoveredFile object for the filename.

    Args:
      filename: Name of file to find.
      add: If True, will add the file if it's not present.  This applies the
          transformations from AddRoot() and AddRule(), and only adds the file
          if a rule includes it, and it has a group and language.

    Returns:
      The matching CoveredFile object, or None if not present.
    """
    # Clean filename
    filename = self.CleanupFilename(filename)

    # Check for existing match
    if filename in self.files:
      return self.files[filename]

    # File isn't one we know about.  If we can't add it, give up.
    if not add:
      return None

    # Check rules to see if file can be added.  Files must be included and
    # have a group and language.
    attrs = self.ClassifyFile(filename)
    if not (attrs.get('include')
            and attrs.get('group')
            and attrs.get('language')):
      return None

    # Add the file
    f = CoveredFile(filename, **attrs)
    self.files[filename] = f

    # Return the newly covered file
    return f

  def RemoveCoveredFile(self, cov_file):
    """Removes the file from the covered file list.

    Args:
      cov_file: A file object returned by GetCoveredFile().
    """
    self.files.pop(cov_file.filename)

  def ParseLcovData(self, lcov_data):
    """Adds coverage from LCOV-formatted data.

    Args:
      lcov_data: An iterable returning lines of data in LCOV format.  For
          example, a file or list of strings.
    """
    cov_file = None
    cov_lines = None
    for line in lcov_data:
      line = line.strip()
      if line.startswith('SF:'):
        # Start of data for a new file; payload is filename
        cov_file = self.GetCoveredFile(line[3:], add=True)
        if cov_file:
          cov_lines = cov_file.lines
          cov_file.in_lcov = True       # File was instrumented
      elif not cov_file:
        # Inside data for a file we don't care about - so skip it
        pass
      elif line.startswith('DA:'):
        # Data point - that is, an executable line in current file
        line_no, is_covered = map(int, line[3:].split(','))
        if is_covered:
          # Line is covered
          cov_lines[line_no] = 1
        elif cov_lines.get(line_no) != 1:
          # Line is not covered, so track it as uncovered
          cov_lines[line_no] = 0
      elif line == 'end_of_record':
        cov_file.UpdateCoverage()
        cov_file = None
      # (else ignore other line types)

  def ParseLcovFile(self, input_filename):
    """Adds coverage data from a .lcov file.

    Args:
      input_filename: Input filename.
    """
    # TODO: All manner of error checking
    lcov_file = None
    try:
      lcov_file = open(input_filename, 'rt')
      self.ParseLcovData(lcov_file)
    finally:
      if lcov_file:
        lcov_file.close()

  def GetStat(self, stat, group='all', default=None):
    """Gets a statistic from the coverage object.

    Args:
      stat: Statistic to get.  May also be an evaluatable python expression,
          using the stats.  For example, 'stat1 - stat2'.
      group: File group to match; if 'all', matches all groups.
      default: Value to return if there was an error evaluating the stat.  For
          example, if the stat does not exist.  If None, raises
          CrocStatError.

    Returns:
      The evaluated stat, or None if error.

    Raises:
      CrocStatError: Error evaluating stat.
    """
    # TODO: specify a subdir to get the stat from, then walk the tree to
    # print the stats from just that subdir

    # Make sure the group exists
    if group not in self.tree.stats_by_group:
      if default is None:
        raise CrocStatError('Group %r not found.' % group)
      else:
        return default

    stats = self.tree.stats_by_group[group]
    # Unit tests use real dicts, not CoverageStats objects,
    # so we can't AddDefaults() on them.
    if group == 'all' and hasattr(stats, 'AddDefaults'):
      stats.AddDefaults()
    try:
      return eval(stat, {'__builtins__': {'S': self.GetStat}}, stats)
    except Exception, e:
      if default is None:
        raise CrocStatError('Error evaluating stat %r: %s' % (stat, e))
      else:
        return default

  def PrintStat(self, stat, format=None, outfile=sys.stdout, **kwargs):
    """Prints a statistic from the coverage object.

    Args:
      stat: Statistic to get.  May also be an evaluatable python expression,
          using the stats.  For example, 'stat1 - stat2'.
      format: Format string to use when printing stat.  If None, prints the
          stat and its evaluation.
      outfile: File stream to output stat to; defaults to stdout.
      kwargs: Additional args to pass to GetStat().
    """
    s = self.GetStat(stat, **kwargs)
    if format is None:
      outfile.write('GetStat(%r) = %s\n' % (stat, s))
    else:
      outfile.write(format % s + '\n')

  def AddFiles(self, src_dir):
    """Adds files to coverage information.

    LCOV files only contains files which are compiled and instrumented as part
    of running coverage.  This function finds missing files and adds them.

    Args:
      src_dir: Directory on disk at which to start search.  May be a relative
          path on disk starting with '.' or '..', or an absolute path, or a
          path relative to an alt_name for one of the roots
          (for example, '_/src').  If the alt_name matches more than one root,
          all matches will be attempted.

    Note that dirs not underneath one of the root dirs and covered by an
    inclusion rule will be ignored.
    """
    # Check for root dir alt_names in the path and replace with the actual
    # root dirs, then recurse.
    found_root = False
    for root, alt_name in self.root_dirs:
      replaced_root = re.sub('^' + re.escape(alt_name) + '(?=(/|$))', root,
                             src_dir)
      if replaced_root != src_dir:
        found_root = True
        self.AddFiles(replaced_root)
    if found_root:
      return    # Replaced an alt_name with a root_dir, so already recursed.

    for (dirpath, dirnames, filenames) in self.add_files_walk(src_dir):
      # Make a copy of the dirnames list so we can modify the original to
      # prune subdirs we don't need to walk.
      for d in list(dirnames):
        # Add trailing '/' to directory names so dir-based regexps can match
        # '/' instead of needing to specify '(/|$)'.
        dpath = self.CleanupFilename(dirpath + '/' + d) + '/'
        attrs = self.ClassifyFile(dpath)
        if not attrs.get('include'):
          # Directory has been excluded, so don't traverse it
          # TODO: Document the slight weirdness caused by this: If you
          # AddFiles('./A'), and the rules include 'A/B/C/D' but not 'A/B',
          # then it won't recurse into './A/B' so won't find './A/B/C/D'.
          # Workarounds are to AddFiles('./A/B/C/D') or AddFiles('./A/B/C').
          # The latter works because it explicitly walks the contents of the
          # path passed to AddFiles(), so it finds './A/B/C/D'.
          dirnames.remove(d)

      for f in filenames:
        local_path = dirpath + '/' + f

        covf = self.GetCoveredFile(local_path, add=True)
        if not covf:
          continue

        # Save where we found the file, for generating line-by-line HTML output
        covf.local_path = local_path

        if covf.in_lcov:
          # File already instrumented and doesn't need to be scanned
          continue

        if not covf.attrs.get('add_if_missing', 1):
          # Not allowed to add the file
          self.RemoveCoveredFile(covf)
          continue

        # Scan file to find potentially-executable lines
        lines = self.scan_file(covf.local_path, covf.attrs.get('language'))
        if lines:
          for l in lines:
            covf.lines[l] = None
          covf.UpdateCoverage()
        else:
          # File has no executable lines, so don't count it
          self.RemoveCoveredFile(covf)

  def AddConfig(self, config_data, lcov_queue=None, addfiles_queue=None):
    """Adds JSON-ish config data.

    Args:
      config_data: Config data string.
      lcov_queue: If not None, object to append lcov_files to instead of
          parsing them immediately.
      addfiles_queue: If not None, object to append add_files to instead of
          processing them immediately.
    """
    # TODO: All manner of error checking
    cfg = eval(config_data, {'__builtins__': {}}, {})

    for rootdict in cfg.get('roots', []):
      self.AddRoot(rootdict['root'], alt_name=rootdict.get('altname', '_'))

    for ruledict in cfg.get('rules', []):
      regexp = ruledict.pop('regexp')
      self.AddRule(regexp, **ruledict)

    for add_lcov in cfg.get('lcov_files', []):
      if lcov_queue is not None:
        lcov_queue.append(add_lcov)
      else:
        self.ParseLcovFile(add_lcov)

    for add_path in cfg.get('add_files', []):
      if addfiles_queue is not None:
        addfiles_queue.append(add_path)
      else:
        self.AddFiles(add_path)

    self.print_stats += cfg.get('print_stats', [])

  def ParseConfig(self, filename, **kwargs):
    """Parses a configuration file.

    Args:
      filename: Config filename.
      kwargs: Additional parameters to pass to AddConfig().
    """
    # TODO: All manner of error checking
    f = None
    try:
      f = open(filename, 'rt')
      # Need to strip CR's from CRLF-terminated lines or posix systems can't
      # eval the data.
      config_data = f.read().replace('\r\n', '\n')
      # TODO: some sort of include syntax.
      #
      # Needs to be done at string-time rather than at eval()-time, so that
      # it's possible to include parts of dicts.  Path from a file to its
      # include should be relative to the dir containing the file.
      #
      # Or perhaps it could be done after eval.  In that case, there'd be an
      # 'include' section with a list of files to include.  Those would be
      # eval()'d and recursively pre- or post-merged with the including file.
      #
      # Or maybe just don't worry about it, since multiple configs can be
      # specified on the command line.
      self.AddConfig(config_data, **kwargs)
    finally:
      if f:
        f.close()

  def UpdateTreeStats(self):
    """Recalculates the tree stats from the currently covered files.

    Also calculates coverage summary for files.
    """
    self.tree = CoveredDir('')
    for cov_file in self.files.itervalues():
      # Add the file to the tree
      fdirs = cov_file.filename.split('/')
      parent = self.tree
      ancestors = [parent]
      for d in fdirs[:-1]:
        if d not in parent.subdirs:
          if parent.dirpath:
            parent.subdirs[d] = CoveredDir(parent.dirpath + '/' + d)
          else:
            parent.subdirs[d] = CoveredDir(d)
        parent = parent.subdirs[d]
        ancestors.append(parent)
      # Final subdir actually contains the file
      parent.files[fdirs[-1]] = cov_file

      # Now add file's contribution to coverage by dir
      for a in ancestors:
        # Add to 'all' group
        a.stats_by_group['all'].Add(cov_file.stats)

        # Add to group file belongs to
        group = cov_file.attrs.get('group')
        if group not in a.stats_by_group:
          a.stats_by_group[group] = CoverageStats()
        cbyg = a.stats_by_group[group]
        cbyg.Add(cov_file.stats)

  def PrintTree(self):
    """Prints the tree stats."""
    # Print the tree
    print 'Lines of code coverage by directory:'
    print self.tree.GetTree()

#------------------------------------------------------------------------------


def Main(argv):
  """Main routine.

  Args:
    argv: list of arguments

  Returns:
    exit code, 0 for normal exit.
  """
  # Parse args
  parser = optparse.OptionParser()
  parser.add_option(
      '-i', '--input', dest='inputs', type='string', action='append',
      metavar='FILE',
      help='read LCOV input from FILE')
  parser.add_option(
      '-r', '--root', dest='roots', type='string', action='append',
      metavar='ROOT[=ALTNAME]',
      help='add ROOT directory, optionally map in coverage results as ALTNAME')
  parser.add_option(
      '-c', '--config', dest='configs', type='string', action='append',
      metavar='FILE',
      help='read settings from configuration FILE')
  parser.add_option(
      '-a', '--addfiles', dest='addfiles', type='string', action='append',
      metavar='PATH',
      help='add files from PATH to coverage data')
  parser.add_option(
      '-t', '--tree', dest='tree', action='store_true',
      help='print tree of code coverage by group')
  parser.add_option(
      '-u', '--uninstrumented', dest='uninstrumented', action='store_true',
      help='list uninstrumented files')
  parser.add_option(
      '-m', '--html', dest='html_out', type='string', metavar='PATH',
      help='write HTML output to PATH')
  parser.add_option(
      '-b', '--base_url', dest='base_url', type='string', metavar='URL',
      help='include URL in base tag of HTML output')

  parser.set_defaults(
      inputs=[],
      roots=[],
      configs=[],
      addfiles=[],
      tree=False,
      html_out=None,
  )

  options = parser.parse_args(args=argv)[0]

  cov = Coverage()

  # Set root directories for coverage
  for root_opt in options.roots:
    if '=' in root_opt:
      cov.AddRoot(*root_opt.split('='))
    else:
      cov.AddRoot(root_opt)

  # Read config files
  for config_file in options.configs:
    cov.ParseConfig(config_file, lcov_queue=options.inputs,
                    addfiles_queue=options.addfiles)

  # Parse lcov files
  for input_filename in options.inputs:
    cov.ParseLcovFile(input_filename)

  # Add missing files
  for add_path in options.addfiles:
    cov.AddFiles(add_path)

  # Print help if no files specified
  if not cov.files:
    print 'No covered files found.'
    parser.print_help()
    return 1

  # Update tree stats
  cov.UpdateTreeStats()

  # Print uninstrumented filenames
  if options.uninstrumented:
    print 'Uninstrumented files:'
    for f in sorted(cov.files):
      covf = cov.files[f]
      if not covf.in_lcov:
        print '  %-6s %-6s %s' % (covf.attrs.get('group'),
                                  covf.attrs.get('language'), f)

  # Print tree stats
  if options.tree:
    cov.PrintTree()

  # Print stats
  for ps_args in cov.print_stats:
    cov.PrintStat(**ps_args)

  # Generate HTML
  if options.html_out:
    html = croc_html.CrocHtml(cov, options.html_out, options.base_url)
    html.Write()

  # Normal exit
  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv))