summaryrefslogtreecommitdiffstats
path: root/webkit/tools/layout_tests/dedup-tests.py
blob: ea5de925ad76dba4a159615e42ca93603559b0fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python

"""dedup-tests -- print test results duplicated between win and linux.

Because the outputs are very similar, we fall back on Windows outputs
if there isn't an expected output for Linux layout tests.  This means
that any file that is duplicated between the Linux and Windows directories
is redundant.

This command dumps out all such files.  You can use it like:
  dedup-tests.py   # print out the bad files
  dedup-tests.py | xargs git rm   # delete the bad files
"""

import collections
import os.path
import subprocess
import sys

# A map of file hash => set of all files with that hash.
hashes = collections.defaultdict(set)

# Fill in the map.
cmd = ['git', 'ls-tree', '-r', 'HEAD', 'webkit/data/layout_tests/']
try:
  git = subprocess.Popen(cmd, stdout=subprocess.PIPE)
except OSError, e:
  if e.errno == 2:  # No such file or directory.
    print >>sys.stderr, "Error: 'No such file' when running git."
    print >>sys.stderr, "This script requires git."
    sys.exit(1)
  raise e

for line in git.stdout:
  attrs, file = line.strip().split('\t')
  _, _, hash = attrs.split(' ')
  hashes[hash].add(file)

# Dump out duplicated files.
for cluster in hashes.values():
  if len(cluster) < 2:
    continue
  for file in cluster:
    if '/chromium-linux/' in file:
      if file.replace('/chromium-linux/', '/chromium-win/') in cluster:
        print file