summary refs log tree commit diff stats
path: root/android_webview/tools/copyright_scanner.py
diff options
context:
space:
mode:
Diffstat (limited to 'android_webview/tools/copyright_scanner.py')
-rw-r--r-- android_webview/tools/copyright_scanner.py 162
1 files changed, 89 insertions, 73 deletions
diff --git a/android_webview/tools/copyright_scanner.py b/android_webview/tools/copyright_scanner.py
index 90da30d..7e4ef0c 100644
--- a/android_webview/tools/copyright_scanner.py
+++ b/android_webview/tools/copyright_scanner.py
@@ -6,14 +6,13 @@
"""
import itertools
-import os
-import re
-def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
+def FindFiles(input_api, root_dir, start_paths_list, excluded_dirs_list):
"""Similar to UNIX utility find(1), searches for files in the directories.
Automatically keeps only source code files, leaving out everything else.
Args:
+ input_api: InputAPI, as in presubmit scripts.
root_dir: The root directory, to which all other paths are relative.
start_paths_list: The list of paths to start search from. Each path can
be a file or a directory.
@@ -28,7 +27,7 @@ def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
return True
return False
- files_whitelist_re = re.compile(
+ files_whitelist_re = input_api.re.compile(
r'\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)'
'|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?'
'|tex|mli?)$')
@@ -36,66 +35,75 @@ def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
base_path_len = len(root_dir)
for path in start_paths_list:
- full_path = os.path.join(root_dir, path)
- if os.path.isfile(full_path):
+ full_path = input_api.os_path.join(root_dir, path)
+ if input_api.os_path.isfile(full_path):
if files_whitelist_re.search(path):
files.append(path)
else:
- for dirpath, dirnames, filenames in os.walk(full_path):
+ for dirpath, dirnames, filenames in input_api.os_walk(full_path):
# Remove excluded subdirs for faster scanning.
for item in dirnames[:]:
- if IsBlacklistedDir(os.path.join(dirpath, item)[base_path_len + 1:]):
+ if IsBlacklistedDir(
+ input_api.os_path.join(dirpath, item)[base_path_len + 1:]):
dirnames.remove(item)
for filename in filenames:
- filepath = os.path.join(dirpath, filename)[base_path_len + 1:]
+ filepath = \
+ input_api.os_path.join(dirpath, filename)[base_path_len + 1:]
if files_whitelist_re.search(filepath) and \
not IsBlacklistedDir(filepath):
files.append(filepath)
return files
-python_multiline_string_double_re = re.compile(
- r'"""[^"]*(?:"""|$)', flags=re.MULTILINE)
-python_multiline_string_single_re = re.compile(
- r"'''[^']*(?:'''|$)", flags=re.MULTILINE)
-automatically_generated_re = re.compile(
- r'(All changes made in this file will be lost'
- '|DO NOT (EDIT|delete this file)'
- '|Generated (at|automatically|data)'
- '|Automatically generated'
- '|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=re.IGNORECASE)
-
-def _IsGeneratedFile(header):
- header = header.upper()
- if '"""' in header:
- header = python_multiline_string_double_re.sub('', header)
- if "'''" in header:
- header = python_multiline_string_single_re.sub('', header)
- # First do simple strings lookup to save time.
- if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:
- return True
- if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \
- 'GENERATED' in header:
- return automatically_generated_re.search(header)
- return False
-
-
-GENERATED_FILE = 'GENERATED FILE'
-NO_COPYRIGHT = '*No copyright*'
+class _GeneratedFilesDetector(object):
+ GENERATED_FILE = 'GENERATED FILE'
+ NO_COPYRIGHT = '*No copyright*'
+
+ def __init__(self, input_api):
+ self.python_multiline_string_double_re = \
+ input_api.re.compile(r'"""[^"]*(?:"""|$)', flags=input_api.re.MULTILINE)
+ self.python_multiline_string_single_re = \
+ input_api.re.compile(r"'''[^']*(?:'''|$)", flags=input_api.re.MULTILINE)
+ self.automatically_generated_re = input_api.re.compile(
+ r'(All changes made in this file will be lost'
+ '|DO NOT (EDIT|delete this file)'
+ '|Generated (at|automatically|data)'
+ '|Automatically generated'
+ '|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=input_api.re.IGNORECASE)
+
+ def IsGeneratedFile(self, header):
+ header = header.upper()
+ if '"""' in header:
+ header = self.python_multiline_string_double_re.sub('', header)
+ if "'''" in header:
+ header = self.python_multiline_string_single_re.sub('', header)
+ # First do simple strings lookup to save time.
+ if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:
+ return True
+ if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \
+ 'GENERATED' in header:
+ return self.automatically_generated_re.search(header)
+ return False
+
class _CopyrightsScanner(object):
- _c_comment_re = re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''')
- _copyright_indicator = r'(?:copyright|copr\.|\xc2\xa9|\(c\))'
- _full_copyright_indicator_re = \
- re.compile(r'(?:\W|^)' + _copyright_indicator + r'(?::\s*|\s+)(\w.*)$', \
- re.IGNORECASE)
- _copyright_disindicator_re = \
- re.compile(r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', re.IGNORECASE)
-
- def __init__(self):
+ @staticmethod
+ def StaticInit(input_api):
+ _CopyrightsScanner._c_comment_re = \
+ input_api.re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''')
+ _CopyrightsScanner._copyright_indicator = \
+ r'(?:copyright|copr\.|\xc2\xa9|\(c\))'
+ _CopyrightsScanner._full_copyright_indicator_re = input_api.re.compile(
+ r'(?:\W|^)' + _CopyrightsScanner._copyright_indicator + \
+ r'(?::\s*|\s+)(\w.*)$', input_api.re.IGNORECASE)
+ _CopyrightsScanner._copyright_disindicator_re = input_api.re.compile(
+ r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', input_api.re.IGNORECASE)
+
+ def __init__(self, input_api):
self.max_line_numbers_proximity = 3
self.last_a_item_line_number = -200
self.last_b_item_line_number = -100
+ self.re = input_api.re
def _CloseLineNumbers(self, a, b):
return 0 <= a - b <= self.max_line_numbers_proximity
@@ -131,17 +139,20 @@ class _CopyrightsScanner(object):
not _CopyrightsScanner._copyright_disindicator_re.match(m.group(1)):
copyr = m.group(0)
# Prettify the authorship string.
- copyr = re.sub(r'([,.])?\s*$/', '', copyr)
- copyr = re.sub(self._copyright_indicator, '', copyr, flags=re.IGNORECASE)
- copyr = re.sub(r'^\s+', '', copyr)
- copyr = re.sub(r'\s{2,}', ' ', copyr)
- copyr = re.sub(r'\\@', '@', copyr)
+ copyr = self.re.sub(r'([,.])?\s*$/', '', copyr)
+ copyr = self.re.sub(
+ _CopyrightsScanner._copyright_indicator, '', copyr, \
+ flags=self.re.IGNORECASE)
+ copyr = self.re.sub(r'^\s+', '', copyr)
+ copyr = self.re.sub(r'\s{2,}', ' ', copyr)
+ copyr = self.re.sub(r'\\@', '@', copyr)
return copyr
-def FindCopyrights(root_dir, files_to_scan):
+def FindCopyrights(input_api, root_dir, files_to_scan):
"""Determines code authorship, and finds generated files.
Args:
+ input_api: InputAPI, as in presubmit scripts.
root_dir: The root directory, to which all other paths are relative.
files_to_scan: The list of file names to scan.
Returns:
@@ -150,47 +161,52 @@ def FindCopyrights(root_dir, files_to_scan):
entry -- 'GENERATED_FILE' string. If the file has no copyright info,
the corresponding list contains 'NO_COPYRIGHT' string.
"""
+ generated_files_detector = _GeneratedFilesDetector(input_api)
+ _CopyrightsScanner.StaticInit(input_api)
copyrights = []
for file_name in files_to_scan:
linenum = 0
- header = ''
+ header = []
file_copyrights = []
- scanner = _CopyrightsScanner()
- with open(os.path.join(root_dir, file_name), 'r') as f:
- for l in f.readlines():
- linenum += 1
- if linenum <= 25:
- header += l
- c = scanner.MatchLine(linenum, l)
- if c:
- file_copyrights.append(c)
- if _IsGeneratedFile(header):
- copyrights.append([GENERATED_FILE])
- elif file_copyrights:
- copyrights.append(file_copyrights)
- else:
- copyrights.append([NO_COPYRIGHT])
+ scanner = _CopyrightsScanner(input_api)
+ contents = input_api.ReadFile(
+ input_api.os_path.join(root_dir, file_name), 'r')
+ for l in contents.split('\n'):
+ linenum += 1
+ if linenum <= 25:
+ header.append(l)
+ c = scanner.MatchLine(linenum, l)
+ if c:
+ file_copyrights.append(c)
+ if generated_files_detector.IsGeneratedFile('\n'.join(header)):
+ copyrights.append([_GeneratedFilesDetector.GENERATED_FILE])
+ elif file_copyrights:
+ copyrights.append(file_copyrights)
+ else:
+ copyrights.append([_GeneratedFilesDetector.NO_COPYRIGHT])
return copyrights
-def FindCopyrightViolations(root_dir, files_to_scan):
+def FindCopyrightViolations(input_api, root_dir, files_to_scan):
"""Looks for files that do not belong exclusively to the Chromium Authors.
Args:
+ input_api: InputAPI, as in presubmit scripts.
root_dir: The root directory, to which all other paths are relative.
files_to_scan: The list of file names to scan.
Returns:
The list of file names that contain non-Chromium copyrights.
"""
- copyrights = FindCopyrights(root_dir, files_to_scan)
+ copyrights = FindCopyrights(input_api, root_dir, files_to_scan)
offending_files = []
- allowed_copyrights_re = re.compile(
+ allowed_copyrights_re = input_api.re.compile(
r'^(?:20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
'All rights reserved.*)$')
for f, cs in itertools.izip(files_to_scan, copyrights):
- if cs[0] == GENERATED_FILE or cs[0] == NO_COPYRIGHT:
+ if cs[0] == _GeneratedFilesDetector.GENERATED_FILE or \
+ cs[0] == _GeneratedFilesDetector.NO_COPYRIGHT:
continue
for c in cs:
if not allowed_copyrights_re.match(c):
- offending_files.append(os.path.normpath(f))
+ offending_files.append(input_api.os_path.normpath(f))
break
return offending_files