diff options
Diffstat (limited to 'android_webview/tools/copyright_scanner.py')
-rw-r--r-- | android_webview/tools/copyright_scanner.py | 162 |
1 files changed, 89 insertions, 73 deletions
diff --git a/android_webview/tools/copyright_scanner.py b/android_webview/tools/copyright_scanner.py index 90da30d..7e4ef0c 100644 --- a/android_webview/tools/copyright_scanner.py +++ b/android_webview/tools/copyright_scanner.py @@ -6,14 +6,13 @@ """ import itertools -import os -import re -def FindFiles(root_dir, start_paths_list, excluded_dirs_list): +def FindFiles(input_api, root_dir, start_paths_list, excluded_dirs_list): """Similar to UNIX utility find(1), searches for files in the directories. Automatically leaves out only source code files. Args: + input_api: InputAPI, as in presubmit scripts. root_dir: The root directory, to which all other paths are relative. start_paths_list: The list of paths to start search from. Each path can be a file or a directory. @@ -28,7 +27,7 @@ def FindFiles(root_dir, start_paths_list, excluded_dirs_list): return True return False - files_whitelist_re = re.compile( + files_whitelist_re = input_api.re.compile( r'\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' '|tex|mli?)$') @@ -36,66 +35,75 @@ def FindFiles(root_dir, start_paths_list, excluded_dirs_list): base_path_len = len(root_dir) for path in start_paths_list: - full_path = os.path.join(root_dir, path) - if os.path.isfile(full_path): + full_path = input_api.os_path.join(root_dir, path) + if input_api.os_path.isfile(full_path): if files_whitelist_re.search(path): files.append(path) else: - for dirpath, dirnames, filenames in os.walk(full_path): + for dirpath, dirnames, filenames in input_api.os_walk(full_path): # Remove excluded subdirs for faster scanning. for item in dirnames[:]: - if IsBlacklistedDir(os.path.join(dirpath, item)[base_path_len + 1:]): + if IsBlacklistedDir( + input_api.os_path.join(dirpath, item)[base_path_len + 1:]): dirnames.remove(item) for filename in filenames: - filepath = os.path.join(dirpath, filename)[base_path_len + 1:] + filepath = \ + input_api.os_path.join(dirpath, filename)[base_path_len + 1:] if files_whitelist_re.search(filepath) and \ not IsBlacklistedDir(filepath): files.append(filepath) return files -python_multiline_string_double_re = re.compile( - r'"""[^"]*(?:"""|$)', flags=re.MULTILINE) -python_multiline_string_single_re = re.compile( - r"'''[^']*(?:'''|$)", flags=re.MULTILINE) -automatically_generated_re = re.compile( - r'(All changes made in this file will be lost' - '|DO NOT (EDIT|delete this file)' - '|Generated (at|automatically|data)' - '|Automatically generated' - '|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=re.IGNORECASE) - -def _IsGeneratedFile(header): - header = header.upper() - if '"""' in header: - header = python_multiline_string_double_re.sub('', header) - if "'''" in header: - header = python_multiline_string_single_re.sub('', header) - # First do simple strings lookup to save time. - if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header: - return True - if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \ - 'GENERATED' in header: - return automatically_generated_re.search(header) - return False - - -GENERATED_FILE = 'GENERATED FILE' -NO_COPYRIGHT = '*No copyright*' +class _GeneratedFilesDetector(object): + GENERATED_FILE = 'GENERATED FILE' + NO_COPYRIGHT = '*No copyright*' + + def __init__(self, input_api): + self.python_multiline_string_double_re = \ + input_api.re.compile(r'"""[^"]*(?:"""|$)', flags=input_api.re.MULTILINE) + self.python_multiline_string_single_re = \ + input_api.re.compile(r"'''[^']*(?:'''|$)", flags=input_api.re.MULTILINE) + self.automatically_generated_re = input_api.re.compile( + r'(All changes made in this file will be lost' + '|DO NOT (EDIT|delete this file)' + '|Generated (at|automatically|data)' + '|Automatically generated' + '|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=input_api.re.IGNORECASE) + + def IsGeneratedFile(self, header): + header = header.upper() + if '"""' in header: + header = self.python_multiline_string_double_re.sub('', header) + if "'''" in header: + header = self.python_multiline_string_single_re.sub('', header) + # First do simple strings lookup to save time. + if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header: + return True + if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \ + 'GENERATED' in header: + return self.automatically_generated_re.search(header) + return False + class _CopyrightsScanner(object): - _c_comment_re = re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''') - _copyright_indicator = r'(?:copyright|copr\.|\xc2\xa9|\(c\))' - _full_copyright_indicator_re = \ - re.compile(r'(?:\W|^)' + _copyright_indicator + r'(?::\s*|\s+)(\w.*)$', \ - re.IGNORECASE) - _copyright_disindicator_re = \ - re.compile(r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', re.IGNORECASE) - - def __init__(self): + @staticmethod + def StaticInit(input_api): + _CopyrightsScanner._c_comment_re = \ + input_api.re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''') + _CopyrightsScanner._copyright_indicator = \ + r'(?:copyright|copr\.|\xc2\xa9|\(c\))' + _CopyrightsScanner._full_copyright_indicator_re = input_api.re.compile( + r'(?:\W|^)' + _CopyrightsScanner._copyright_indicator + \ + r'(?::\s*|\s+)(\w.*)$', input_api.re.IGNORECASE) + _CopyrightsScanner._copyright_disindicator_re = input_api.re.compile( + r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', input_api.re.IGNORECASE) + + def __init__(self, input_api): self.max_line_numbers_proximity = 3 self.last_a_item_line_number = -200 self.last_b_item_line_number = -100 + self.re = input_api.re def _CloseLineNumbers(self, a, b): return 0 <= a - b <= self.max_line_numbers_proximity @@ -131,17 +139,20 @@ class _CopyrightsScanner(object): not _CopyrightsScanner._copyright_disindicator_re.match(m.group(1)): copyr = m.group(0) # Prettify the authorship string. - copyr = re.sub(r'([,.])?\s*$/', '', copyr) - copyr = re.sub(self._copyright_indicator, '', copyr, flags=re.IGNORECASE) - copyr = re.sub(r'^\s+', '', copyr) - copyr = re.sub(r'\s{2,}', ' ', copyr) - copyr = re.sub(r'\\@', '@', copyr) + copyr = self.re.sub(r'([,.])?\s*$/', '', copyr) + copyr = self.re.sub( + _CopyrightsScanner._copyright_indicator, '', copyr, \ + flags=self.re.IGNORECASE) + copyr = self.re.sub(r'^\s+', '', copyr) + copyr = self.re.sub(r'\s{2,}', ' ', copyr) + copyr = self.re.sub(r'\\@', '@', copyr) return copyr -def FindCopyrights(root_dir, files_to_scan): +def FindCopyrights(input_api, root_dir, files_to_scan): """Determines code autorship, and finds generated files. Args: + input_api: InputAPI, as in presubmit scripts. root_dir: The root directory, to which all other paths are relative. files_to_scan: The list of file names to scan. Returns: @@ -150,47 +161,52 @@ def FindCopyrights(root_dir, files_to_scan): entry -- 'GENERATED_FILE' string. If the file has no copyright info, the corresponding list contains 'NO_COPYRIGHT' string. """ + generated_files_detector = _GeneratedFilesDetector(input_api) + _CopyrightsScanner.StaticInit(input_api) copyrights = [] for file_name in files_to_scan: linenum = 0 - header = '' + header = [] file_copyrights = [] - scanner = _CopyrightsScanner() - with open(os.path.join(root_dir, file_name), 'r') as f: - for l in f.readlines(): - linenum += 1 - if linenum <= 25: - header += l - c = scanner.MatchLine(linenum, l) - if c: - file_copyrights.append(c) - if _IsGeneratedFile(header): - copyrights.append([GENERATED_FILE]) - elif file_copyrights: - copyrights.append(file_copyrights) - else: - copyrights.append([NO_COPYRIGHT]) + scanner = _CopyrightsScanner(input_api) + contents = input_api.ReadFile( + input_api.os_path.join(root_dir, file_name), 'r') + for l in contents.split('\n'): + linenum += 1 + if linenum <= 25: + header.append(l) + c = scanner.MatchLine(linenum, l) + if c: + file_copyrights.append(c) + if generated_files_detector.IsGeneratedFile('\n'.join(header)): + copyrights.append([_GeneratedFilesDetector.GENERATED_FILE]) + elif file_copyrights: + copyrights.append(file_copyrights) + else: + copyrights.append([_GeneratedFilesDetector.NO_COPYRIGHT]) return copyrights -def FindCopyrightViolations(root_dir, files_to_scan): +def FindCopyrightViolations(input_api, root_dir, files_to_scan): """Looks for files that are not belong exlusively to the Chromium Authors. Args: + input_api: InputAPI, as in presubmit scripts. root_dir: The root directory, to which all other paths are relative. files_to_scan: The list of file names to scan. Returns: The list of file names that contain non-Chromium copyrights. """ - copyrights = FindCopyrights(root_dir, files_to_scan) + copyrights = FindCopyrights(input_api, root_dir, files_to_scan) offending_files = [] - allowed_copyrights_re = re.compile( + allowed_copyrights_re = input_api.re.compile( r'^(?:20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' 'All rights reserved.*)$') for f, cs in itertools.izip(files_to_scan, copyrights): - if cs[0] == GENERATED_FILE or cs[0] == NO_COPYRIGHT: + if cs[0] == _GeneratedFilesDetector.GENERATED_FILE or \ + cs[0] == _GeneratedFilesDetector.NO_COPYRIGHT: continue for c in cs: if not allowed_copyrights_re.match(c): - offending_files.append(os.path.normpath(f)) + offending_files.append(input_api.os_path.normpath(f)) break return offending_files |