author     maruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2011-11-27 20:56:51 +0000
committer  maruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2011-11-27 20:56:51 +0000
commit     2fac37585d3dfa0d7cf7a976698aae7627186573 (patch)
tree       d2f233a6afa5d8093f15f2f3c56cc4c009b05904 /chrome/tools
parent     071302929ab813f647e51253af4e885b33eab463 (diff)
Fix python scripts in src/chrome/
Make sure that:
- shebang is only present for executable files
- shebang is #!/usr/bin/env python
- __main__ is only present for executable files
- the file's executable bit is coherent with the presence of a shebang
Also ensure each file ends with exactly one trailing LF.
Minor Python style fixes.
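For reference, the layout this change converges the executable scripts to is roughly the following skeleton (a minimal sketch based on the diffs below; the docstring and the body of main() are placeholders, not code from this change):

    #!/usr/bin/env python
    # Copyright (c) 2011 The Chromium Authors. All rights reserved.
    # Use of this source code is governed by a BSD-style license that can be
    # found in the LICENSE file.

    """One-line description of what the tool does."""

    import sys


    def main():
      # Do the work here and return the process exit code.
      return 0


    if __name__ == '__main__':
      sys.exit(main())

Non-executable library modules, by contrast, carry neither the shebang nor the __main__ block, and their executable bit is cleared.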
TBR=nirnimesh@chromium.org
BUG=105108
TEST=
Review URL: http://codereview.chromium.org/8680018
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@111658 0039d316-1c4b-4281-b951-d872f2087c98
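A hypothetical checker for the shebang/executable-bit rule could look like the sketch below. It is illustrative only and not part of this change; the file name, function names, and exact messages are assumptions.

    #!/usr/bin/env python
    # check_script_coherence.py (hypothetical): flags Python files whose
    # executable bit disagrees with their shebang line.

    import os
    import stat
    import sys


    def CheckCoherence(path):
      """Returns an error string if the shebang and executable bit disagree."""
      with open(path, 'rb') as f:
        first_line = f.readline()
      has_shebang = first_line.startswith(b'#!')
      is_executable = bool(os.stat(path).st_mode & stat.S_IXUSR)
      if has_shebang and not is_executable:
        return '%s: has a shebang but is not executable' % path
      if is_executable and not has_shebang:
        return '%s: is executable but has no shebang' % path
      if has_shebang and first_line.strip() != b'#!/usr/bin/env python':
        return '%s: shebang should be #!/usr/bin/env python' % path
      return None


    def main():
      exit_code = 0
      for path in sys.argv[1:]:
        error = CheckCoherence(path)
        if error:
          print(error)
          exit_code = 1
      return exit_code


    if __name__ == '__main__':
      sys.exit(main())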
Diffstat (limited to 'chrome/tools')
19 files changed, 452 insertions, 470 deletions
diff --git a/chrome/tools/automated_ui_test_tools/ui_action_generator.py b/chrome/tools/automated_ui_test_tools/ui_action_generator.py index 098a68d..b455554 100644..100755 --- a/chrome/tools/automated_ui_test_tools/ui_action_generator.py +++ b/chrome/tools/automated_ui_test_tools/ui_action_generator.py @@ -1,5 +1,4 @@ -#!/usr/bin/python - +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -68,10 +67,7 @@ def CreateUIActionList(actions_per_command, num_commands, given_seed=None): def ParseCommandLine(): - """Parses the command line. - - Returns: - List of options and their values, and unparsed args. + """Returns the list of options and their values, and unparsed args. """ parser = optparse.OptionParser() parser.add_option('-o', '--output', dest='output_file', type='string', @@ -102,7 +98,8 @@ def main(): f.write(command_list) f.close() print command_list + return 0 if __name__ == '__main__': - main() + sys.exit(main()) diff --git a/chrome/tools/build/appid.py b/chrome/tools/build/appid.py index f471a4e..d052bc3 100644..100755 --- a/chrome/tools/build/appid.py +++ b/chrome/tools/build/appid.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2009 The Chromium Authors. All rights reserved. +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. diff --git a/chrome/tools/build/generate_policy_source.py b/chrome/tools/build/generate_policy_source.py index ad234f2..e405713 100644..100755 --- a/chrome/tools/build/generate_policy_source.py +++ b/chrome/tools/build/generate_policy_source.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -51,7 +51,7 @@ def main(): if len(args) != 3: print "exactly platform, chromium_os flag and input file must be specified." parser.print_help() - sys.exit(2) + return 2 template_file_contents = _LoadJSONFile(args[2]); if opts.header_path is not None: _WritePolicyConstantHeader(template_file_contents, args, opts); @@ -63,6 +63,7 @@ def main(): _WriteProtobuf(template_file_contents, args, opts.proto_path) if opts.decoder_path is not None: _WriteProtobufParser(template_file_contents, args, opts.decoder_path) + return 0 #------------------ shared helpers ---------------------------------# @@ -462,6 +463,5 @@ def _WriteProtobufParser(template_file_contents, args, outfilepath): f.write(CPP_FOOT) -#------------------ main() -----------------------------------------# if __name__ == '__main__': - main(); + sys.exit(main()) diff --git a/chrome/tools/build/win/create_installer_archive.py b/chrome/tools/build/win/create_installer_archive.py index 2b59fa0..5a4e96c 100755 --- a/chrome/tools/build/win/create_installer_archive.py +++ b/chrome/tools/build/win/create_installer_archive.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. 
diff --git a/chrome/tools/build/win/dependencies.py b/chrome/tools/build/win/dependencies.py index 353c89c..01d9254 100755 --- a/chrome/tools/build/win/dependencies.py +++ b/chrome/tools/build/win/dependencies.py @@ -1,5 +1,5 @@ -#!/usr/bin/python -# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -40,6 +40,7 @@ def RunSystemCommand(cmd): except: raise Error("Failed to execute: " + cmd) + def RunDumpbin(binary_file): """Runs dumpbin and parses its output. @@ -196,8 +197,19 @@ def VerifyDependents(pe_name, dependents, delay_loaded, list_file, verbose): return max(deps_result, delayed_result) -def main(options, args): +def main(): # PE means portable executable. It's any .DLL, .EXE, .SYS, .AX, etc. + usage = "usage: %prog [options] input output" + option_parser = optparse.OptionParser(usage=usage) + option_parser.add_option("-d", + "--debug", + dest="debug", + action="store_true", + default=False, + help="Display debugging information") + options, args = option_parser.parse_args() + if len(args) != 2: + option_parser.error("Incorrect number of arguments") pe_name = args[0] deps_file = args[1] dependents, delay_loaded = RunDumpbin(pe_name) @@ -211,15 +223,4 @@ def main(options, args): if '__main__' == __name__: - usage = "usage: %prog [options] input output" - option_parser = optparse.OptionParser(usage = usage) - option_parser.add_option("-d", - "--debug", - dest="debug", - action="store_true", - default=False, - help="Display debugging information") - options, args = option_parser.parse_args() - if len(args) != 2: - option_parser.error("Incorrect number of arguments") - sys.exit(main(options, args)) + sys.exit(main()) diff --git a/chrome/tools/build/win/make_policy_zip.py b/chrome/tools/build/win/make_policy_zip.py index 60037c3..0822483 100644..100755 --- a/chrome/tools/build/win/make_policy_zip.py +++ b/chrome/tools/build/win/make_policy_zip.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -12,6 +13,7 @@ import os import sys import zipfile + def add_files_to_zip(zip_file, base_dir, file_list): """Pack a list of files into a zip archive, that is already opened for writing. @@ -26,6 +28,7 @@ def add_files_to_zip(zip_file, base_dir, file_list): zip_file.write(base_dir + file_path, file_path) return 0 + def get_grd_outputs(grit_cmd, grit_defines, grd_file, grd_strip_path_prefix): grit_path = os.path.join(os.getcwd(), os.path.dirname(grit_cmd)) sys.path.append(grit_path) @@ -37,6 +40,7 @@ def get_grd_outputs(grit_cmd, grit_defines, grd_file, grd_strip_path_prefix): result.append(item[len(grd_strip_path_prefix):]) return result + def main(argv): """Pack a list of files into a zip archive. @@ -73,6 +77,6 @@ def main(argv): finally: zip_file.close() + if '__main__' == __name__: sys.exit(main(sys.argv)) - diff --git a/chrome/tools/build/win/scan_server_dlls.py b/chrome/tools/build/win/scan_server_dlls.py index 2adf8d6..68ebb93 100644..100755 --- a/chrome/tools/build/win/scan_server_dlls.py +++ b/chrome/tools/build/win/scan_server_dlls.py @@ -1,12 +1,11 @@ -#!/usr/bin/python -# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. 
# Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Script used to scan for server DLLs at build time and build a header included by setup.exe. This header contains an array of the names of the DLLs that need registering at install time. - """ import ConfigParser @@ -128,19 +127,11 @@ def RunSystemCommand(cmd): raise "Error while running cmd: %s" % cmd -def main(options): +def main(): """Main method that reads input file, scans <build_output>\servers for matches to files described in the input file. A header file for the setup project is then generated. """ - config = Readconfig(options.output_dir, options.input_file) - registered_dll_list = ScanServerDlls(config, options.distribution, - options.output_dir) - CreateRegisteredDllIncludeFile(registered_dll_list, - options.header_output_dir) - - -if '__main__' == __name__: option_parser = optparse.OptionParser() option_parser.add_option('-o', '--output_dir', help='Build Output directory') option_parser.add_option('-x', '--header_output_dir', @@ -150,4 +141,13 @@ if '__main__' == __name__: help='Name of Chromium Distribution. Optional.') options, args = option_parser.parse_args() - sys.exit(main(options)) + config = Readconfig(options.output_dir, options.input_file) + registered_dll_list = ScanServerDlls(config, options.distribution, + options.output_dir) + CreateRegisteredDllIncludeFile(registered_dll_list, + options.header_output_dir) + return 0 + + +if '__main__' == __name__: + sys.exit(main()) diff --git a/chrome/tools/build/win/sln_deps.py b/chrome/tools/build/win/sln_deps.py index ef7803e..ebb371a 100755 --- a/chrome/tools/build/win/sln_deps.py +++ b/chrome/tools/build/win/sln_deps.py @@ -1,5 +1,5 @@ -#!/usr/bin/python -# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -63,7 +63,7 @@ def ScanSlnFile(filename): return projects -def main(filename, project_to_scan, reverse): +def sln_deps(filename, project_to_scan, reverse): """Displays the project's dependencies.""" project_to_scan = project_to_scan.lower() @@ -91,9 +91,10 @@ def main(filename, project_to_scan, reverse): deps_name = [projects[d].name for d in project.deps] print "\n".join(str(" " + name) for name in sorted(deps_name, key=str.lower)) + return 0 -if __name__ == '__main__': +def main(): usage = "usage: %prog [options] solution [project]" description = ("Display the dependencies of a project in human readable" @@ -116,5 +117,8 @@ if __name__ == '__main__': project_to_scan = "" if len(args) == 2: project_to_scan = args[1] - main(args[0], project_to_scan, options.reverse) + return sln_deps(args[0], project_to_scan, options.reverse) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/chrome/tools/build/win/sort_sln.py b/chrome/tools/build/win/sort_sln.py deleted file mode 100755 index ea88ce4..0000000 --- a/chrome/tools/build/win/sort_sln.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -import sys - -if len(sys.argv) != 2: - print """Usage: sort_sln.py <SOLUTIONNAME>.sln -to sort the solution file to a normalized scheme. 
Do this before checking in -changes to a solution file to avoid having a lot of unnecessary diffs.""" - sys.exit(1) - -filename = sys.argv[1] -print "Sorting " + filename; - -try: - sln = open(filename, "r"); -except IOError: - print "Unable to open " + filename + " for reading." - sys.exit(1) - -output = "" -seclines = None -while 1: - line = sln.readline() - if not line: - break - - if seclines is not None: - # Process the end of a section, dump the sorted lines - if line.lstrip().startswith('End'): - output = output + ''.join(sorted(seclines)) - seclines = None - # Process within a section - else: - seclines.append(line) - continue - - # Process the start of a section - if (line.lstrip().startswith('GlobalSection') or - line.lstrip().startswith('ProjectSection')): - if seclines: raise Exception('Already in a section') - seclines = [] - - output = output + line - -sln.close() -try: - sln = open(filename, "w") - sln.write(output) -except IOError: - print "Unable to write to " + filename - sys.exit(1); -print "Done." - diff --git a/chrome/tools/check_grd_for_unused_strings.py b/chrome/tools/check_grd_for_unused_strings.py index 3bc57d7..b0f8cb8 100755 --- a/chrome/tools/check_grd_for_unused_strings.py +++ b/chrome/tools/check_grd_for_unused_strings.py @@ -1,6 +1,5 @@ -#!/usr/bin/python - -# Copyright (c) 2010 The Chromium Authors. All rights reserved. +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -111,7 +110,7 @@ def CheckForUnusedGrdIDsInSources(grd_files, src_dirs): return 0 -if __name__ == '__main__': +def main(): # script lives in src/chrome/tools chrome_tools_dir = os.path.dirname(os.path.abspath(sys.argv[0])) src_dir = os.path.dirname(os.path.dirname(chrome_tools_dir)) @@ -162,4 +161,8 @@ if __name__ == '__main__': os.path.join(src_dir, 'third_party', 'mozilla_security_manager'), ] - sys.exit(CheckForUnusedGrdIDsInSources(grd_files, src_dirs)) + return CheckForUnusedGrdIDsInSources(grd_files, src_dirs) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/chrome/tools/extract_actions.py b/chrome/tools/extract_actions.py index 650f6e4..09bab23 100755 --- a/chrome/tools/extract_actions.py +++ b/chrome/tools/extract_actions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -439,6 +439,8 @@ def main(argv): if hash_output: print "Done. Do not forget to add chromeactions.txt to your changelist" + return 0 + if '__main__' == __name__: - main(sys.argv) + sys.exit(main(sys.argv)) diff --git a/chrome/tools/extract_histograms.py b/chrome/tools/extract_histograms.py index 8c002f3..82fc9e9 100755 --- a/chrome/tools/extract_histograms.py +++ b/chrome/tools/extract_histograms.py @@ -1,5 +1,5 @@ -#!/usr/bin/python -# Copyright (c) 2009 The Chromium Authors. All rights reserved. +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. 
@@ -35,6 +35,7 @@ def GrepForHistograms(path, histograms): if match: histograms.add(match.group(1)) + def WalkDirectory(root_path, histograms): for path, dirs, files in os.walk(root_path): if '.svn' in dirs: @@ -44,6 +45,7 @@ def WalkDirectory(root_path, histograms): if ext == '.cc': GrepForHistograms(os.path.join(path, file), histograms) + def main(argv): histograms = set() @@ -53,6 +55,8 @@ def main(argv): # Print out the histograms as a sorted list. for histogram in sorted(histograms): print histogram + return 0 + if '__main__' == __name__: - main(sys.argv) + sys.exit(main(sys.argv)) diff --git a/chrome/tools/history-viz.py b/chrome/tools/history-viz.py index 6f82126..fccbb31 100755 --- a/chrome/tools/history-viz.py +++ b/chrome/tools/history-viz.py @@ -1,5 +1,5 @@ -#!/usr/bin/python -# Copyright (c) 2009 The Chromium Authors. All rights reserved. +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -17,7 +17,18 @@ import subprocess import sys import urlparse -class URL: + +# Some transition types, copied from page_transition_types.h. +TRANS_TYPES = { + 0: 'link', + 1: 'typed', + 2: 'most-visited', + 3: 'auto subframe', + 7: 'form', +} + + +class URL(object): """Represents a broken-down URL from our most visited database.""" def __init__(self, id, url): @@ -67,7 +78,8 @@ class URL: lines.append(line) return '\n'.join(lines) -class Edge: + +class Edge(object): """Represents an edge in the history graph, connecting two pages. If a link is traversed twice, it is one Edge with two entries in @@ -97,6 +109,7 @@ class Edge: # edge['chain'] = chain return all + def ClusterBy(objs, pred): """Group a list of objects by a predicate. @@ -109,12 +122,14 @@ def ClusterBy(objs, pred): clusters[cluster].append(obj) return clusters -def EscapeDot(str): + +def EscapeDot(string): """Escape a string suitable for embedding in a graphviz graph.""" # TODO(evanm): this is likely not sufficient. - return str.replace('\n', '\\n') + return string.replace('\n', '\\n') + -class SQLite: +class SQLite(object): """Trivial interface to executing SQLite queries. Spawns a new process with each call.""" def __init__(self, file=None): @@ -132,6 +147,7 @@ class SQLite: row = line.strip().split('\t') yield row + def LoadHistory(filename): db = SQLite(filename) @@ -157,85 +173,81 @@ def LoadHistory(filename): return urls, edges -# Some transition types, copied from page_transition_types.h. -TRANS_TYPES = { - 0: 'link', - 1: 'typed', - 2: 'most-visited', - 3: 'auto subframe', - 7: 'form', -} -urls, edges = LoadHistory(sys.argv[1]) - -print 'digraph G {' -print ' graph [rankdir=LR]' # Display left to right. -print ' node [shape=box]' # Display nodes as boxes. -print ' subgraph { rank=source; 0 [label="start"] }' - -# Output all the nodes within graph clusters. -hosts = ClusterBy(urls.values(), lambda url: url.host) -for i, (host, urls) in enumerate(hosts.items()): - # Cluster all URLs under this host if it has more than one entry. - host_clustered = len(urls) > 1 - if host_clustered: - print 'subgraph clusterhost%d {' % i - print ' label="%s"' % host - paths = ClusterBy(urls, lambda url: url.path) - for j, (path, urls) in enumerate(paths.items()): - # Cluster all URLs under this host if it has more than one entry. 
- path_clustered = host_clustered and len(urls) > 1 - if path_clustered: - print ' subgraph cluster%d%d {' % (i, j) - print ' label="%s"' % path - for url in urls: - if url.id == '0': continue # We already output the special start node. - pretty = url.PrettyPrint(include_host=not host_clustered, - include_path=not path_clustered) - print ' %s [label="%s"]' % (url.id, EscapeDot(pretty)) - if path_clustered: - print ' }' - if host_clustered: - print '}' - -# Output all the edges between nodes. -for src, dsts in edges.items(): - for dst, edge in dsts.items(): - # Gather up all the transitions into the label. - label = [] # Label for the edge. - transitions = edge.Transitions() - for trans, count in transitions.items(): - text = '' - if count > 1: - text = '%dx ' % count - base_type = trans & 0xFF - redir = (trans & 0xC0000000) != 0 - start = (trans & 0x10000000) != 0 - end = (trans & 0x20000000) != 0 - if start or end: - if start: - text += '<' - if end: - text += '>' - text += ' ' - if redir: - text += 'R ' - text += TRANS_TYPES.get(base_type, 'trans%d' % base_type) - label.append(text) - if len(label) == 0: - continue - - edgeattrs = [] # Graphviz attributes for the edge. - # If the edge is from the start and the transitions are fishy, make it - # display as a dotted line. - if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0): - edgeattrs.append('style=dashed') - if len(label) > 0: - edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label))) - - out = '%s -> %s' % (src, dst) - if len(edgeattrs) > 0: - out += ' [%s]' % ','.join(edgeattrs) - print out -print '}' +def main(): + urls, edges = LoadHistory(sys.argv[1]) + print 'digraph G {' + print ' graph [rankdir=LR]' # Display left to right. + print ' node [shape=box]' # Display nodes as boxes. + print ' subgraph { rank=source; 0 [label="start"] }' + # Output all the nodes within graph clusters. + hosts = ClusterBy(urls.values(), lambda url: url.host) + for i, (host, urls) in enumerate(hosts.items()): + # Cluster all URLs under this host if it has more than one entry. + host_clustered = len(urls) > 1 + if host_clustered: + print 'subgraph clusterhost%d {' % i + print ' label="%s"' % host + paths = ClusterBy(urls, lambda url: url.path) + for j, (path, urls) in enumerate(paths.items()): + # Cluster all URLs under this host if it has more than one entry. + path_clustered = host_clustered and len(urls) > 1 + if path_clustered: + print ' subgraph cluster%d%d {' % (i, j) + print ' label="%s"' % path + for url in urls: + if url.id == '0': continue # We already output the special start node. + pretty = url.PrettyPrint(include_host=not host_clustered, + include_path=not path_clustered) + print ' %s [label="%s"]' % (url.id, EscapeDot(pretty)) + if path_clustered: + print ' }' + if host_clustered: + print '}' + + # Output all the edges between nodes. + for src, dsts in edges.items(): + for dst, edge in dsts.items(): + # Gather up all the transitions into the label. + label = [] # Label for the edge. + transitions = edge.Transitions() + for trans, count in transitions.items(): + text = '' + if count > 1: + text = '%dx ' % count + base_type = trans & 0xFF + redir = (trans & 0xC0000000) != 0 + start = (trans & 0x10000000) != 0 + end = (trans & 0x20000000) != 0 + if start or end: + if start: + text += '<' + if end: + text += '>' + text += ' ' + if redir: + text += 'R ' + text += TRANS_TYPES.get(base_type, 'trans%d' % base_type) + label.append(text) + if len(label) == 0: + continue + + edgeattrs = [] # Graphviz attributes for the edge. 
+ # If the edge is from the start and the transitions are fishy, make it + # display as a dotted line. + if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0): + edgeattrs.append('style=dashed') + if len(label) > 0: + edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label))) + + out = '%s -> %s' % (src, dst) + if len(edgeattrs) > 0: + out += ' [%s]' % ','.join(edgeattrs) + print out + print '}' + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/chrome/tools/inconsistent-eol.py b/chrome/tools/inconsistent-eol.py index 4ab3596..ef25245 100755 --- a/chrome/tools/inconsistent-eol.py +++ b/chrome/tools/inconsistent-eol.py @@ -1,5 +1,5 @@ -#!/usr/bin/python -# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -38,6 +38,7 @@ def CountChars(text, str): logging.debug(len(split) - 1) return len(split) - 1 + def PrevailingEOLName(crlf, cr, lf): """Describe the most common line ending. @@ -56,6 +57,7 @@ def PrevailingEOLName(crlf, cr, lf): return 'crlf' return 'lf' + def FixEndings(file, crlf, cr, lf): """Change the file's line endings to CRLF or LF, whichever is more common.""" most = max(crlf, cr, lf) @@ -99,7 +101,8 @@ def ProcessFiles(filelist): print '%s: mostly %s' % (filename, PrevailingEOLName(crlf, cr, lf)) FixEndings(filename, crlf, cr, lf) -def main(options, args): + +def process(options, args): """Process the files.""" if not args or len(args) < 1: raise Error('No files given.') @@ -111,8 +114,10 @@ def main(options, args): else: filelist = args ProcessFiles(filelist) + return 0 -if '__main__' == __name__: + +def main(): if DEBUGGING: debug_level = logging.DEBUG else: @@ -131,5 +136,8 @@ if '__main__' == __name__: default=False, help="Force any files with CRLF to LF instead.") options, args = option_parser.parse_args() + return process(options, args) + - sys.exit(main(options, args)) +if '__main__' == __name__: + sys.exit(main()) diff --git a/chrome/tools/process_dumps_linux.py b/chrome/tools/process_dumps_linux.py index 3e3bf3e..1f0ba9d 100755 --- a/chrome/tools/process_dumps_linux.py +++ b/chrome/tools/process_dumps_linux.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -265,7 +265,9 @@ def main_linux(options, args): return 0 -if '__main__' == __name__: +def main(): + if not sys.platform.startswith('linux'): + return 1 parser = optparse.OptionParser() parser.add_option('', '--processor-dir', type='string', default='', help='The directory where the processor is installed. ' @@ -291,8 +293,8 @@ if '__main__' == __name__: 'Default: chrome') (options, args) = parser.parse_args() + return main_linux(options, args) - if sys.platform.startswith('linux'): - sys.exit(main_linux(options, args)) - else: - sys.exit(1) + +if '__main__' == __name__: + sys.exit(main()) diff --git a/chrome/tools/webforms_aggregator.py b/chrome/tools/webforms_aggregator.py index 3d5327b..16e5273 100644..100755 --- a/chrome/tools/webforms_aggregator.py +++ b/chrome/tools/webforms_aggregator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. 
# Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -722,7 +722,6 @@ class ThreadedCrawler(object): def main(): - # Command line options. usage = 'usage: %prog [options] single_url_or_urls_filename' parser = optparse.OptionParser(usage) parser.add_option( @@ -734,7 +733,7 @@ def main(): if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']: print 'Wrong log_level argument.' parser.print_help() - sys.exit(1) + return 1 options.log_level = getattr(logging, options.log_level) if len(args) != 1: @@ -762,7 +761,8 @@ def main(): logger.info('Started at: %s\n', t0) logger.info('Ended at: %s\n', t1) logger.info('Total execution time: %s\n', delta_t) + return 0 if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/chrome/tools/webforms_aggregator_tests.py b/chrome/tools/webforms_aggregator_tests.py index fc12dc3..2eb26bb 100644..100755 --- a/chrome/tools/webforms_aggregator_tests.py +++ b/chrome/tools/webforms_aggregator_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. diff --git a/chrome/tools/webforms_aggregator_unittests.py b/chrome/tools/webforms_aggregator_unittests.py index 68169eb..00ea2bd 100644..100755 --- a/chrome/tools/webforms_aggregator_unittests.py +++ b/chrome/tools/webforms_aggregator_unittests.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. diff --git a/chrome/tools/webforms_extractor.py b/chrome/tools/webforms_extractor.py index 71fed7c..1dd1d95 100644..100755 --- a/chrome/tools/webforms_extractor.py +++ b/chrome/tools/webforms_extractor.py @@ -1,253 +1,254 @@ -#!/usr/bin/python
-# Copyright (c) 2011 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be found
-# in the LICENSE file.
-
-"""Extracts registration forms from the corresponding HTML files.
-
-Used for extracting forms within HTML files. This script is used in
-conjunction with the webforms_aggregator.py script, which aggregates web pages
-with fillable forms (i.e registration forms).
-
-The purpose of this script is to extract out all non-form elements that may be
-causing parsing errors and timeout issues when running browser_tests.
-
-This script extracts all forms from a HTML file.
-If there are multiple forms per downloaded site, multiple files are created
-for each form.
-
-Used as a standalone script but assumes that it is run from the directory in
-which it is checked into.
-
-Usage: forms_extractor.py [options]
-
-Options:
- -l LOG_LEVEL, --log_level=LOG_LEVEL,
- LOG_LEVEL: debug, info, warning or error [default: error]
- -j, --js extracts javascript elements from web form.
- -h, --help show this help message and exit
-"""
-
-import glob
-import logging
-from optparse import OptionParser
-import os
-import re
-import sys
-
-
-class FormsExtractor(object):
- """Extracts HTML files, leaving only registration forms from the HTML file."""
- _HTML_FILES_PATTERN = r'*.html'
- _HTML_FILE_PREFIX = r'grabber-'
- _FORM_FILE_PREFIX = r'grabber-stripped-'
-
- _REGISTRATION_PAGES_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill',
- 'heuristics', 'input')
- _EXTRACTED_FORMS_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill',
- 'heuristics', 'input')
-
- logger = logging.getLogger(__name__)
- log_handlers = {'StreamHandler': None}
-
- # This pattern is used for retrieving the form location comment located at the
- # top of each downloaded HTML file indicating where the form originated from.
- _RE_FORM_LOCATION_PATTERN = re.compile(
- ur"""
- <!--Form\s{1}Location: # Starting of form location comment.
- .*? # Any characters (non-greedy).
- --> # Ending of the form comment.
- """, re.U | re.S | re.I | re.X)
-
- # This pattern is used for removing all script code.
- _RE_SCRIPT_PATTERN = re.compile(
- ur"""
- <script # A new opening '<script' tag.
- \b # The end of the word 'script'.
- .*? # Any characters (non-greedy).
- > # Ending of the (opening) tag: '>'.
- .*? # Any characters (non-greedy) between the tags.
- </script\s*> # The '</script>' closing tag.
- """, re.U | re.S | re.I | re.X)
-
- # This pattern is used for removing all href js code.
- _RE_HREF_JS_PATTERN = re.compile(
- ur"""
- \bhref # The word href and its beginning.
- \s*=\s* # The '=' with all whitespace before and after it.
- (?P<quote>[\'\"]) # A single or double quote which is captured.
- \s*javascript\s*: # The word 'javascript:' with any whitespace possible.
- .*? # Any characters (non-greedy) between the quotes.
- \1 # The previously captured single or double quote.
- """, re.U | re.S | re.I | re.X)
-
- _RE_EVENT_EXPR = (
- ur"""
- \b # The beginning of a new word.
- on\w+? # All words starting with 'on' (non-greedy)
- # example: |onmouseover|.
- \s*=\s* # The '=' with all whitespace before and after it.
- (?P<quote>[\'\"]) # A captured single or double quote.
- .*? # Any characters (non-greedy) between the quotes.
- \1 # The previously captured single or double quote.
- """)
-
- # This pattern is used for removing code with js events, such as |onload|.
- # By adding the leading |ur'<[^<>]*?'| and the trailing |'ur'[^<>]*?>'| the
- # pattern matches to strings such as '<tr class="nav"
- # onmouseover="mOvr1(this);" onmouseout="mOut1(this);">'
- _RE_TAG_WITH_EVENTS_PATTERN = re.compile(
- ur"""
- < # Matches character '<'.
- [^<>]*? # Matches any characters except '<' and '>' (non-greedy).""" +
- _RE_EVENT_EXPR +
- ur"""
- [^<>]*? # Matches any characters except '<' and '>' (non-greedy).
- > # Matches character '>'.
- """, re.U | re.S | re.I | re.X)
-
- # Adds whitespace chars at the end of the matched event. Also match trailing
- # whitespaces for JS events. Do not match leading whitespace.
- # For example: |< /form>| is invalid HTML and does not exist but |</form >| is
- # considered valid HTML.
- _RE_EVENT_PATTERN = re.compile(
- _RE_EVENT_EXPR + ur'\s*', re.U | re.S | re.I | re.X)
-
- # This pattern is used for finding form elements.
- _RE_FORM_PATTERN = re.compile(
- ur"""
- <form # A new opening '<form' tag.
- \b # The end of the word 'form'.
- .*? # Any characters (non-greedy).
- > # Ending of the (opening) tag: '>'.
- .*? # Any characters (non-greedy) between the tags.
- </form\s*> # The '</form>' closing tag.
- """, re.U | re.S | re.I | re.X)
-
- def __init__(self, input_dir=_REGISTRATION_PAGES_DIR,
- output_dir=_EXTRACTED_FORMS_DIR, logging_level=None):
- """Creates a FormsExtractor object.
-
- Args:
- input_dir: the directory of HTML files.
- output_dir: the directory where the registration form files will be
- saved.
- logging_level: verbosity level, default is None.
-
- Raises:
- IOError exception if input directory doesn't exist.
- """
- if logging_level:
- if not self.log_handlers['StreamHandler']:
- console = logging.StreamHandler()
- console.setLevel(logging.DEBUG)
- self.log_handlers['StreamHandler'] = console
- self.logger.addHandler(console)
- self.logger.setLevel(logging_level)
- else:
- if self.log_handlers['StreamHandler']:
- self.logger.removeHandler(self.log_handlers['StreamHandler'])
- self.log_handlers['StreamHandler'] = None
-
- self._input_dir = input_dir
- self._output_dir = output_dir
- if not os.path.isdir(self._input_dir):
- error_msg = 'Directory "%s" doesn\'t exist.' % self._input_dir
- self.logger.error('Error: %s', error_msg)
- raise IOError(error_msg)
- if not os.path.isdir(output_dir):
- os.makedirs(output_dir)
- self._form_location_comment = ''
-
- def _SubstituteAllEvents(self, matchobj):
- """Remove all js events that are present as attributes within a tag.
-
- Args:
- matchobj: A regexp |re.MatchObject| containing text that has at least one
- event. Example: |<tr class="nav" onmouseover="mOvr1(this);"
- onmouseout="mOut1(this);">|.
-
- Returns:
- The text containing the tag with all the attributes except for the tags
- with events. Example: |<tr class="nav">|.
- """
- tag_with_all_attrs = matchobj.group(0)
- return self._RE_EVENT_PATTERN.sub('', tag_with_all_attrs)
-
- def Extract(self, strip_js_only):
- """Extracts and saves the extracted registration forms.
-
- Iterates through all the HTML files.
-
- Args:
- strip_js_only: If True, only Javascript is stripped from the HTML content.
- Otherwise, all non-form elements are stripped.
- """
- pathname_pattern = os.path.join(self._input_dir, self._HTML_FILES_PATTERN)
- html_files = [f for f in glob.glob(pathname_pattern) if os.path.isfile(f)]
- for filename in html_files:
- self.logger.info('Stripping file "%s" ...', filename)
- with open(filename, 'U') as f:
- html_content = self._RE_TAG_WITH_EVENTS_PATTERN.sub(
- self._SubstituteAllEvents,
- self._RE_HREF_JS_PATTERN.sub(
- '', self._RE_SCRIPT_PATTERN.sub('', f.read())))
-
- form_filename = os.path.split(filename)[1] # Path dropped.
- form_filename = form_filename.replace(self._HTML_FILE_PREFIX, '', 1)
- (form_filename, extension) = os.path.splitext(form_filename)
- form_filename = (self._FORM_FILE_PREFIX + form_filename +
- '%s' + extension)
- form_filename = os.path.join(self._output_dir, form_filename)
- if strip_js_only:
- form_filename = form_filename % ''
- try:
- with open(form_filename, 'w') as f:
- f.write(html_content)
- except IOError as e:
- self.logger.error('Error: %s', e)
- continue
- else: # Remove all non form elements.
- match = self._RE_FORM_LOCATION_PATTERN.search(html_content)
- if match:
- form_location_comment = match.group() + os.linesep
- else:
- form_location_comment = ''
- forms_iterator = self._RE_FORM_PATTERN.finditer(html_content)
- for form_number, form_match in enumerate(forms_iterator, start=1):
- form_content = form_match.group()
- numbered_form_filename = form_filename % form_number
- try:
- with open(numbered_form_filename, 'w') as f:
- f.write(form_location_comment)
- f.write(form_content)
- except IOError as e:
- self.logger.error('Error: %s', e)
- continue
- self.logger.info('\tFile "%s" extracted SUCCESSFULLY!', filename)
-
-
-def main():
- # Command line options.
- parser = OptionParser()
- parser.add_option(
- '-l', '--log_level', metavar='LOG_LEVEL', default='error',
- help='LOG_LEVEL: debug, info, warning or error [default: %default]')
- parser.add_option(
- '-j', '--js', dest='js', action='store_true', default=False,
- help='Removes all javascript elements [default: %default]')
-
- (options, args) = parser.parse_args()
- options.log_level = options.log_level.upper()
- if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']:
- print 'Wrong log_level argument.'
- parser.print_help()
- sys.exit(1)
-
- options.log_level = getattr(logging, options.log_level)
- extractor = FormsExtractor(logging_level=options.log_level)
- extractor.Extract(options.js)
-
-
-if __name__ == '__main__':
- main()
+#!/usr/bin/env python +# Copyright (c) 2011 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + + +"""Extracts registration forms from the corresponding HTML files. + +Used for extracting forms within HTML files. This script is used in +conjunction with the webforms_aggregator.py script, which aggregates web pages +with fillable forms (i.e registration forms). + +The purpose of this script is to extract out all non-form elements that may be +causing parsing errors and timeout issues when running browser_tests. + +This script extracts all forms from a HTML file. +If there are multiple forms per downloaded site, multiple files are created +for each form. + +Used as a standalone script but assumes that it is run from the directory in +which it is checked into. + +Usage: forms_extractor.py [options] + +Options: + -l LOG_LEVEL, --log_level=LOG_LEVEL, + LOG_LEVEL: debug, info, warning or error [default: error] + -j, --js extracts javascript elements from web form. + -h, --help show this help message and exit +""" + +import glob +import logging +from optparse import OptionParser +import os +import re +import sys + + +class FormsExtractor(object): + """Extracts HTML files, leaving only registration forms from the HTML file.""" + _HTML_FILES_PATTERN = r'*.html' + _HTML_FILE_PREFIX = r'grabber-' + _FORM_FILE_PREFIX = r'grabber-stripped-' + + _REGISTRATION_PAGES_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill', + 'heuristics', 'input') + _EXTRACTED_FORMS_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill', + 'heuristics', 'input') + + logger = logging.getLogger(__name__) + log_handlers = {'StreamHandler': None} + + # This pattern is used for retrieving the form location comment located at the + # top of each downloaded HTML file indicating where the form originated from. + _RE_FORM_LOCATION_PATTERN = re.compile( + ur""" + <!--Form\s{1}Location: # Starting of form location comment. + .*? # Any characters (non-greedy). + --> # Ending of the form comment. + """, re.U | re.S | re.I | re.X) + + # This pattern is used for removing all script code. + _RE_SCRIPT_PATTERN = re.compile( + ur""" + <script # A new opening '<script' tag. + \b # The end of the word 'script'. + .*? # Any characters (non-greedy). + > # Ending of the (opening) tag: '>'. + .*? # Any characters (non-greedy) between the tags. + </script\s*> # The '</script>' closing tag. + """, re.U | re.S | re.I | re.X) + + # This pattern is used for removing all href js code. + _RE_HREF_JS_PATTERN = re.compile( + ur""" + \bhref # The word href and its beginning. + \s*=\s* # The '=' with all whitespace before and after it. + (?P<quote>[\'\"]) # A single or double quote which is captured. + \s*javascript\s*: # The word 'javascript:' with any whitespace possible. + .*? # Any characters (non-greedy) between the quotes. + \1 # The previously captured single or double quote. + """, re.U | re.S | re.I | re.X) + + _RE_EVENT_EXPR = ( + ur""" + \b # The beginning of a new word. + on\w+? # All words starting with 'on' (non-greedy) + # example: |onmouseover|. + \s*=\s* # The '=' with all whitespace before and after it. + (?P<quote>[\'\"]) # A captured single or double quote. + .*? # Any characters (non-greedy) between the quotes. + \1 # The previously captured single or double quote. + """) + + # This pattern is used for removing code with js events, such as |onload|. 
+ # By adding the leading |ur'<[^<>]*?'| and the trailing |'ur'[^<>]*?>'| the + # pattern matches to strings such as '<tr class="nav" + # onmouseover="mOvr1(this);" onmouseout="mOut1(this);">' + _RE_TAG_WITH_EVENTS_PATTERN = re.compile( + ur""" + < # Matches character '<'. + [^<>]*? # Matches any characters except '<' and '>' (non-greedy).""" + + _RE_EVENT_EXPR + + ur""" + [^<>]*? # Matches any characters except '<' and '>' (non-greedy). + > # Matches character '>'. + """, re.U | re.S | re.I | re.X) + + # Adds whitespace chars at the end of the matched event. Also match trailing + # whitespaces for JS events. Do not match leading whitespace. + # For example: |< /form>| is invalid HTML and does not exist but |</form >| is + # considered valid HTML. + _RE_EVENT_PATTERN = re.compile( + _RE_EVENT_EXPR + ur'\s*', re.U | re.S | re.I | re.X) + + # This pattern is used for finding form elements. + _RE_FORM_PATTERN = re.compile( + ur""" + <form # A new opening '<form' tag. + \b # The end of the word 'form'. + .*? # Any characters (non-greedy). + > # Ending of the (opening) tag: '>'. + .*? # Any characters (non-greedy) between the tags. + </form\s*> # The '</form>' closing tag. + """, re.U | re.S | re.I | re.X) + + def __init__(self, input_dir=_REGISTRATION_PAGES_DIR, + output_dir=_EXTRACTED_FORMS_DIR, logging_level=None): + """Creates a FormsExtractor object. + + Args: + input_dir: the directory of HTML files. + output_dir: the directory where the registration form files will be + saved. + logging_level: verbosity level, default is None. + + Raises: + IOError exception if input directory doesn't exist. + """ + if logging_level: + if not self.log_handlers['StreamHandler']: + console = logging.StreamHandler() + console.setLevel(logging.DEBUG) + self.log_handlers['StreamHandler'] = console + self.logger.addHandler(console) + self.logger.setLevel(logging_level) + else: + if self.log_handlers['StreamHandler']: + self.logger.removeHandler(self.log_handlers['StreamHandler']) + self.log_handlers['StreamHandler'] = None + + self._input_dir = input_dir + self._output_dir = output_dir + if not os.path.isdir(self._input_dir): + error_msg = 'Directory "%s" doesn\'t exist.' % self._input_dir + self.logger.error('Error: %s', error_msg) + raise IOError(error_msg) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + self._form_location_comment = '' + + def _SubstituteAllEvents(self, matchobj): + """Remove all js events that are present as attributes within a tag. + + Args: + matchobj: A regexp |re.MatchObject| containing text that has at least one + event. Example: |<tr class="nav" onmouseover="mOvr1(this);" + onmouseout="mOut1(this);">|. + + Returns: + The text containing the tag with all the attributes except for the tags + with events. Example: |<tr class="nav">|. + """ + tag_with_all_attrs = matchobj.group(0) + return self._RE_EVENT_PATTERN.sub('', tag_with_all_attrs) + + def Extract(self, strip_js_only): + """Extracts and saves the extracted registration forms. + + Iterates through all the HTML files. + + Args: + strip_js_only: If True, only Javascript is stripped from the HTML content. + Otherwise, all non-form elements are stripped. 
+ """ + pathname_pattern = os.path.join(self._input_dir, self._HTML_FILES_PATTERN) + html_files = [f for f in glob.glob(pathname_pattern) if os.path.isfile(f)] + for filename in html_files: + self.logger.info('Stripping file "%s" ...', filename) + with open(filename, 'U') as f: + html_content = self._RE_TAG_WITH_EVENTS_PATTERN.sub( + self._SubstituteAllEvents, + self._RE_HREF_JS_PATTERN.sub( + '', self._RE_SCRIPT_PATTERN.sub('', f.read()))) + + form_filename = os.path.split(filename)[1] # Path dropped. + form_filename = form_filename.replace(self._HTML_FILE_PREFIX, '', 1) + (form_filename, extension) = os.path.splitext(form_filename) + form_filename = (self._FORM_FILE_PREFIX + form_filename + + '%s' + extension) + form_filename = os.path.join(self._output_dir, form_filename) + if strip_js_only: + form_filename = form_filename % '' + try: + with open(form_filename, 'w') as f: + f.write(html_content) + except IOError as e: + self.logger.error('Error: %s', e) + continue + else: # Remove all non form elements. + match = self._RE_FORM_LOCATION_PATTERN.search(html_content) + if match: + form_location_comment = match.group() + os.linesep + else: + form_location_comment = '' + forms_iterator = self._RE_FORM_PATTERN.finditer(html_content) + for form_number, form_match in enumerate(forms_iterator, start=1): + form_content = form_match.group() + numbered_form_filename = form_filename % form_number + try: + with open(numbered_form_filename, 'w') as f: + f.write(form_location_comment) + f.write(form_content) + except IOError as e: + self.logger.error('Error: %s', e) + continue + self.logger.info('\tFile "%s" extracted SUCCESSFULLY!', filename) + + +def main(): + parser = OptionParser() + parser.add_option( + '-l', '--log_level', metavar='LOG_LEVEL', default='error', + help='LOG_LEVEL: debug, info, warning or error [default: %default]') + parser.add_option( + '-j', '--js', dest='js', action='store_true', default=False, + help='Removes all javascript elements [default: %default]') + + (options, args) = parser.parse_args() + options.log_level = options.log_level.upper() + if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']: + print 'Wrong log_level argument.' + parser.print_help() + return 1 + + options.log_level = getattr(logging, options.log_level) + extractor = FormsExtractor(logging_level=options.log_level) + extractor.Extract(options.js) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) |