summary refs log tree commit diff stats
path: root/chrome/tools
diff options
context:
space:
mode:
author maruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-11-27 20:56:51 +0000
committer maruel@chromium.org <maruel@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-11-27 20:56:51 +0000
commit 2fac37585d3dfa0d7cf7a976698aae7627186573 (patch)
tree d2f233a6afa5d8093f15f2f3c56cc4c009b05904 /chrome/tools
parent 071302929ab813f647e51253af4e885b33eab463 (diff)
download chromium_src-2fac37585d3dfa0d7cf7a976698aae7627186573.zip
chromium_src-2fac37585d3dfa0d7cf7a976698aae7627186573.tar.gz
chromium_src-2fac37585d3dfa0d7cf7a976698aae7627186573.tar.bz2
Fix python scripts in src/chrome/
Make sure that:
- shebang is only present for executable files
- shebang is #!/usr/bin/env python
- __main__ is only present for executable files
- file's executable bit is coherent

Also fix EOF LF to be only one.

Minor python style fixes.

TBR=nirnimesh@chromium.org
BUG=105108
TEST=

Review URL: http://codereview.chromium.org/8680018

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@111658 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/tools')
-rwxr-xr-x[-rw-r--r--] chrome/tools/automated_ui_test_tools/ui_action_generator.py 11
-rwxr-xr-x[-rw-r--r--] chrome/tools/build/appid.py 2
-rwxr-xr-x[-rw-r--r--] chrome/tools/build/generate_policy_source.py 8
-rwxr-xr-x chrome/tools/build/win/create_installer_archive.py 2
-rwxr-xr-x chrome/tools/build/win/dependencies.py 31
-rwxr-xr-x[-rw-r--r--] chrome/tools/build/win/make_policy_zip.py 6
-rwxr-xr-x[-rw-r--r--] chrome/tools/build/win/scan_server_dlls.py 26
-rwxr-xr-x chrome/tools/build/win/sln_deps.py 14
-rwxr-xr-x chrome/tools/build/win/sort_sln.py 56
-rwxr-xr-x chrome/tools/check_grd_for_unused_strings.py 13
-rwxr-xr-x chrome/tools/extract_actions.py 6
-rwxr-xr-x chrome/tools/extract_histograms.py 10
-rwxr-xr-x chrome/tools/history-viz.py 186
-rwxr-xr-x chrome/tools/inconsistent-eol.py 18
-rwxr-xr-x chrome/tools/process_dumps_linux.py 14
-rwxr-xr-x[-rw-r--r--] chrome/tools/webforms_aggregator.py 8
-rwxr-xr-x[-rw-r--r--] chrome/tools/webforms_aggregator_tests.py 2
-rwxr-xr-x[-rw-r--r--] chrome/tools/webforms_aggregator_unittests.py 2
-rwxr-xr-x[-rw-r--r--] chrome/tools/webforms_extractor.py 507
19 files changed, 452 insertions, 470 deletions
diff --git a/chrome/tools/automated_ui_test_tools/ui_action_generator.py b/chrome/tools/automated_ui_test_tools/ui_action_generator.py
index 098a68d..b455554 100644..100755
--- a/chrome/tools/automated_ui_test_tools/ui_action_generator.py
+++ b/chrome/tools/automated_ui_test_tools/ui_action_generator.py
@@ -1,5 +1,4 @@
-#!/usr/bin/python
-
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -68,10 +67,7 @@ def CreateUIActionList(actions_per_command, num_commands, given_seed=None):
def ParseCommandLine():
- """Parses the command line.
-
- Returns:
- List of options and their values, and unparsed args.
+ """Returns the list of options and their values, and unparsed args.
"""
parser = optparse.OptionParser()
parser.add_option('-o', '--output', dest='output_file', type='string',
@@ -102,7 +98,8 @@ def main():
f.write(command_list)
f.close()
print command_list
+ return 0
if __name__ == '__main__':
- main()
+ sys.exit(main())
diff --git a/chrome/tools/build/appid.py b/chrome/tools/build/appid.py
index f471a4e..d052bc3 100644..100755
--- a/chrome/tools/build/appid.py
+++ b/chrome/tools/build/appid.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# Copyright (c) 2009 The Chromium Authors. All rights reserved.
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
diff --git a/chrome/tools/build/generate_policy_source.py b/chrome/tools/build/generate_policy_source.py
index ad234f2..e405713 100644..100755
--- a/chrome/tools/build/generate_policy_source.py
+++ b/chrome/tools/build/generate_policy_source.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -51,7 +51,7 @@ def main():
if len(args) != 3:
print "exactly platform, chromium_os flag and input file must be specified."
parser.print_help()
- sys.exit(2)
+ return 2
template_file_contents = _LoadJSONFile(args[2]);
if opts.header_path is not None:
_WritePolicyConstantHeader(template_file_contents, args, opts);
@@ -63,6 +63,7 @@ def main():
_WriteProtobuf(template_file_contents, args, opts.proto_path)
if opts.decoder_path is not None:
_WriteProtobufParser(template_file_contents, args, opts.decoder_path)
+ return 0
#------------------ shared helpers ---------------------------------#
@@ -462,6 +463,5 @@ def _WriteProtobufParser(template_file_contents, args, outfilepath):
f.write(CPP_FOOT)
-#------------------ main() -----------------------------------------#
if __name__ == '__main__':
- main();
+ sys.exit(main())
diff --git a/chrome/tools/build/win/create_installer_archive.py b/chrome/tools/build/win/create_installer_archive.py
index 2b59fa0..5a4e96c 100755
--- a/chrome/tools/build/win/create_installer_archive.py
+++ b/chrome/tools/build/win/create_installer_archive.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
diff --git a/chrome/tools/build/win/dependencies.py b/chrome/tools/build/win/dependencies.py
index 353c89c..01d9254 100755
--- a/chrome/tools/build/win/dependencies.py
+++ b/chrome/tools/build/win/dependencies.py
@@ -1,5 +1,5 @@
-#!/usr/bin/python
-# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -40,6 +40,7 @@ def RunSystemCommand(cmd):
except:
raise Error("Failed to execute: " + cmd)
+
def RunDumpbin(binary_file):
"""Runs dumpbin and parses its output.
@@ -196,8 +197,19 @@ def VerifyDependents(pe_name, dependents, delay_loaded, list_file, verbose):
return max(deps_result, delayed_result)
-def main(options, args):
+def main():
# PE means portable executable. It's any .DLL, .EXE, .SYS, .AX, etc.
+ usage = "usage: %prog [options] input output"
+ option_parser = optparse.OptionParser(usage=usage)
+ option_parser.add_option("-d",
+ "--debug",
+ dest="debug",
+ action="store_true",
+ default=False,
+ help="Display debugging information")
+ options, args = option_parser.parse_args()
+ if len(args) != 2:
+ option_parser.error("Incorrect number of arguments")
pe_name = args[0]
deps_file = args[1]
dependents, delay_loaded = RunDumpbin(pe_name)
@@ -211,15 +223,4 @@ def main(options, args):
if '__main__' == __name__:
- usage = "usage: %prog [options] input output"
- option_parser = optparse.OptionParser(usage = usage)
- option_parser.add_option("-d",
- "--debug",
- dest="debug",
- action="store_true",
- default=False,
- help="Display debugging information")
- options, args = option_parser.parse_args()
- if len(args) != 2:
- option_parser.error("Incorrect number of arguments")
- sys.exit(main(options, args))
+ sys.exit(main())
diff --git a/chrome/tools/build/win/make_policy_zip.py b/chrome/tools/build/win/make_policy_zip.py
index 60037c3..0822483 100644..100755
--- a/chrome/tools/build/win/make_policy_zip.py
+++ b/chrome/tools/build/win/make_policy_zip.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -12,6 +13,7 @@ import os
import sys
import zipfile
+
def add_files_to_zip(zip_file, base_dir, file_list):
"""Pack a list of files into a zip archive, that is already
opened for writing.
@@ -26,6 +28,7 @@ def add_files_to_zip(zip_file, base_dir, file_list):
zip_file.write(base_dir + file_path, file_path)
return 0
+
def get_grd_outputs(grit_cmd, grit_defines, grd_file, grd_strip_path_prefix):
grit_path = os.path.join(os.getcwd(), os.path.dirname(grit_cmd))
sys.path.append(grit_path)
@@ -37,6 +40,7 @@ def get_grd_outputs(grit_cmd, grit_defines, grd_file, grd_strip_path_prefix):
result.append(item[len(grd_strip_path_prefix):])
return result
+
def main(argv):
"""Pack a list of files into a zip archive.
@@ -73,6 +77,6 @@ def main(argv):
finally:
zip_file.close()
+
if '__main__' == __name__:
sys.exit(main(sys.argv))
-
diff --git a/chrome/tools/build/win/scan_server_dlls.py b/chrome/tools/build/win/scan_server_dlls.py
index 2adf8d6..68ebb93 100644..100755
--- a/chrome/tools/build/win/scan_server_dlls.py
+++ b/chrome/tools/build/win/scan_server_dlls.py
@@ -1,12 +1,11 @@
-#!/usr/bin/python
-# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script used to scan for server DLLs at build time and build a header
included by setup.exe. This header contains an array of the names of
the DLLs that need registering at install time.
-
"""
import ConfigParser
@@ -128,19 +127,11 @@ def RunSystemCommand(cmd):
raise "Error while running cmd: %s" % cmd
-def main(options):
+def main():
"""Main method that reads input file, scans <build_output>\servers for
matches to files described in the input file. A header file for the
setup project is then generated.
"""
- config = Readconfig(options.output_dir, options.input_file)
- registered_dll_list = ScanServerDlls(config, options.distribution,
- options.output_dir)
- CreateRegisteredDllIncludeFile(registered_dll_list,
- options.header_output_dir)
-
-
-if '__main__' == __name__:
option_parser = optparse.OptionParser()
option_parser.add_option('-o', '--output_dir', help='Build Output directory')
option_parser.add_option('-x', '--header_output_dir',
@@ -150,4 +141,13 @@ if '__main__' == __name__:
help='Name of Chromium Distribution. Optional.')
options, args = option_parser.parse_args()
- sys.exit(main(options))
+ config = Readconfig(options.output_dir, options.input_file)
+ registered_dll_list = ScanServerDlls(config, options.distribution,
+ options.output_dir)
+ CreateRegisteredDllIncludeFile(registered_dll_list,
+ options.header_output_dir)
+ return 0
+
+
+if '__main__' == __name__:
+ sys.exit(main())
diff --git a/chrome/tools/build/win/sln_deps.py b/chrome/tools/build/win/sln_deps.py
index ef7803e..ebb371a 100755
--- a/chrome/tools/build/win/sln_deps.py
+++ b/chrome/tools/build/win/sln_deps.py
@@ -1,5 +1,5 @@
-#!/usr/bin/python
-# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -63,7 +63,7 @@ def ScanSlnFile(filename):
return projects
-def main(filename, project_to_scan, reverse):
+def sln_deps(filename, project_to_scan, reverse):
"""Displays the project's dependencies."""
project_to_scan = project_to_scan.lower()
@@ -91,9 +91,10 @@ def main(filename, project_to_scan, reverse):
deps_name = [projects[d].name for d in project.deps]
print "\n".join(str(" " + name) for name in sorted(deps_name,
key=str.lower))
+ return 0
-if __name__ == '__main__':
+def main():
usage = "usage: %prog [options] solution [project]"
description = ("Display the dependencies of a project in human readable"
@@ -116,5 +117,8 @@ if __name__ == '__main__':
project_to_scan = ""
if len(args) == 2:
project_to_scan = args[1]
- main(args[0], project_to_scan, options.reverse)
+ return sln_deps(args[0], project_to_scan, options.reverse)
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/chrome/tools/build/win/sort_sln.py b/chrome/tools/build/win/sort_sln.py
deleted file mode 100755
index ea88ce4..0000000
--- a/chrome/tools/build/win/sort_sln.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/python
-# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import sys
-
-if len(sys.argv) != 2:
- print """Usage: sort_sln.py <SOLUTIONNAME>.sln
-to sort the solution file to a normalized scheme. Do this before checking in
-changes to a solution file to avoid having a lot of unnecessary diffs."""
- sys.exit(1)
-
-filename = sys.argv[1]
-print "Sorting " + filename;
-
-try:
- sln = open(filename, "r");
-except IOError:
- print "Unable to open " + filename + " for reading."
- sys.exit(1)
-
-output = ""
-seclines = None
-while 1:
- line = sln.readline()
- if not line:
- break
-
- if seclines is not None:
- # Process the end of a section, dump the sorted lines
- if line.lstrip().startswith('End'):
- output = output + ''.join(sorted(seclines))
- seclines = None
- # Process within a section
- else:
- seclines.append(line)
- continue
-
- # Process the start of a section
- if (line.lstrip().startswith('GlobalSection') or
- line.lstrip().startswith('ProjectSection')):
- if seclines: raise Exception('Already in a section')
- seclines = []
-
- output = output + line
-
-sln.close()
-try:
- sln = open(filename, "w")
- sln.write(output)
-except IOError:
- print "Unable to write to " + filename
- sys.exit(1);
-print "Done."
-
diff --git a/chrome/tools/check_grd_for_unused_strings.py b/chrome/tools/check_grd_for_unused_strings.py
index 3bc57d7..b0f8cb8 100755
--- a/chrome/tools/check_grd_for_unused_strings.py
+++ b/chrome/tools/check_grd_for_unused_strings.py
@@ -1,6 +1,5 @@
-#!/usr/bin/python
-
-# Copyright (c) 2010 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -111,7 +110,7 @@ def CheckForUnusedGrdIDsInSources(grd_files, src_dirs):
return 0
-if __name__ == '__main__':
+def main():
# script lives in src/chrome/tools
chrome_tools_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
src_dir = os.path.dirname(os.path.dirname(chrome_tools_dir))
@@ -162,4 +161,8 @@ if __name__ == '__main__':
os.path.join(src_dir, 'third_party', 'mozilla_security_manager'),
]
- sys.exit(CheckForUnusedGrdIDsInSources(grd_files, src_dirs))
+ return CheckForUnusedGrdIDsInSources(grd_files, src_dirs)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/chrome/tools/extract_actions.py b/chrome/tools/extract_actions.py
index 650f6e4..09bab23 100755
--- a/chrome/tools/extract_actions.py
+++ b/chrome/tools/extract_actions.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -439,6 +439,8 @@ def main(argv):
if hash_output:
print "Done. Do not forget to add chromeactions.txt to your changelist"
+ return 0
+
if '__main__' == __name__:
- main(sys.argv)
+ sys.exit(main(sys.argv))
diff --git a/chrome/tools/extract_histograms.py b/chrome/tools/extract_histograms.py
index 8c002f3..82fc9e9 100755
--- a/chrome/tools/extract_histograms.py
+++ b/chrome/tools/extract_histograms.py
@@ -1,5 +1,5 @@
-#!/usr/bin/python
-# Copyright (c) 2009 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -35,6 +35,7 @@ def GrepForHistograms(path, histograms):
if match:
histograms.add(match.group(1))
+
def WalkDirectory(root_path, histograms):
for path, dirs, files in os.walk(root_path):
if '.svn' in dirs:
@@ -44,6 +45,7 @@ def WalkDirectory(root_path, histograms):
if ext == '.cc':
GrepForHistograms(os.path.join(path, file), histograms)
+
def main(argv):
histograms = set()
@@ -53,6 +55,8 @@ def main(argv):
# Print out the histograms as a sorted list.
for histogram in sorted(histograms):
print histogram
+ return 0
+
if '__main__' == __name__:
- main(sys.argv)
+ sys.exit(main(sys.argv))
diff --git a/chrome/tools/history-viz.py b/chrome/tools/history-viz.py
index 6f82126..fccbb31 100755
--- a/chrome/tools/history-viz.py
+++ b/chrome/tools/history-viz.py
@@ -1,5 +1,5 @@
-#!/usr/bin/python
-# Copyright (c) 2009 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -17,7 +17,18 @@ import subprocess
import sys
import urlparse
-class URL:
+
+# Some transition types, copied from page_transition_types.h.
+TRANS_TYPES = {
+ 0: 'link',
+ 1: 'typed',
+ 2: 'most-visited',
+ 3: 'auto subframe',
+ 7: 'form',
+}
+
+
+class URL(object):
"""Represents a broken-down URL from our most visited database."""
def __init__(self, id, url):
@@ -67,7 +78,8 @@ class URL:
lines.append(line)
return '\n'.join(lines)
-class Edge:
+
+class Edge(object):
"""Represents an edge in the history graph, connecting two pages.
If a link is traversed twice, it is one Edge with two entries in
@@ -97,6 +109,7 @@ class Edge:
# edge['chain'] = chain
return all
+
def ClusterBy(objs, pred):
"""Group a list of objects by a predicate.
@@ -109,12 +122,14 @@ def ClusterBy(objs, pred):
clusters[cluster].append(obj)
return clusters
-def EscapeDot(str):
+
+def EscapeDot(string):
"""Escape a string suitable for embedding in a graphviz graph."""
# TODO(evanm): this is likely not sufficient.
- return str.replace('\n', '\\n')
+ return string.replace('\n', '\\n')
+
-class SQLite:
+class SQLite(object):
"""Trivial interface to executing SQLite queries.
Spawns a new process with each call."""
def __init__(self, file=None):
@@ -132,6 +147,7 @@ class SQLite:
row = line.strip().split('\t')
yield row
+
def LoadHistory(filename):
db = SQLite(filename)
@@ -157,85 +173,81 @@ def LoadHistory(filename):
return urls, edges
-# Some transition types, copied from page_transition_types.h.
-TRANS_TYPES = {
- 0: 'link',
- 1: 'typed',
- 2: 'most-visited',
- 3: 'auto subframe',
- 7: 'form',
-}
-urls, edges = LoadHistory(sys.argv[1])
-
-print 'digraph G {'
-print ' graph [rankdir=LR]' # Display left to right.
-print ' node [shape=box]' # Display nodes as boxes.
-print ' subgraph { rank=source; 0 [label="start"] }'
-
-# Output all the nodes within graph clusters.
-hosts = ClusterBy(urls.values(), lambda url: url.host)
-for i, (host, urls) in enumerate(hosts.items()):
- # Cluster all URLs under this host if it has more than one entry.
- host_clustered = len(urls) > 1
- if host_clustered:
- print 'subgraph clusterhost%d {' % i
- print ' label="%s"' % host
- paths = ClusterBy(urls, lambda url: url.path)
- for j, (path, urls) in enumerate(paths.items()):
- # Cluster all URLs under this host if it has more than one entry.
- path_clustered = host_clustered and len(urls) > 1
- if path_clustered:
- print ' subgraph cluster%d%d {' % (i, j)
- print ' label="%s"' % path
- for url in urls:
- if url.id == '0': continue # We already output the special start node.
- pretty = url.PrettyPrint(include_host=not host_clustered,
- include_path=not path_clustered)
- print ' %s [label="%s"]' % (url.id, EscapeDot(pretty))
- if path_clustered:
- print ' }'
- if host_clustered:
- print '}'
-
-# Output all the edges between nodes.
-for src, dsts in edges.items():
- for dst, edge in dsts.items():
- # Gather up all the transitions into the label.
- label = [] # Label for the edge.
- transitions = edge.Transitions()
- for trans, count in transitions.items():
- text = ''
- if count > 1:
- text = '%dx ' % count
- base_type = trans & 0xFF
- redir = (trans & 0xC0000000) != 0
- start = (trans & 0x10000000) != 0
- end = (trans & 0x20000000) != 0
- if start or end:
- if start:
- text += '<'
- if end:
- text += '>'
- text += ' '
- if redir:
- text += 'R '
- text += TRANS_TYPES.get(base_type, 'trans%d' % base_type)
- label.append(text)
- if len(label) == 0:
- continue
-
- edgeattrs = [] # Graphviz attributes for the edge.
- # If the edge is from the start and the transitions are fishy, make it
- # display as a dotted line.
- if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0):
- edgeattrs.append('style=dashed')
- if len(label) > 0:
- edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label)))
-
- out = '%s -> %s' % (src, dst)
- if len(edgeattrs) > 0:
- out += ' [%s]' % ','.join(edgeattrs)
- print out
-print '}'
+def main():
+ urls, edges = LoadHistory(sys.argv[1])
+ print 'digraph G {'
+ print ' graph [rankdir=LR]' # Display left to right.
+ print ' node [shape=box]' # Display nodes as boxes.
+ print ' subgraph { rank=source; 0 [label="start"] }'
+ # Output all the nodes within graph clusters.
+ hosts = ClusterBy(urls.values(), lambda url: url.host)
+ for i, (host, urls) in enumerate(hosts.items()):
+ # Cluster all URLs under this host if it has more than one entry.
+ host_clustered = len(urls) > 1
+ if host_clustered:
+ print 'subgraph clusterhost%d {' % i
+ print ' label="%s"' % host
+ paths = ClusterBy(urls, lambda url: url.path)
+ for j, (path, urls) in enumerate(paths.items()):
+ # Cluster all URLs under this host if it has more than one entry.
+ path_clustered = host_clustered and len(urls) > 1
+ if path_clustered:
+ print ' subgraph cluster%d%d {' % (i, j)
+ print ' label="%s"' % path
+ for url in urls:
+ if url.id == '0': continue # We already output the special start node.
+ pretty = url.PrettyPrint(include_host=not host_clustered,
+ include_path=not path_clustered)
+ print ' %s [label="%s"]' % (url.id, EscapeDot(pretty))
+ if path_clustered:
+ print ' }'
+ if host_clustered:
+ print '}'
+
+ # Output all the edges between nodes.
+ for src, dsts in edges.items():
+ for dst, edge in dsts.items():
+ # Gather up all the transitions into the label.
+ label = [] # Label for the edge.
+ transitions = edge.Transitions()
+ for trans, count in transitions.items():
+ text = ''
+ if count > 1:
+ text = '%dx ' % count
+ base_type = trans & 0xFF
+ redir = (trans & 0xC0000000) != 0
+ start = (trans & 0x10000000) != 0
+ end = (trans & 0x20000000) != 0
+ if start or end:
+ if start:
+ text += '<'
+ if end:
+ text += '>'
+ text += ' '
+ if redir:
+ text += 'R '
+ text += TRANS_TYPES.get(base_type, 'trans%d' % base_type)
+ label.append(text)
+ if len(label) == 0:
+ continue
+
+ edgeattrs = [] # Graphviz attributes for the edge.
+ # If the edge is from the start and the transitions are fishy, make it
+ # display as a dotted line.
+ if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0):
+ edgeattrs.append('style=dashed')
+ if len(label) > 0:
+ edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label)))
+
+ out = '%s -> %s' % (src, dst)
+ if len(edgeattrs) > 0:
+ out += ' [%s]' % ','.join(edgeattrs)
+ print out
+ print '}'
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/chrome/tools/inconsistent-eol.py b/chrome/tools/inconsistent-eol.py
index 4ab3596..ef25245 100755
--- a/chrome/tools/inconsistent-eol.py
+++ b/chrome/tools/inconsistent-eol.py
@@ -1,5 +1,5 @@
-#!/usr/bin/python
-# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -38,6 +38,7 @@ def CountChars(text, str):
logging.debug(len(split) - 1)
return len(split) - 1
+
def PrevailingEOLName(crlf, cr, lf):
"""Describe the most common line ending.
@@ -56,6 +57,7 @@ def PrevailingEOLName(crlf, cr, lf):
return 'crlf'
return 'lf'
+
def FixEndings(file, crlf, cr, lf):
"""Change the file's line endings to CRLF or LF, whichever is more common."""
most = max(crlf, cr, lf)
@@ -99,7 +101,8 @@ def ProcessFiles(filelist):
print '%s: mostly %s' % (filename, PrevailingEOLName(crlf, cr, lf))
FixEndings(filename, crlf, cr, lf)
-def main(options, args):
+
+def process(options, args):
"""Process the files."""
if not args or len(args) < 1:
raise Error('No files given.')
@@ -111,8 +114,10 @@ def main(options, args):
else:
filelist = args
ProcessFiles(filelist)
+ return 0
-if '__main__' == __name__:
+
+def main():
if DEBUGGING:
debug_level = logging.DEBUG
else:
@@ -131,5 +136,8 @@ if '__main__' == __name__:
default=False,
help="Force any files with CRLF to LF instead.")
options, args = option_parser.parse_args()
+ return process(options, args)
+
- sys.exit(main(options, args))
+if '__main__' == __name__:
+ sys.exit(main())
diff --git a/chrome/tools/process_dumps_linux.py b/chrome/tools/process_dumps_linux.py
index 3e3bf3e..1f0ba9d 100755
--- a/chrome/tools/process_dumps_linux.py
+++ b/chrome/tools/process_dumps_linux.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -265,7 +265,9 @@ def main_linux(options, args):
return 0
-if '__main__' == __name__:
+def main():
+ if not sys.platform.startswith('linux'):
+ return 1
parser = optparse.OptionParser()
parser.add_option('', '--processor-dir', type='string', default='',
help='The directory where the processor is installed. '
@@ -291,8 +293,8 @@ if '__main__' == __name__:
'Default: chrome')
(options, args) = parser.parse_args()
+ return main_linux(options, args)
- if sys.platform.startswith('linux'):
- sys.exit(main_linux(options, args))
- else:
- sys.exit(1)
+
+if '__main__' == __name__:
+ sys.exit(main())
diff --git a/chrome/tools/webforms_aggregator.py b/chrome/tools/webforms_aggregator.py
index 3d5327b..16e5273 100644..100755
--- a/chrome/tools/webforms_aggregator.py
+++ b/chrome/tools/webforms_aggregator.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -722,7 +722,6 @@ class ThreadedCrawler(object):
def main():
- # Command line options.
usage = 'usage: %prog [options] single_url_or_urls_filename'
parser = optparse.OptionParser(usage)
parser.add_option(
@@ -734,7 +733,7 @@ def main():
if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']:
print 'Wrong log_level argument.'
parser.print_help()
- sys.exit(1)
+ return 1
options.log_level = getattr(logging, options.log_level)
if len(args) != 1:
@@ -762,7 +761,8 @@ def main():
logger.info('Started at: %s\n', t0)
logger.info('Ended at: %s\n', t1)
logger.info('Total execution time: %s\n', delta_t)
+ return 0
if __name__ == "__main__":
- main()
+ sys.exit(main())
diff --git a/chrome/tools/webforms_aggregator_tests.py b/chrome/tools/webforms_aggregator_tests.py
index fc12dc3..2eb26bb 100644..100755
--- a/chrome/tools/webforms_aggregator_tests.py
+++ b/chrome/tools/webforms_aggregator_tests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
diff --git a/chrome/tools/webforms_aggregator_unittests.py b/chrome/tools/webforms_aggregator_unittests.py
index 68169eb..00ea2bd 100644..100755
--- a/chrome/tools/webforms_aggregator_unittests.py
+++ b/chrome/tools/webforms_aggregator_unittests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
diff --git a/chrome/tools/webforms_extractor.py b/chrome/tools/webforms_extractor.py
index 71fed7c..1dd1d95 100644..100755
--- a/chrome/tools/webforms_extractor.py
+++ b/chrome/tools/webforms_extractor.py
@@ -1,253 +1,254 @@
-#!/usr/bin/python
-# Copyright (c) 2011 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be found
-# in the LICENSE file.
-
-"""Extracts registration forms from the corresponding HTML files.
-
-Used for extracting forms within HTML files. This script is used in
-conjunction with the webforms_aggregator.py script, which aggregates web pages
-with fillable forms (i.e registration forms).
-
-The purpose of this script is to extract out all non-form elements that may be
-causing parsing errors and timeout issues when running browser_tests.
-
-This script extracts all forms from a HTML file.
-If there are multiple forms per downloaded site, multiple files are created
-for each form.
-
-Used as a standalone script but assumes that it is run from the directory in
-which it is checked into.
-
-Usage: forms_extractor.py [options]
-
-Options:
- -l LOG_LEVEL, --log_level=LOG_LEVEL,
- LOG_LEVEL: debug, info, warning or error [default: error]
- -j, --js extracts javascript elements from web form.
- -h, --help show this help message and exit
-"""
-
-import glob
-import logging
-from optparse import OptionParser
-import os
-import re
-import sys
-
-
-class FormsExtractor(object):
- """Extracts HTML files, leaving only registration forms from the HTML file."""
- _HTML_FILES_PATTERN = r'*.html'
- _HTML_FILE_PREFIX = r'grabber-'
- _FORM_FILE_PREFIX = r'grabber-stripped-'
-
- _REGISTRATION_PAGES_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill',
- 'heuristics', 'input')
- _EXTRACTED_FORMS_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill',
- 'heuristics', 'input')
-
- logger = logging.getLogger(__name__)
- log_handlers = {'StreamHandler': None}
-
- # This pattern is used for retrieving the form location comment located at the
- # top of each downloaded HTML file indicating where the form originated from.
- _RE_FORM_LOCATION_PATTERN = re.compile(
- ur"""
- <!--Form\s{1}Location: # Starting of form location comment.
- .*? # Any characters (non-greedy).
- --> # Ending of the form comment.
- """, re.U | re.S | re.I | re.X)
-
- # This pattern is used for removing all script code.
- _RE_SCRIPT_PATTERN = re.compile(
- ur"""
- <script # A new opening '<script' tag.
- \b # The end of the word 'script'.
- .*? # Any characters (non-greedy).
- > # Ending of the (opening) tag: '>'.
- .*? # Any characters (non-greedy) between the tags.
- </script\s*> # The '</script>' closing tag.
- """, re.U | re.S | re.I | re.X)
-
- # This pattern is used for removing all href js code.
- _RE_HREF_JS_PATTERN = re.compile(
- ur"""
- \bhref # The word href and its beginning.
- \s*=\s* # The '=' with all whitespace before and after it.
- (?P<quote>[\'\"]) # A single or double quote which is captured.
- \s*javascript\s*: # The word 'javascript:' with any whitespace possible.
- .*? # Any characters (non-greedy) between the quotes.
- \1 # The previously captured single or double quote.
- """, re.U | re.S | re.I | re.X)
-
- _RE_EVENT_EXPR = (
- ur"""
- \b # The beginning of a new word.
- on\w+? # All words starting with 'on' (non-greedy)
- # example: |onmouseover|.
- \s*=\s* # The '=' with all whitespace before and after it.
- (?P<quote>[\'\"]) # A captured single or double quote.
- .*? # Any characters (non-greedy) between the quotes.
- \1 # The previously captured single or double quote.
- """)
-
- # This pattern is used for removing code with js events, such as |onload|.
- # By adding the leading |ur'<[^<>]*?'| and the trailing |'ur'[^<>]*?>'| the
- # pattern matches to strings such as '<tr class="nav"
- # onmouseover="mOvr1(this);" onmouseout="mOut1(this);">'
- _RE_TAG_WITH_EVENTS_PATTERN = re.compile(
- ur"""
- < # Matches character '<'.
- [^<>]*? # Matches any characters except '<' and '>' (non-greedy).""" +
- _RE_EVENT_EXPR +
- ur"""
- [^<>]*? # Matches any characters except '<' and '>' (non-greedy).
- > # Matches character '>'.
- """, re.U | re.S | re.I | re.X)
-
- # Adds whitespace chars at the end of the matched event. Also match trailing
- # whitespaces for JS events. Do not match leading whitespace.
- # For example: |< /form>| is invalid HTML and does not exist but |</form >| is
- # considered valid HTML.
- _RE_EVENT_PATTERN = re.compile(
- _RE_EVENT_EXPR + ur'\s*', re.U | re.S | re.I | re.X)
-
- # This pattern is used for finding form elements.
- _RE_FORM_PATTERN = re.compile(
- ur"""
- <form # A new opening '<form' tag.
- \b # The end of the word 'form'.
- .*? # Any characters (non-greedy).
- > # Ending of the (opening) tag: '>'.
- .*? # Any characters (non-greedy) between the tags.
- </form\s*> # The '</form>' closing tag.
- """, re.U | re.S | re.I | re.X)
-
- def __init__(self, input_dir=_REGISTRATION_PAGES_DIR,
- output_dir=_EXTRACTED_FORMS_DIR, logging_level=None):
- """Creates a FormsExtractor object.
-
- Args:
- input_dir: the directory of HTML files.
- output_dir: the directory where the registration form files will be
- saved.
- logging_level: verbosity level, default is None.
-
- Raises:
- IOError exception if input directory doesn't exist.
- """
- if logging_level:
- if not self.log_handlers['StreamHandler']:
- console = logging.StreamHandler()
- console.setLevel(logging.DEBUG)
- self.log_handlers['StreamHandler'] = console
- self.logger.addHandler(console)
- self.logger.setLevel(logging_level)
- else:
- if self.log_handlers['StreamHandler']:
- self.logger.removeHandler(self.log_handlers['StreamHandler'])
- self.log_handlers['StreamHandler'] = None
-
- self._input_dir = input_dir
- self._output_dir = output_dir
- if not os.path.isdir(self._input_dir):
- error_msg = 'Directory "%s" doesn\'t exist.' % self._input_dir
- self.logger.error('Error: %s', error_msg)
- raise IOError(error_msg)
- if not os.path.isdir(output_dir):
- os.makedirs(output_dir)
- self._form_location_comment = ''
-
- def _SubstituteAllEvents(self, matchobj):
- """Remove all js events that are present as attributes within a tag.
-
- Args:
- matchobj: A regexp |re.MatchObject| containing text that has at least one
- event. Example: |<tr class="nav" onmouseover="mOvr1(this);"
- onmouseout="mOut1(this);">|.
-
- Returns:
- The text containing the tag with all the attributes except for the tags
- with events. Example: |<tr class="nav">|.
- """
- tag_with_all_attrs = matchobj.group(0)
- return self._RE_EVENT_PATTERN.sub('', tag_with_all_attrs)
-
- def Extract(self, strip_js_only):
- """Extracts and saves the extracted registration forms.
-
- Iterates through all the HTML files.
-
- Args:
- strip_js_only: If True, only Javascript is stripped from the HTML content.
- Otherwise, all non-form elements are stripped.
- """
- pathname_pattern = os.path.join(self._input_dir, self._HTML_FILES_PATTERN)
- html_files = [f for f in glob.glob(pathname_pattern) if os.path.isfile(f)]
- for filename in html_files:
- self.logger.info('Stripping file "%s" ...', filename)
- with open(filename, 'U') as f:
- html_content = self._RE_TAG_WITH_EVENTS_PATTERN.sub(
- self._SubstituteAllEvents,
- self._RE_HREF_JS_PATTERN.sub(
- '', self._RE_SCRIPT_PATTERN.sub('', f.read())))
-
- form_filename = os.path.split(filename)[1] # Path dropped.
- form_filename = form_filename.replace(self._HTML_FILE_PREFIX, '', 1)
- (form_filename, extension) = os.path.splitext(form_filename)
- form_filename = (self._FORM_FILE_PREFIX + form_filename +
- '%s' + extension)
- form_filename = os.path.join(self._output_dir, form_filename)
- if strip_js_only:
- form_filename = form_filename % ''
- try:
- with open(form_filename, 'w') as f:
- f.write(html_content)
- except IOError as e:
- self.logger.error('Error: %s', e)
- continue
- else: # Remove all non form elements.
- match = self._RE_FORM_LOCATION_PATTERN.search(html_content)
- if match:
- form_location_comment = match.group() + os.linesep
- else:
- form_location_comment = ''
- forms_iterator = self._RE_FORM_PATTERN.finditer(html_content)
- for form_number, form_match in enumerate(forms_iterator, start=1):
- form_content = form_match.group()
- numbered_form_filename = form_filename % form_number
- try:
- with open(numbered_form_filename, 'w') as f:
- f.write(form_location_comment)
- f.write(form_content)
- except IOError as e:
- self.logger.error('Error: %s', e)
- continue
- self.logger.info('\tFile "%s" extracted SUCCESSFULLY!', filename)
-
-
-def main():
- # Command line options.
- parser = OptionParser()
- parser.add_option(
- '-l', '--log_level', metavar='LOG_LEVEL', default='error',
- help='LOG_LEVEL: debug, info, warning or error [default: %default]')
- parser.add_option(
- '-j', '--js', dest='js', action='store_true', default=False,
- help='Removes all javascript elements [default: %default]')
-
- (options, args) = parser.parse_args()
- options.log_level = options.log_level.upper()
- if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']:
- print 'Wrong log_level argument.'
- parser.print_help()
- sys.exit(1)
-
- options.log_level = getattr(logging, options.log_level)
- extractor = FormsExtractor(logging_level=options.log_level)
- extractor.Extract(options.js)
-
-
-if __name__ == '__main__':
- main()
+#!/usr/bin/env python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+"""Extracts registration forms from the corresponding HTML files.
+
+Used for extracting forms within HTML files. This script is used in
+conjunction with the webforms_aggregator.py script, which aggregates web pages
+with fillable forms (i.e. registration forms).
+
+The purpose of this script is to extract out all non-form elements that may be
+causing parsing errors and timeout issues when running browser_tests.
+
+This script extracts all forms from a HTML file.
+If there are multiple forms per downloaded site, multiple files are created
+for each form.
+
+Used as a standalone script but assumes that it is run from the directory in
+which it is checked into.
+
+Usage: webforms_extractor.py [options]
+
+Options:
+ -l LOG_LEVEL, --log_level=LOG_LEVEL,
+ LOG_LEVEL: debug, info, warning or error [default: error]
+ -j, --js removes all javascript elements [default: False]
+ -h, --help show this help message and exit
+"""
+
+import glob
+import logging
+from optparse import OptionParser
+import os
+import re
+import sys
+
+
+class FormsExtractor(object):
+ """Extracts HTML files, leaving only registration forms from the HTML file."""
+ _HTML_FILES_PATTERN = r'*.html'
+ _HTML_FILE_PREFIX = r'grabber-'
+ _FORM_FILE_PREFIX = r'grabber-stripped-'
+
+ _REGISTRATION_PAGES_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill',
+ 'heuristics', 'input')
+ _EXTRACTED_FORMS_DIR = os.path.join(os.pardir, 'test', 'data', 'autofill',
+ 'heuristics', 'input')
+
+ logger = logging.getLogger(__name__)
+ log_handlers = {'StreamHandler': None}
+
+ # This pattern is used for retrieving the form location comment located at the
+ # top of each downloaded HTML file indicating where the form originated from.
+ _RE_FORM_LOCATION_PATTERN = re.compile(
+ ur"""
+ <!--Form\s{1}Location: # Starting of form location comment.
+ .*? # Any characters (non-greedy).
+ --> # Ending of the form comment.
+ """, re.U | re.S | re.I | re.X)
+
+ # This pattern is used for removing all script code.
+ _RE_SCRIPT_PATTERN = re.compile(
+ ur"""
+ <script # A new opening '<script' tag.
+ \b # The end of the word 'script'.
+ .*? # Any characters (non-greedy).
+ > # Ending of the (opening) tag: '>'.
+ .*? # Any characters (non-greedy) between the tags.
+ </script\s*> # The '</script>' closing tag.
+ """, re.U | re.S | re.I | re.X)
+
+ # This pattern is used for removing all href js code.
+ _RE_HREF_JS_PATTERN = re.compile(
+ ur"""
+ \bhref # The word href and its beginning.
+ \s*=\s* # The '=' with all whitespace before and after it.
+ (?P<quote>[\'\"]) # A single or double quote which is captured.
+ \s*javascript\s*: # The word 'javascript:' with any whitespace possible.
+ .*? # Any characters (non-greedy) between the quotes.
+ \1 # The previously captured single or double quote.
+ """, re.U | re.S | re.I | re.X)
+
+ _RE_EVENT_EXPR = (
+ ur"""
+ \b # The beginning of a new word.
+ on\w+? # All words starting with 'on' (non-greedy)
+ # example: |onmouseover|.
+ \s*=\s* # The '=' with all whitespace before and after it.
+ (?P<quote>[\'\"]) # A captured single or double quote.
+ .*? # Any characters (non-greedy) between the quotes.
+ \1 # The previously captured single or double quote.
+ """)
+
+ # This pattern is used for removing code with js events, such as |onload|.
+ # By adding the leading |ur'<[^<>]*?'| and the trailing |ur'[^<>]*?>'| the
+ # pattern matches to strings such as '<tr class="nav"
+ # onmouseover="mOvr1(this);" onmouseout="mOut1(this);">'
+ _RE_TAG_WITH_EVENTS_PATTERN = re.compile(
+ ur"""
+ < # Matches character '<'.
+ [^<>]*? # Matches any characters except '<' and '>' (non-greedy).""" +
+ _RE_EVENT_EXPR +
+ ur"""
+ [^<>]*? # Matches any characters except '<' and '>' (non-greedy).
+ > # Matches character '>'.
+ """, re.U | re.S | re.I | re.X)
+
+ # Adds whitespace chars at the end of the matched event. Also match trailing
+ # whitespaces for JS events. Do not match leading whitespace.
+ # For example: |< /form>| is invalid HTML and does not exist but |</form >| is
+ # considered valid HTML.
+ _RE_EVENT_PATTERN = re.compile(
+ _RE_EVENT_EXPR + ur'\s*', re.U | re.S | re.I | re.X)
+
+ # This pattern is used for finding form elements.
+ _RE_FORM_PATTERN = re.compile(
+ ur"""
+ <form # A new opening '<form' tag.
+ \b # The end of the word 'form'.
+ .*? # Any characters (non-greedy).
+ > # Ending of the (opening) tag: '>'.
+ .*? # Any characters (non-greedy) between the tags.
+ </form\s*> # The '</form>' closing tag.
+ """, re.U | re.S | re.I | re.X)
+
+ def __init__(self, input_dir=_REGISTRATION_PAGES_DIR,
+ output_dir=_EXTRACTED_FORMS_DIR, logging_level=None):
+ """Creates a FormsExtractor object.
+
+ Args:
+ input_dir: the directory of HTML files.
+ output_dir: the directory where the registration form files will be
+ saved.
+ logging_level: verbosity level, default is None.
+
+ Raises:
+ IOError exception if input directory doesn't exist.
+ """
+ if logging_level:
+ if not self.log_handlers['StreamHandler']:
+ console = logging.StreamHandler()
+ console.setLevel(logging.DEBUG)
+ self.log_handlers['StreamHandler'] = console
+ self.logger.addHandler(console)
+ self.logger.setLevel(logging_level)
+ else:
+ if self.log_handlers['StreamHandler']:
+ self.logger.removeHandler(self.log_handlers['StreamHandler'])
+ self.log_handlers['StreamHandler'] = None
+
+ self._input_dir = input_dir
+ self._output_dir = output_dir
+ if not os.path.isdir(self._input_dir):
+ error_msg = 'Directory "%s" doesn\'t exist.' % self._input_dir
+ self.logger.error('Error: %s', error_msg)
+ raise IOError(error_msg)
+ if not os.path.isdir(output_dir):
+ os.makedirs(output_dir)
+ self._form_location_comment = ''
+
+ def _SubstituteAllEvents(self, matchobj):
+ """Remove all js events that are present as attributes within a tag.
+
+ Args:
+ matchobj: A regexp |re.MatchObject| containing text that has at least one
+ event. Example: |<tr class="nav" onmouseover="mOvr1(this);"
+ onmouseout="mOut1(this);">|.
+
+ Returns:
+ The text containing the tag with all the attributes except for the event
+ attributes. Example: |<tr class="nav">|.
+ """
+ tag_with_all_attrs = matchobj.group(0)
+ return self._RE_EVENT_PATTERN.sub('', tag_with_all_attrs)
+
+ def Extract(self, strip_js_only):
+ """Extracts and saves the extracted registration forms.
+
+ Iterates through all the HTML files.
+
+ Args:
+ strip_js_only: If True, only Javascript is stripped from the HTML content.
+ Otherwise, all non-form elements are stripped.
+ """
+ pathname_pattern = os.path.join(self._input_dir, self._HTML_FILES_PATTERN)
+ html_files = [f for f in glob.glob(pathname_pattern) if os.path.isfile(f)]
+ for filename in html_files:
+ self.logger.info('Stripping file "%s" ...', filename)
+ with open(filename, 'U') as f:
+ html_content = self._RE_TAG_WITH_EVENTS_PATTERN.sub(
+ self._SubstituteAllEvents,
+ self._RE_HREF_JS_PATTERN.sub(
+ '', self._RE_SCRIPT_PATTERN.sub('', f.read())))
+
+ form_filename = os.path.split(filename)[1] # Path dropped.
+ form_filename = form_filename.replace(self._HTML_FILE_PREFIX, '', 1)
+ (form_filename, extension) = os.path.splitext(form_filename)
+ form_filename = (self._FORM_FILE_PREFIX + form_filename +
+ '%s' + extension)
+ form_filename = os.path.join(self._output_dir, form_filename)
+ if strip_js_only:
+ form_filename = form_filename % ''
+ try:
+ with open(form_filename, 'w') as f:
+ f.write(html_content)
+ except IOError as e:
+ self.logger.error('Error: %s', e)
+ continue
+ else: # Remove all non form elements.
+ match = self._RE_FORM_LOCATION_PATTERN.search(html_content)
+ if match:
+ form_location_comment = match.group() + os.linesep
+ else:
+ form_location_comment = ''
+ forms_iterator = self._RE_FORM_PATTERN.finditer(html_content)
+ for form_number, form_match in enumerate(forms_iterator, start=1):
+ form_content = form_match.group()
+ numbered_form_filename = form_filename % form_number
+ try:
+ with open(numbered_form_filename, 'w') as f:
+ f.write(form_location_comment)
+ f.write(form_content)
+ except IOError as e:
+ self.logger.error('Error: %s', e)
+ continue
+ self.logger.info('\tFile "%s" extracted SUCCESSFULLY!', filename)
+
+
+def main():
+ parser = OptionParser()
+ parser.add_option(
+ '-l', '--log_level', metavar='LOG_LEVEL', default='error',
+ help='LOG_LEVEL: debug, info, warning or error [default: %default]')
+ parser.add_option(
+ '-j', '--js', dest='js', action='store_true', default=False,
+ help='Removes all javascript elements [default: %default]')
+
+ (options, args) = parser.parse_args()
+ options.log_level = options.log_level.upper()
+ if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']:
+ print 'Wrong log_level argument.'
+ parser.print_help()
+ return 1
+
+ options.log_level = getattr(logging, options.log_level)
+ extractor = FormsExtractor(logging_level=options.log_level)
+ extractor.Extract(options.js)
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())