summaryrefslogtreecommitdiffstats
path: root/tools/site_compare/commands/scrape.py
diff options
context:
space:
mode:
authorinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-27 00:12:16 +0000
committerinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-27 00:12:16 +0000
commit920c091ac3ee15079194c82ae8a7a18215f3f23c (patch)
treed28515d1e7732e2b6d077df1b4855ace3f4ac84f /tools/site_compare/commands/scrape.py
parentae2c20f398933a9e86c387dcc465ec0f71065ffc (diff)
downloadchromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.zip
chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.gz
chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.bz2
Add tools to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/site_compare/commands/scrape.py')
-rw-r--r--tools/site_compare/commands/scrape.py85
1 files changed, 85 insertions, 0 deletions
diff --git a/tools/site_compare/commands/scrape.py b/tools/site_compare/commands/scrape.py
new file mode 100644
index 0000000..a9b3398
--- /dev/null
+++ b/tools/site_compare/commands/scrape.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Command for scraping images from a URL or list of URLs.
+
+Prerequisites:
+ 1. The command_line package from tools/site_compare
+ 2. Either the IE BHO or Firefox extension (or both)
+
+Installation:
+ 1. Build the IE BHO, or call regsvr32 on a prebuilt binary
+ 2. Add a file called "measurepageloadtimeextension@google.com" to
+ the default Firefox profile directory under extensions, containing
+ the path to the Firefox extension root
+
+Invoke with the command line arguments as documented within
+the command line.
+"""
+
+import command_line
+
+from drivers import windowing
+from utils import browser_iterate
+
+def CreateCommand(cmdline):
+ """Inserts the command and arguments into a command line for parsing."""
+ cmd = cmdline.AddCommand(
+ ["scrape"],
+ "Scrapes an image from a URL or series of URLs.",
+ None,
+ ExecuteScrape)
+
+ browser_iterate.SetupIterationCommandLine(cmd)
+ cmd.AddArgument(
+ ["-log", "--logfile"], "File to write text output", type="string")
+ cmd.AddArgument(
+ ["-out", "--outdir"], "Directory to store scrapes", type="string", required=True)
+
+
+def ExecuteScrape(command):
+ """Executes the Scrape command."""
+
+ def ScrapeResult(url, proc, wnd, result):
+ """Capture and save the scrape."""
+ if log_file: log_file.write(result)
+
+ # Scrape the page
+ image = windowing.ScrapeWindow(wnd)
+ filename = windowing.URLtoFilename(url, command["--outdir"], ".bmp")
+ image.save(filename)
+
+ if command["--logfile"]: log_file = open(command["--logfile"], "w")
+ else: log_file = None
+
+ browser_iterate.Iterate(command, ScrapeResult)
+
+ # Close the log file and return. We're done.
+ if log_file: log_file.close()