summaryrefslogtreecommitdiffstats
path: root/tools/site_compare/utils/browser_iterate.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/site_compare/utils/browser_iterate.py')
-rw-r--r--tools/site_compare/utils/browser_iterate.py225
1 files changed, 225 insertions, 0 deletions
diff --git a/tools/site_compare/utils/browser_iterate.py b/tools/site_compare/utils/browser_iterate.py
new file mode 100644
index 0000000..65ba24f
--- /dev/null
+++ b/tools/site_compare/utils/browser_iterate.py
@@ -0,0 +1,225 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Utility to use a browser to visit multiple URLs.
+
+Prerequisites:
+ 1. The command_line package from tools/site_compare
+ 2. Either the IE BHO or Firefox extension (or both)
+
+Installation:
+ 1. Build the IE BHO, or call regsvr32 on a prebuilt binary
+ 2. Add a file called "measurepageloadtimeextension@google.com" to
+ the default Firefox profile directory under extensions, containing
+ the path to the Firefox extension root
+
+Invoke with the command line arguments as documented within
+the command line.
+"""
+
+import command_line
+import scrapers
+import socket
+import time
+
+from drivers import windowing
+
+# Constants
+# Maximum number of bytes requested from the browser socket per recv() call
+MAX_URL = 1024
+# TCP port on localhost that Iterate connects to after launching the browser
+PORT = 42492
+
+def SetupIterationCommandLine(cmd):
+ """Adds the necessary flags for iteration to a command.
+
+ Args:
+ cmd: an object created by cmdline.AddCommand
+ """
+ cmd.AddArgument(
+ ["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
+ type="string", required=True)
+ cmd.AddArgument(
+ ["-b1v", "--browserver"], "Version of browser", metaname="VERSION")
+ cmd.AddArgument(
+ ["-p", "--browserpath"], "Path to browser.",
+ type="string", required=False)
+ cmd.AddArgument(
+ ["-u", "--url"], "URL to visit")
+ cmd.AddArgument(
+ ["-l", "--list"], "File containing list of URLs to visit", type="readfile")
+ cmd.AddMutualExclusion(["--url", "--list"])
+ cmd.AddArgument(
+ ["-s", "--startline"], "First line of URL list", type="int")
+ cmd.AddArgument(
+ ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
+ cmd.AddArgument(
+ ["-c", "--count"], "Number of lines of URL file to use", type="int")
+ cmd.AddDependency("--startline", "--list")
+ cmd.AddRequiredGroup(["--url", "--list"])
+ cmd.AddDependency("--endline", "--list")
+ cmd.AddDependency("--count", "--list")
+ cmd.AddMutualExclusion(["--count", "--endline"])
+ cmd.AddDependency("--count", "--startline")
+ cmd.AddArgument(
+ ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
+ "finish loading",
+ type="int", default=300)
+ cmd.AddArgument(
+ ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
+
+
+def Iterate(command, iteration_func):
+ """Iterates over a list of URLs, calling a function on each.
+
+ Args:
+ command: the command line containing the iteration flags
+ iteration_func: called for each URL with (proc, wnd, url, result)
+ """
+
+ # Retrieve the browser scraper to use to invoke the browser
+ scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))
+
+ def AttachToBrowser(path, timeout):
+ """Invoke the browser process and connect to the socket."""
+ (proc, frame, wnd) = scraper.GetBrowser(path)
+
+ if not wnd: raise ValueError("Could not invoke browser.")
+
+ # Try to connect the socket. If it fails, wait and try
+ # again. Do this for ten seconds
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
+
+ for attempt in xrange(10):
+ try:
+ s.connect(("localhost", PORT))
+ except socket.error:
+ time.sleep(1)
+ continue
+ break
+
+ try:
+ s.getpeername()
+ except socket.error:
+ raise ValueError("Could not connect to browser")
+
+ if command["--size"]:
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)
+
+ s.settimeout(timeout)
+
+ Iterate.proc = proc
+ Iterate.wnd = wnd
+ Iterate.s = s
+
+ def DetachFromBrowser():
+ """Close the socket and kill the process if necessary."""
+ if Iterate.s:
+ Iterate.s.close()
+ Iterate.s = None
+
+ if Iterate.proc:
+ if not windowing.WaitForProcessExit(Iterate.proc, 0):
+ try:
+ windowing.EndProcess(Iterate.proc)
+ windowing.WaitForProcessExit(Iterate.proc, 0)
+ except pywintypes.error:
+ # Exception here most likely means the process died on its own
+ pass
+ Iterate.proc = None
+
+ if command["--browserpath"]:
+ browser = command["--browserpath"]
+ else:
+ browser = None
+
+ # Read the URLs from the file
+ if command["--url"]:
+ url_list = [command["--url"]]
+ else:
+ startline = command["--startline"]
+ if command["--count"]:
+ endline = startline+command["--count"]
+ else:
+ endline = command["--endline"]
+
+ url_list = []
+ file = open(command["--list"], "r")
+
+ for line in xrange(startline-1):
+ file.readline()
+
+ for line in xrange(endline-startline):
+ url_list.append(file.readline().strip())
+
+ timeout = command["--timeout"]
+
+ # Loop through the URLs and send them through the socket
+ Iterate.s = None
+ Iterate.proc = None
+ Iterate.wnd = None
+
+ for url in url_list:
+ # Invoke the browser if necessary
+ if not Iterate.proc:
+ AttachToBrowser(browser, timeout)
+ # Send the URL and wait for a response
+ Iterate.s.send(url + "\n")
+
+ response = ""
+
+ while (response.find("\n") < 0):
+
+ try:
+ recv = Iterate.s.recv(MAX_URL)
+ response = response + recv
+
+ # Workaround for an oddity: when Firefox closes
+ # gracefully, somehow Python doesn't detect it.
+ # (Telnet does)
+ if not recv:
+ raise socket.error
+
+ except socket.timeout:
+ response = url + ",hang\n"
+ DetachFromBrowser()
+ except socket.error:
+ # If there was a socket error, it's probably a crash
+ response = url + ",crash\n"
+ DetachFromBrowser()
+
+ # If we received a timeout response, restart the browser
+ if response[-9:] == ",timeout\n":
+ DetachFromBrowser()
+
+ # Invoke the iteration function
+ iteration_func(url, Iterate.proc, Iterate.wnd, response)
+
+ # We're done
+ DetachFromBrowser()