diff options
Diffstat (limited to 'tools/site_compare/utils/browser_iterate.py')
-rw-r--r-- | tools/site_compare/utils/browser_iterate.py | 225 |
1 files changed, 225 insertions, 0 deletions
#!/usr/bin/python2.4
# Copyright 2008, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Utility to use a browser to visit multiple URLs.

Prerequisites:
  1. The command_line package from tools/site_compare
  2. Either the IE BHO or Firefox extension (or both)

Installation:
  1. Build the IE BHO, or call regsvr32 on a prebuilt binary
  2. Add a file called "measurepageloadtimeextension@google.com" to
     the default Firefox profile directory under extensions, containing
     the path to the Firefox extension root

Invoke with the command line arguments as documented within
the command line.
"""

import command_line
import itertools
# DetachFromBrowser catches pywintypes.error; without this import that
# handler would itself fail with a NameError.
import pywintypes
import scrapers
import socket
import time

from drivers import windowing

# Constants
MAX_URL = 1024   # Maximum number of bytes requested per socket recv()
PORT = 42492     # TCP port on localhost used to talk to the browser


def SetupIterationCommandLine(cmd):
  """Adds the necessary flags for iteration to a command.

  Args:
    cmd: an object created by cmdline.AddCommand
  """
  cmd.AddArgument(
    ["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
    type="string", required=True)
  cmd.AddArgument(
    ["-b1v", "--browserver"], "Version of browser", metaname="VERSION")
  cmd.AddArgument(
    ["-p", "--browserpath"], "Path to browser.",
    type="string", required=False)
  cmd.AddArgument(
    ["-u", "--url"], "URL to visit")
  cmd.AddArgument(
    ["-l", "--list"], "File containing list of URLs to visit", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])
  cmd.AddArgument(
    ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
    ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
    ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")
  cmd.AddArgument(
    ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
    "finish loading",
    type="int", default=300)
  cmd.AddArgument(
    ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")


def _GetUrlList(command):
  """Returns the URLs selected by the --url or --list flags.

  Args:
    command: the command line containing the iteration flags

  Returns:
    A list of URL strings. For --list, lines are numbered from 1; the
    selection starts at --startline (first line when omitted) and stops
    before --endline, after --count lines, or at the end of the file,
    whichever comes first.
  """
  if command["--url"]:
    return [command["--url"]]

  startline = command["--startline"]
  if startline is None:
    # --startline is optional; begin at the first line of the file
    startline = 1

  if command["--count"]:
    endline = startline + command["--count"]
  else:
    # May be None, meaning "read through the end of the file"
    endline = command["--endline"]

  skip = max(startline - 1, 0)
  if endline is None:
    stop = None
  else:
    stop = skip + max(endline - startline, 0)

  url_file = open(command["--list"], "r")
  try:
    # islice stops at EOF, so a range that runs past the end of the file
    # does not produce empty URLs
    return [line.strip() for line in itertools.islice(url_file, skip, stop)]
  finally:
    url_file.close()


def Iterate(command, iteration_func):
  """Iterates over a list of URLs, calling a function on each.

  The browser is started on demand and restarted after a hang, crash or
  timeout. The current process, window and socket are kept in the
  function attributes Iterate.proc, Iterate.wnd and Iterate.s.

  Args:
    command: the command line containing the iteration flags
    iteration_func: called for each URL with (url, proc, wnd, result).
      result is the text received from the browser, or "<url>,hang\n" /
      "<url>,crash\n" when the socket timed out or failed. proc is None
      when the browser was shut down while handling this URL.

  Raises:
    ValueError: the browser could not be started or connected to.
  """

  # Retrieve the browser scraper to use to invoke the browser
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  def AttachToBrowser(path, timeout):
    """Invoke the browser process and connect to the socket."""
    (proc, frame, wnd) = scraper.GetBrowser(path)

    if not wnd: raise ValueError("Could not invoke browser.")

    # Try to connect the socket. If it fails, wait and try
    # again. Do this for ten seconds
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)

    for attempt in xrange(10):
      try:
        s.connect(("localhost", PORT))
      except socket.error:
        time.sleep(1)
        continue
      break

    # getpeername() fails on an unconnected socket, which tells us whether
    # any of the attempts above succeeded
    try:
      s.getpeername()
    except socket.error:
      # Don't leak the socket when giving up
      s.close()
      raise ValueError("Could not connect to browser")

    if command["--size"]:
      # Resize and reposition the frame
      windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)

    s.settimeout(timeout)

    Iterate.proc = proc
    Iterate.wnd = wnd
    Iterate.s = s

  def DetachFromBrowser():
    """Close the socket and kill the process if necessary."""
    if Iterate.s:
      Iterate.s.close()
      Iterate.s = None

    if Iterate.proc:
      if not windowing.WaitForProcessExit(Iterate.proc, 0):
        try:
          windowing.EndProcess(Iterate.proc)
          windowing.WaitForProcessExit(Iterate.proc, 0)
        except pywintypes.error:
          # Exception here most likely means the process died on its own
          pass
      Iterate.proc = None

  browser = command["--browserpath"] or None

  # Read the URLs from the command line or the list file
  url_list = _GetUrlList(command)

  timeout = command["--timeout"]

  # Loop through the URLs and send them through the socket
  Iterate.s = None
  Iterate.proc = None
  Iterate.wnd = None

  try:
    for url in url_list:
      # Invoke the browser if necessary
      if not Iterate.proc:
        AttachToBrowser(browser, timeout)

      response = ""

      try:
        # Send the URL and wait for a response. sendall() keeps going until
        # the whole line is written; plain send() may stop part way.
        Iterate.s.sendall(url + "\n")

        while response.find("\n") < 0:
          recv = Iterate.s.recv(MAX_URL)

          # Workaround for an oddity: when Firefox closes
          # gracefully, somehow Python doesn't detect it.
          # (Telnet does)
          if not recv:
            raise socket.error

          response = response + recv

      # socket.timeout must be listed before socket.error so that a
      # timeout is reported as a hang rather than a crash
      except socket.timeout:
        response = url + ",hang\n"
        DetachFromBrowser()
      except socket.error:
        # If there was a socket error, it's probably a crash
        response = url + ",crash\n"
        DetachFromBrowser()

      # If we received a timeout response, restart the browser
      if response.endswith(",timeout\n"):
        DetachFromBrowser()

      # Invoke the iteration function
      iteration_func(url, Iterate.proc, Iterate.wnd, response)
  finally:
    # We're done (or something raised): never leave the browser running
    DetachFromBrowser()