1 files changed, 225 insertions, 0 deletions
diff --git a/tools/site_compare/utils/browser_iterate.py b/tools/site_compare/utils/browser_iterate.py
new file mode 100644
index 0000000..65ba24f
--- /dev/null
+++ b/tools/site_compare/utils/browser_iterate.py
@@ -0,0 +1,225 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#    * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#    * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Utility to use a browser to visit multiple URLs.
+
+Prerequisites:
+  1. The command_line package from tools/site_compare
+  2. Either the IE BHO or Firefox extension (or both)
+
+Installation:
+  1. Build the IE BHO, or call regsvr32 on a prebuilt binary
+  2. Add a file called "measurepageloadtimeextension@google.com" to
+     the default Firefox profile directory under extensions, containing
+     the path to the Firefox extension root
+
+Invoke with the command line arguments as documented within
+the command line.
+"""
+
+import command_line
+import scrapers
+import socket
+import time
+
+from drivers import windowing
+
+# Constants
+MAX_URL = 1024  # maximum number of bytes requested per socket recv() call
+PORT = 42492    # TCP port on localhost used to connect to the browser
+
+def SetupIterationCommandLine(cmd):
+  """Registers the flags that Iterate() reads on a command.
+
+  Args:
+    cmd: an object created by cmdline.AddCommand
+  """
+  add_argument = cmd.AddArgument
+  add_argument(["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
+               type="string", required=True)
+  add_argument(["-b1v", "--browserver"], "Version of browser",
+               metaname="VERSION")
+  add_argument(["-p", "--browserpath"], "Path to browser.",
+               type="string", required=False)
+
+  # Which URL(s) to visit: a single one, or a range of lines from a file
+  add_argument(["-u", "--url"], "URL to visit")
+  add_argument(["-l", "--list"], "File containing list of URLs to visit",
+               type="readfile")
+  cmd.AddMutualExclusion(["--url", "--list"])
+  add_argument(["-s", "--startline"], "First line of URL list", type="int")
+  add_argument(["-e", "--endline"], "Last line of URL list (exclusive)",
+               type="int")
+  add_argument(["-c", "--count"], "Number of lines of URL file to use",
+               type="int")
+  cmd.AddDependency("--startline", "--list")
+  cmd.AddRequiredGroup(["--url", "--list"])
+  cmd.AddDependency("--endline", "--list")
+  cmd.AddDependency("--count", "--list")
+  cmd.AddMutualExclusion(["--count", "--endline"])
+  cmd.AddDependency("--count", "--startline")
+
+  # How long to wait for a page, and how large a window to load it in
+  add_argument(["-t", "--timeout"],
+               "Amount of time (seconds) to wait for browser to finish "
+               "loading", type="int", default=300)
+  add_argument(["-sz", "--size"], "Browser window size",
+               default=(800, 600), type="coords")
+
+
+def Iterate(command, iteration_func):
+  """Iterates over a list of URLs, calling a function on each.
+
+  The browser is invoked on demand and shut down again (to be restarted for
+  the next URL) after a hang, a crash or a ",timeout" response. Each URL is
+  sent over a local TCP socket and a newline-terminated response read back.
+
+  Args:
+    command: the command line containing the iteration flags
+    iteration_func: called once per URL with (url, proc, wnd, response);
+      proc and wnd are None if the browser was shut down for that URL
+
+  Raises:
+    ValueError: the browser could not be invoked or connected to
+  """
+  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))
+
+  def AttachToBrowser(path, timeout):
+    """Invoke the browser process and connect to the socket."""
+    (proc, frame, wnd) = scraper.GetBrowser(path)
+
+    # Record these first so that a failure below can still be cleaned up
+    Iterate.proc = proc
+    Iterate.wnd = wnd
+    if not wnd: raise ValueError("Could not invoke browser.")
+
+    # Try to connect for ten seconds, waiting a second after each failure.
+    # Every attempt gets a fresh socket, which is closed if it fails.
+    for attempt in xrange(10):
+      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
+      try:
+        s.connect(("localhost", PORT))
+      except socket.error:
+        s.close()
+        time.sleep(1)
+        continue
+      break
+    else:
+      raise ValueError("Could not connect to browser")
+    Iterate.s = s
+    s.settimeout(timeout)
+
+    if command["--size"]:
+      # Resize and reposition the frame
+      windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)
+
+  def DetachFromBrowser():
+    """Close the socket and kill the process if necessary."""
+    if Iterate.s:
+      Iterate.s.close()
+      Iterate.s = None
+    if Iterate.proc:
+      if not windowing.WaitForProcessExit(Iterate.proc, 0):
+        # Local import: the module used pywintypes without importing it
+        import pywintypes
+        try:
+          windowing.EndProcess(Iterate.proc)
+          windowing.WaitForProcessExit(Iterate.proc, 0)
+        except pywintypes.error:
+          # Exception here most likely means the process died on its own
+          pass
+      Iterate.proc = None
+    # The window went away with the browser; do not hand out a stale one
+    Iterate.wnd = None
+
+  browser = command["--browserpath"] or None
+  timeout = command["--timeout"]
+
+  # Read the URLs: either the single --url or lines of the --list file
+  if command["--url"]:
+    url_list = [command["--url"]]
+  else:
+    # --startline is 1-based and defaults to 1; without --endline or --count
+    # the file is read through to its end. Blank lines are skipped.
+    startline = command["--startline"] or 1
+    if command["--count"]:
+      endline = startline + command["--count"]
+    else:
+      endline = command["--endline"]
+    url_list = []
+    url_file = open(command["--list"], "r")
+    try:
+      line_number = 0
+      for line in url_file:
+        line_number += 1
+        if endline and line_number >= endline:
+          break
+        entry = line.strip()
+        if line_number >= startline and entry:
+          url_list.append(entry)
+    finally:
+      url_file.close()
+
+  # Connection state lives in function attributes so the helpers can rebind it
+  Iterate.s = None
+  Iterate.proc = None
+  Iterate.wnd = None
+
+  try:
+    for url in url_list:
+      if not Iterate.proc:
+        AttachToBrowser(browser, timeout)
+      response = ""
+      try:
+        # Send the URL and wait for a newline-terminated response
+        Iterate.s.sendall(url + "\n")
+        while response.find("\n") < 0:
+          recv = Iterate.s.recv(MAX_URL)
+          # Workaround for an oddity: when Firefox closes gracefully,
+          # somehow Python doesn't detect it. (Telnet does)
+          if not recv:
+            raise socket.error
+          response = response + recv
+      except socket.timeout:
+        response = url + ",hang\n"
+        DetachFromBrowser()
+      except socket.error:
+        # If there was a socket error, it's probably a crash
+        response = url + ",crash\n"
+        DetachFromBrowser()
+
+      # If we received a timeout response, restart the browser
+      if response[-9:] == ",timeout\n":
+        DetachFromBrowser()
+      # Report exactly once per URL, after the full response has arrived
+      iteration_func(url, Iterate.proc, Iterate.wnd, response)
+  finally:
+    # We're done (or failed): never leave the browser or socket behind
+    DetachFromBrowser()