path: root/tools/site_compare
author     initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>  2008-07-27 00:12:16 +0000
committer  initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>  2008-07-27 00:12:16 +0000
commit     920c091ac3ee15079194c82ae8a7a18215f3f23c (patch)
tree       d28515d1e7732e2b6d077df1b4855ace3f4ac84f /tools/site_compare
parent     ae2c20f398933a9e86c387dcc465ec0f71065ffc (diff)
Add tools to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/site_compare')
-rw-r--r--  tools/site_compare/command_line.py  823
-rw-r--r--  tools/site_compare/commands/__init__.py  2
-rw-r--r--  tools/site_compare/commands/compare2.py  196
-rw-r--r--  tools/site_compare/commands/maskmaker.py  298
-rw-r--r--  tools/site_compare/commands/measure.py  78
-rw-r--r--  tools/site_compare/commands/scrape.py  85
-rw-r--r--  tools/site_compare/commands/timeload.py  170
-rw-r--r--  tools/site_compare/drivers/__init__.py  15
-rw-r--r--  tools/site_compare/drivers/win32/__init__.py  0
-rw-r--r--  tools/site_compare/drivers/win32/keyboard.py  223
-rw-r--r--  tools/site_compare/drivers/win32/mouse.py  243
-rw-r--r--  tools/site_compare/drivers/win32/windowing.py  386
-rw-r--r--  tools/site_compare/operators/__init__.py  26
-rw-r--r--  tools/site_compare/operators/equals.py  66
-rw-r--r--  tools/site_compare/operators/equals_with_mask.py  86
-rw-r--r--  tools/site_compare/scrapers/__init__.py  34
-rw-r--r--  tools/site_compare/scrapers/chrome/__init__.py  38
-rw-r--r--  tools/site_compare/scrapers/chrome/chrome011010.py  68
-rw-r--r--  tools/site_compare/scrapers/chrome/chrome01970.py  69
-rw-r--r--  tools/site_compare/scrapers/chrome/chromebase.py  217
-rw-r--r--  tools/site_compare/scrapers/firefox/__init__.py  31
-rw-r--r--  tools/site_compare/scrapers/firefox/firefox2.py  269
-rw-r--r--  tools/site_compare/scrapers/ie/__init__.py  31
-rw-r--r--  tools/site_compare/scrapers/ie/ie7.py  230
-rw-r--r--  tools/site_compare/site_compare.py  202
-rw-r--r--  tools/site_compare/utils/__init__.py  7
-rw-r--r--  tools/site_compare/utils/browser_iterate.py  225
27 files changed, 4118 insertions, 0 deletions
diff --git a/tools/site_compare/command_line.py b/tools/site_compare/command_line.py
new file mode 100644
index 0000000..b99a1c9
--- /dev/null
+++ b/tools/site_compare/command_line.py
@@ -0,0 +1,823 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Parse a command line, retrieving a command and its arguments.
+
+Supports the concept of command line commands, each with its own set
+of arguments. Supports dependent arguments and mutually exclusive arguments.
+Basically, a better optparse. I took heed of epg's WHINE() in gvn.cmdline
+and dumped optparse in favor of something better.
+"""
+
+import os.path
+import re
+import string
+import sys
+import textwrap
+import types
+
+
+def IsString(var):
+ """Little helper function to see if a variable is a string."""
+ return type(var) in types.StringTypes
+
+
+class ParseError(Exception):
+ """Encapsulates errors from parsing, string arg is description."""
+ pass
+
+
+class Command(object):
+ """Implements a single command."""
+
+ def __init__(self, names, helptext, validator=None, impl=None):
+ """Initializes Command from names and helptext, plus optional callables.
+
+ Args:
+ names: command name, or list of synonyms
+ helptext: brief string description of the command
+ validator: callable for custom argument validation
+ Should raise ParseError if it wants
+ impl: callable to be invoked when command is called
+ """
+ self.names = names
+ self.validator = validator
+ self.helptext = helptext
+ self.impl = impl
+ self.args = []
+ self.required_groups = []
+ self.arg_dict = {}
+ self.positional_args = []
+ self.cmdline = None
+
+ class Argument(object):
+ """Encapsulates an argument to a command."""
+ VALID_TYPES = ['string', 'readfile', 'int', 'flag', 'coords']
+ TYPES_WITH_VALUES = ['string', 'readfile', 'int', 'coords']
+
+ def __init__(self, names, helptext, type, metaname,
+ required, default, positional):
+ """Command-line argument to a command.
+
+ Args:
+ names: argument name, or list of synonyms
+ helptext: brief description of the argument
+ type: type of the argument. Valid values include:
+ string - a string
+ readfile - a file which must exist and be available
+ for reading
+ int - an integer
+ flag - an optional flag (bool)
+ coords - (x,y) where x and y are ints
+ metaname: Name to display for value in help, inferred if not
+ specified
+ required: True if argument must be specified
+ default: Default value if not specified
+ positional: Argument specified by location, not name
+
+ Raises:
+ ValueError: the argument name is invalid for some reason
+ """
+ if type not in Command.Argument.VALID_TYPES:
+ raise ValueError("Invalid type: %r" % type)
+
+ if required and default is not None:
+ raise ValueError("required and default are mutually exclusive")
+
+ if required and type == 'flag':
+ raise ValueError("A required flag? Give me a break.")
+
+ if metaname and type not in Command.Argument.TYPES_WITH_VALUES:
+ raise ValueError("Type %r can't have a metaname" % type)
+
+ # If no metaname is provided, infer it: use the alphabetical characters
+ # of the last provided name
+ if not metaname and type in Command.Argument.TYPES_WITH_VALUES:
+ metaname = (
+ names[-1].lstrip(string.punctuation + string.whitespace).upper())
+
+ self.names = names
+ self.helptext = helptext
+ self.type = type
+ self.required = required
+ self.default = default
+ self.positional = positional
+ self.metaname = metaname
+
+ self.mutex = [] # arguments that are mutually exclusive with
+ # this one
+ self.depends = [] # arguments that must be present for this
+ # one to be valid
+ self.present = False # has this argument been specified?
+
+ def AddDependency(self, arg):
+ """Makes this argument dependent on another argument.
+
+ Args:
+ arg: name of the argument this one depends on
+ """
+ if arg not in self.depends:
+ self.depends.append(arg)
+
+ def AddMutualExclusion(self, arg):
+ """Makes this argument invalid if another is specified.
+
+ Args:
+ arg: name of the mutually exclusive argument.
+ """
+ if arg not in self.mutex:
+ self.mutex.append(arg)
+
+ def GetUsageString(self):
+ """Returns a brief string describing the argument's usage."""
+ if not self.positional:
+ string = self.names[0]
+ if self.type in Command.Argument.TYPES_WITH_VALUES:
+ string += "="+self.metaname
+ else:
+ string = self.metaname
+
+ if not self.required:
+ string = "["+string+"]"
+
+ return string
+
+ def GetNames(self):
+ """Returns a string containing a list of the arg's names."""
+ if self.positional:
+ return self.metaname
+ else:
+ return ", ".join(self.names)
+
+ def GetHelpString(self, width=80, indent=5, names_width=20, gutter=2):
+ """Returns a help string including help for all the arguments."""
+ names = [" "*indent + line +" "*(names_width-len(line)) for line in
+ textwrap.wrap(self.GetNames(), names_width)]
+
+ helpstring = textwrap.wrap(self.helptext, width-indent-names_width-gutter)
+
+ if len(names) < len(helpstring):
+ names += [" "*(indent+names_width)]*(len(helpstring)-len(names))
+
+ if len(helpstring) < len(names):
+ helpstring += [""]*(len(names)-len(helpstring))
+
+ return "\n".join([name_line + " "*gutter + help_line for
+ name_line, help_line in zip(names, helpstring)])
+
+ def __repr__(self):
+ if self.present:
+ string = '= %r' % self.value
+ else:
+ string = "(absent)"
+
+ return "Argument %s '%s'%s" % (self.type, self.names[0], string)
+
+ # end of nested class Argument
+
+ def AddArgument(self, names, helptext, type="string", metaname=None,
+ required=False, default=None, positional=False):
+ """Command-line argument to a command.
+
+ Args:
+ names: argument name, or list of synonyms
+ helptext: brief description of the argument
+ type: type of the argument
+      metaname: Name to display for value in help, inferred if not specified
+ required: True if argument must be specified
+ default: Default value if not specified
+ positional: Argument specified by location, not name
+
+ Raises:
+ ValueError: the argument already exists or is invalid
+
+ Returns:
+ The newly-created argument
+ """
+ if IsString(names): names = [names]
+
+ names = [name.lower() for name in names]
+
+ for name in names:
+ if name in self.arg_dict:
+ raise ValueError("%s is already an argument"%name)
+
+ if (positional and required and
+ [arg for arg in self.args if arg.positional] and
+ not [arg for arg in self.args if arg.positional][-1].required):
+ raise ValueError(
+ "A required positional argument may not follow an optional one.")
+
+ arg = Command.Argument(names, helptext, type, metaname,
+ required, default, positional)
+
+ self.args.append(arg)
+
+ for name in names:
+ self.arg_dict[name] = arg
+
+ return arg
+
+ def GetArgument(self, name):
+ """Return an argument from a name."""
+ return self.arg_dict[name.lower()]
+
+ def AddMutualExclusion(self, args):
+ """Specifies that a list of arguments are mutually exclusive."""
+ if len(args) < 2:
+ raise ValueError("At least two arguments must be specified.")
+
+ args = [arg.lower() for arg in args]
+
+ for index in xrange(len(args)-1):
+ for index2 in xrange(index+1, len(args)):
+ self.arg_dict[args[index]].AddMutualExclusion(self.arg_dict[args[index2]])
+
+ def AddDependency(self, dependent, depends_on):
+ """Specifies that one argument may only be present if another is.
+
+ Args:
+ dependent: the name of the dependent argument
+ depends_on: the name of the argument on which it depends
+ """
+ self.arg_dict[dependent.lower()].AddDependency(
+ self.arg_dict[depends_on.lower()])
+
+ def AddMutualDependency(self, args):
+ """Specifies that a list of arguments are all mutually dependent."""
+ if len(args) < 2:
+ raise ValueError("At least two arguments must be specified.")
+
+ args = [arg.lower() for arg in args]
+
+ for (arg1, arg2) in [(arg1, arg2) for arg1 in args for arg2 in args]:
+ if arg1 == arg2: continue
+ self.arg_dict[arg1].AddDependency(self.arg_dict[arg2])
+
+ def AddRequiredGroup(self, args):
+ """Specifies that at least one of the named arguments must be present."""
+ if len(args) < 2:
+ raise ValueError("At least two arguments must be in a required group.")
+
+ args = [self.arg_dict[arg.lower()] for arg in args]
+
+ self.required_groups.append(args)
+
+ def ParseArguments(self):
+ """Given a command line, parse and validate the arguments."""
+
+ # reset all the arguments before we parse
+ for arg in self.args:
+ arg.present = False
+ arg.value = None
+
+ self.parse_errors = []
+
+ # look for arguments remaining on the command line
+ while len(self.cmdline.rargs):
+ try:
+ self.ParseNextArgument()
+ except ParseError, e:
+ self.parse_errors.append(e.args[0])
+
+ # after all the arguments are parsed, check for problems
+ for arg in self.args:
+ if not arg.present and arg.required:
+ self.parse_errors.append("'%s': required parameter was missing"
+ % arg.names[0])
+
+ if not arg.present and arg.default:
+ arg.present = True
+ arg.value = arg.default
+
+ if arg.present:
+ for mutex in arg.mutex:
+ if mutex.present:
+ self.parse_errors.append(
+ "'%s', '%s': arguments are mutually exclusive" %
+ (arg.argstr, mutex.argstr))
+
+ for depend in arg.depends:
+ if not depend.present:
+ self.parse_errors.append("'%s': '%s' must be specified as well" %
+ (arg.argstr, depend.names[0]))
+
+ # check for required groups
+ for group in self.required_groups:
+ if not [arg for arg in group if arg.present]:
+ self.parse_errors.append("%s: at least one must be present" %
+ (", ".join(["'%s'" % arg.names[-1] for arg in group])))
+
+ # if we have any validators, invoke them
+ if not self.parse_errors and self.validator:
+ try:
+ self.validator(self)
+ except ParseError, e:
+ self.parse_errors.append(e.args[0])
+
+ # Helper methods so you can treat the command like a dict
+ def __getitem__(self, key):
+ arg = self.arg_dict[key.lower()]
+
+ if arg.type == 'flag':
+ return arg.present
+ else:
+ return arg.value
+
+ def __iter__(self):
+ return [arg for arg in self.args if arg.present].__iter__()
+
+ def ArgumentPresent(self, key):
+ """Tests if an argument exists and has been specified."""
+ return key.lower() in self.arg_dict and self.arg_dict[key.lower()].present
+
+ def __contains__(self, key):
+ return self.ArgumentPresent(key)
+
+ def ParseNextArgument(self):
+ """Find the next argument in the command line and parse it."""
+ arg = None
+ value = None
+ argstr = self.cmdline.rargs.pop(0)
+
+ # First check: is this a literal argument?
+ if argstr.lower() in self.arg_dict:
+ arg = self.arg_dict[argstr.lower()]
+ if arg.type in Command.Argument.TYPES_WITH_VALUES:
+ if len(self.cmdline.rargs):
+ value = self.cmdline.rargs.pop(0)
+
+ # Second check: is this of the form "arg=val" or "arg:val"?
+ if arg is None:
+ delimiter_pos = -1
+
+ for delimiter in [':', '=']:
+ pos = argstr.find(delimiter)
+ if pos >= 0:
+ if delimiter_pos < 0 or pos < delimiter_pos:
+ delimiter_pos = pos
+
+ if delimiter_pos >= 0:
+ testarg = argstr[:delimiter_pos]
+ testval = argstr[delimiter_pos+1:]
+
+ if testarg.lower() in self.arg_dict:
+ arg = self.arg_dict[testarg.lower()]
+ argstr = testarg
+ value = testval
+
+ # Third check: does this begin an argument?
+ if arg is None:
+ for key in self.arg_dict.iterkeys():
+ if (len(key) < len(argstr) and
+ self.arg_dict[key].type in Command.Argument.TYPES_WITH_VALUES and
+ argstr[:len(key)].lower() == key):
+ value = argstr[len(key):]
+ argstr = argstr[:len(key)]
+ arg = self.arg_dict[argstr]
+
+ # Fourth check: do we have any positional arguments available?
+ if arg is None:
+ for positional_arg in [
+ testarg for testarg in self.args if testarg.positional]:
+ if not positional_arg.present:
+ arg = positional_arg
+ value = argstr
+ argstr = positional_arg.names[0]
+ break
+
+ # Push the retrieved argument/value onto the largs stack
+ if argstr: self.cmdline.largs.append(argstr)
+ if value: self.cmdline.largs.append(value)
+
+ # If we've made it this far and haven't found an arg, give up
+ if arg is None:
+ raise ParseError("Unknown argument: '%s'" % argstr)
+
+ # Convert the value, if necessary
+ if arg.type in Command.Argument.TYPES_WITH_VALUES and value is None:
+ raise ParseError("Argument '%s' requires a value" % argstr)
+
+ if value is not None:
+ value = self.StringToValue(value, arg.type, argstr)
+
+ arg.argstr = argstr
+ arg.value = value
+ arg.present = True
+
+ # end method ParseNextArgument
+
+ def StringToValue(self, value, type, argstr):
+ """Convert a string from the command line to a value type."""
+ try:
+ if type == 'string':
+ pass # leave it be
+
+ elif type == 'int':
+ try:
+ value = int(value)
+ except ValueError:
+ raise ParseError
+
+ elif type == 'readfile':
+ if not os.path.isfile(value):
+ raise ParseError("'%s': '%s' does not exist" % (argstr, value))
+
+ elif type == 'coords':
+ try:
+ value = [int(val) for val in
+ re.match("\(\s*(\d+)\s*\,\s*(\d+)\s*\)\s*\Z", value).
+ groups()]
+ except AttributeError:
+ raise ParseError
+
+ else:
+ raise ValueError("Unknown type: '%s'" % type)
+
+ except ParseError, e:
+ # The bare exception is raised in the generic case; more specific errors
+ # will arrive with arguments and should just be reraised
+ if not e.args:
+ e = ParseError("'%s': unable to convert '%s' to type '%s'" %
+ (argstr, value, type))
+ raise e
+
+ return value
+
+ def SortArgs(self):
+ """Returns a method that can be passed to sort() to sort arguments."""
+
+ def ArgSorter(arg1, arg2):
+ """Helper for sorting arguments in the usage string.
+
+ Positional arguments come first, then required arguments,
+ then optional arguments. Pylint demands this trivial function
+ have both Args: and Returns: sections, sigh.
+
+ Args:
+ arg1: the first argument to compare
+ arg2: the second argument to compare
+
+ Returns:
+ -1 if arg1 should be sorted first, +1 if it should be sorted second,
+ and 0 if arg1 and arg2 have the same sort level.
+ """
+ return ((arg2.positional-arg1.positional)*2 +
+ (arg2.required-arg1.required))
+ return ArgSorter
+
+ def GetUsageString(self, width=80, name=None):
+ """Gets a string describing how the command is used."""
+ if name is None: name = self.names[0]
+
+ initial_indent = "Usage: %s %s " % (self.cmdline.prog, name)
+ subsequent_indent = " " * len(initial_indent)
+
+ sorted_args = self.args[:]
+ sorted_args.sort(self.SortArgs())
+
+ return textwrap.fill(
+ " ".join([arg.GetUsageString() for arg in sorted_args]), width,
+ initial_indent=initial_indent,
+ subsequent_indent=subsequent_indent)
+
+ def GetHelpString(self, width=80):
+ """Returns a list of help strings for all this command's arguments."""
+ sorted_args = self.args[:]
+ sorted_args.sort(self.SortArgs())
+
+ return "\n".join([arg.GetHelpString(width) for arg in sorted_args])
+
+ # end class Command
+
+
+class CommandLine(object):
+ """Parse a command line, extracting a command and its arguments."""
+
+ def __init__(self):
+ self.commands = []
+ self.cmd_dict = {}
+
+ # Add the help command to the parser
+ help_cmd = self.AddCommand(["help", "--help", "-?", "-h"],
+ "Displays help text for a command",
+ ValidateHelpCommand,
+ DoHelpCommand)
+
+ help_cmd.AddArgument(
+ "command", "Command to retrieve help for", positional=True)
+ help_cmd.AddArgument(
+ "--width", "Width of the output", type='int', default=80)
+
+ self.Exit = sys.exit # override this if you don't want the script to halt
+ # on error or on display of help
+
+ self.out = sys.stdout # override these if you want to redirect
+ self.err = sys.stderr # output or error messages
+
+ def AddCommand(self, names, helptext, validator=None, impl=None):
+ """Add a new command to the parser.
+
+ Args:
+ names: command name, or list of synonyms
+ helptext: brief string description of the command
+ validator: method to validate a command's arguments
+ impl: callable to be invoked when command is called
+
+ Raises:
+ ValueError: raised if command already added
+
+ Returns:
+ The new command
+ """
+ if IsString(names): names = [names]
+
+ for name in names:
+ if name in self.cmd_dict:
+ raise ValueError("%s is already a command"%name)
+
+ cmd = Command(names, helptext, validator, impl)
+ cmd.cmdline = self
+
+ self.commands.append(cmd)
+ for name in names:
+ self.cmd_dict[name.lower()] = cmd
+
+ return cmd
+
+ def GetUsageString(self):
+ """Returns simple usage instructions."""
+ return "Type '%s help' for usage." % self.prog
+
+ def ParseCommandLine(self, argv=None, prog=None, execute=True):
+ """Does the work of parsing a command line.
+
+ Args:
+      argv: list of arguments, defaults to sys.argv[1:]
+ prog: name of the command, defaults to the base name of the script
+ execute: if false, just parse, don't invoke the 'impl' member
+
+ Returns:
+ The command that was executed
+ """
+ if argv is None: argv = sys.argv[1:]
+ if prog is None: prog = os.path.basename(sys.argv[0]).split('.')[0]
+
+ # Store off our parameters, we may need them someday
+ self.argv = argv
+ self.prog = prog
+
+ # We shouldn't be invoked without arguments, that's just lame
+ if not len(argv):
+ self.out.writelines(self.GetUsageString())
+ self.Exit()
+ return None # in case the client overrides Exit
+
+ # Is it a valid command?
+ self.command_string = argv[0].lower()
+ if not self.command_string in self.cmd_dict:
+ self.err.write("Unknown command: '%s'\n\n" % self.command_string)
+ self.out.write(self.GetUsageString())
+ self.Exit()
+ return None # in case the client overrides Exit
+
+ self.command = self.cmd_dict[self.command_string]
+
+ # "rargs" = remaining (unparsed) arguments
+ # "largs" = already parsed, "left" of the read head
+ self.rargs = argv[1:]
+ self.largs = []
+
+ # let the command object do the parsing
+ self.command.ParseArguments()
+
+ if self.command.parse_errors:
+ # there were errors, output the usage string and exit
+ self.err.write(self.command.GetUsageString()+"\n\n")
+ self.err.write("\n".join(self.command.parse_errors))
+ self.err.write("\n\n")
+
+ self.Exit()
+
+ elif execute and self.command.impl:
+ self.command.impl(self.command)
+
+ return self.command
+
+ def __getitem__(self, key):
+ return self.cmd_dict[key]
+
+ def __iter__(self):
+ return self.cmd_dict.__iter__()
+
+
+def ValidateHelpCommand(command):
+ """Checks to make sure an argument to 'help' is a valid command."""
+ if 'command' in command and command['command'] not in command.cmdline:
+ raise ParseError("'%s': unknown command" % command['command'])
+
+
+def DoHelpCommand(command):
+ """Executed when the command is 'help'."""
+ out = command.cmdline.out
+ width = command['--width']
+
+ if 'command' not in command:
+ out.write(command.GetUsageString())
+ out.write("\n\n")
+
+ indent = 5
+ gutter = 2
+
+ command_width = (
+ max([len(cmd.names[0]) for cmd in command.cmdline.commands]) + gutter)
+
+ for cmd in command.cmdline.commands:
+ cmd_name = cmd.names[0]
+
+ initial_indent = (" "*indent + cmd_name + " "*
+ (command_width+gutter-len(cmd_name)))
+ subsequent_indent = " "*(indent+command_width+gutter)
+
+ out.write(textwrap.fill(cmd.helptext, width,
+ initial_indent=initial_indent,
+ subsequent_indent=subsequent_indent))
+ out.write("\n")
+
+ out.write("\n")
+
+ else:
+ help_cmd = command.cmdline[command['command']]
+
+ out.write(textwrap.fill(help_cmd.helptext, width))
+ out.write("\n\n")
+ out.write(help_cmd.GetUsageString(width=width))
+ out.write("\n\n")
+ out.write(help_cmd.GetHelpString(width=width))
+ out.write("\n")
+
+ command.cmdline.Exit()
+
+if __name__ == "__main__":
+ # If we're invoked rather than imported, run some tests
+ cmdline = CommandLine()
+
+ # Since we're testing, override Exit()
+ def TestExit():
+ pass
+ cmdline.Exit = TestExit
+
+ # Actually, while we're at it, let's override error output too
+ cmdline.err = open(os.path.devnull, "w")
+
+ test = cmdline.AddCommand(["test", "testa", "testb"], "test command")
+ test.AddArgument(["-i", "--int", "--integer", "--optint", "--optionalint"],
+ "optional integer parameter", type='int')
+ test.AddArgument("--reqint", "required integer parameter", type='int',
+ required=True)
+ test.AddArgument("pos1", "required positional argument", positional=True,
+ required=True)
+ test.AddArgument("pos2", "optional positional argument", positional=True)
+ test.AddArgument("pos3", "another optional positional arg",
+ positional=True)
+
+ # mutually dependent arguments
+ test.AddArgument("--mutdep1", "mutually dependent parameter 1")
+ test.AddArgument("--mutdep2", "mutually dependent parameter 2")
+ test.AddArgument("--mutdep3", "mutually dependent parameter 3")
+ test.AddMutualDependency(["--mutdep1", "--mutdep2", "--mutdep3"])
+
+ # mutually exclusive arguments
+ test.AddArgument("--mutex1", "mutually exclusive parameter 1")
+ test.AddArgument("--mutex2", "mutually exclusive parameter 2")
+ test.AddArgument("--mutex3", "mutually exclusive parameter 3")
+ test.AddMutualExclusion(["--mutex1", "--mutex2", "--mutex3"])
+
+ # dependent argument
+ test.AddArgument("--dependent", "dependent argument")
+ test.AddDependency("--dependent", "--int")
+
+ # other argument types
+ test.AddArgument("--file", "filename argument", type='readfile')
+ test.AddArgument("--coords", "coordinate argument", type='coords')
+ test.AddArgument("--flag", "flag argument", type='flag')
+
+ test.AddArgument("--req1", "part of a required group", type='flag')
+ test.AddArgument("--req2", "part 2 of a required group", type='flag')
+
+ test.AddRequiredGroup(["--req1", "--req2"])
+
+ # a few failure cases
+ exception_cases = """
+ test.AddArgument("failpos", "can't have req'd pos arg after opt",
+ positional=True, required=True)
++++
+ test.AddArgument("--int", "this argument already exists")
++++
+ test.AddDependency("--int", "--doesntexist")
++++
+ test.AddMutualDependency(["--doesntexist", "--mutdep2"])
++++
+ test.AddMutualExclusion(["--doesntexist", "--mutex2"])
++++
+ test.AddArgument("--reqflag", "required flag", required=True, type='flag')
++++
+ test.AddRequiredGroup(["--req1", "--doesntexist"])
+"""
+ for exception_case in exception_cases.split("+++"):
+ try:
+ exception_case = exception_case.strip()
+ exec exception_case # yes, I'm using exec, it's just for a test.
+ except ValueError:
+ # this is expected
+ pass
+ except KeyError:
+ # ...and so is this
+ pass
+ else:
+ print ("FAILURE: expected an exception for '%s'"
+ " and didn't get it" % exception_case)
+
+ # Let's do some parsing! first, the minimal success line:
+ MIN = "test --reqint 123 param1 --req1 "
+
+ # tuples of (command line, expected error count)
+ test_lines = [
+ ("test --int 3 foo --req1", 1), # missing required named parameter
+ ("test --reqint 3 --req1", 1), # missing required positional parameter
+ (MIN, 0), # success!
+ ("test param1 --reqint 123 --req1", 0), # success, order shouldn't matter
+ ("test param1 --reqint 123 --req2", 0), # success, any of required group ok
+ (MIN+"param2", 0), # another positional parameter is okay
+ (MIN+"param2 param3", 0), # and so are three
+ (MIN+"param2 param3 param4", 1), # but four are just too many
+ (MIN+"--int", 1), # where's the value?
+ (MIN+"--int 456", 0), # this is fine
+ (MIN+"--int456", 0), # as is this
+ (MIN+"--int:456", 0), # and this
+ (MIN+"--int=456", 0), # and this
+ (MIN+"--file c:\\windows\\system32\\kernel32.dll", 0), # yup
+ (MIN+"--file c:\\thisdoesntexist", 1), # nope
+ (MIN+"--mutdep1 a", 2), # no!
+ (MIN+"--mutdep2 b", 2), # also no!
+ (MIN+"--mutdep3 c", 2), # dream on!
+ (MIN+"--mutdep1 a --mutdep2 b", 2), # almost!
+ (MIN+"--mutdep1 a --mutdep2 b --mutdep3 c", 0), # yes
+ (MIN+"--mutex1 a", 0), # yes
+ (MIN+"--mutex2 b", 0), # yes
+ (MIN+"--mutex3 c", 0), # fine
+ (MIN+"--mutex1 a --mutex2 b", 1), # not fine
+ (MIN+"--mutex1 a --mutex2 b --mutex3 c", 3), # even worse
+ (MIN+"--dependent 1", 1), # no
+ (MIN+"--dependent 1 --int 2", 0), # ok
+ (MIN+"--int abc", 1), # bad type
+ (MIN+"--coords abc", 1), # also bad
+ (MIN+"--coords (abc)", 1), # getting warmer
+ (MIN+"--coords (abc,def)", 1), # missing something
+ (MIN+"--coords (123)", 1), # ooh, so close
+ (MIN+"--coords (123,def)", 1), # just a little farther
+ (MIN+"--coords (123,456)", 0), # finally!
+ ("test --int 123 --reqint=456 foo bar --coords(42,88) baz --req1", 0)
+ ]
+
+ badtests = 0
+
+ for (test, expected_failures) in test_lines:
+ cmdline.ParseCommandLine([x.strip() for x in test.strip().split(" ")])
+
+ if not len(cmdline.command.parse_errors) == expected_failures:
+ print "FAILED:\n issued: '%s'\n expected: %d\n received: %d\n\n" % (
+ test, expected_failures, len(cmdline.command.parse_errors))
+ badtests += 1
+
+ print "%d failed out of %d tests" % (badtests, len(test_lines))
+
+ cmdline.ParseCommandLine(["help", "test"])
+
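Taken together with the self-test harness above, the following is a minimal sketch of how a client is expected to drive this parser. It is not part of the commit; the "greet" command and its flags are hypothetical, purely for illustration, but every call uses the API defined in command_line.py above.

  import command_line

  cmdline = command_line.CommandLine()
  greet = cmdline.AddCommand(["greet"], "Prints a greeting")
  greet.AddArgument(["-n", "--name"], "Who to greet", required=True)
  greet.AddArgument(["--shout"], "Print the greeting in uppercase", type='flag')

  # Parse the line; no impl callable is registered for "greet", so nothing runs.
  cmd = cmdline.ParseCommandLine(["greet", "--name=world", "--shout"])
  print cmd["--name"]   # "world"
  print cmd["--shout"]  # True -- flag arguments report presence, not a value
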
diff --git a/tools/site_compare/commands/__init__.py b/tools/site_compare/commands/__init__.py
new file mode 100644
index 0000000..a699508
--- /dev/null
+++ b/tools/site_compare/commands/__init__.py
@@ -0,0 +1,2 @@
+#!/usr/bin/python2.4
+
diff --git a/tools/site_compare/commands/compare2.py b/tools/site_compare/commands/compare2.py
new file mode 100644
index 0000000..6dc00c7
--- /dev/null
+++ b/tools/site_compare/commands/compare2.py
@@ -0,0 +1,196 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""SiteCompare command to invoke the same page in two versions of a browser.
+
+Does the easiest compatibility test: equality comparison between two different
+versions of the same browser. Invoked with a series of command line options
+that specify which URLs to check, which browser to use, where to store results,
+etc.
+"""
+
+import os # Functions for walking the directory tree
+import tempfile # Get a temporary directory to hold intermediates
+
+import command_line
+import drivers # Functions for driving keyboard/mouse/windows, OS-specific
+import operators # Functions that, given two bitmaps as input, produce
+ # output depending on the performance of an operation
+import scrapers # Functions that know how to capture a render from
+ # particular browsers
+
+
+def CreateCommand(cmdline):
+ """Inserts the command and arguments into a command line for parsing."""
+ cmd = cmdline.AddCommand(
+ ["compare2"],
+ "Compares the output of two browsers on the same URL or list of URLs",
+ ValidateCompare2,
+ ExecuteCompare2)
+
+ cmd.AddArgument(
+ ["-b1", "--browser1"], "Full path to first browser's executable",
+ type="readfile", metaname="PATH", required=True)
+ cmd.AddArgument(
+ ["-b2", "--browser2"], "Full path to second browser's executable",
+ type="readfile", metaname="PATH", required=True)
+ cmd.AddArgument(
+ ["-b", "--browser"], "Which browser to use", type="string",
+ default="chrome")
+ cmd.AddArgument(
+ ["-b1v", "--browser1ver"], "Version of first browser", metaname="VERSION")
+ cmd.AddArgument(
+ ["-b2v", "--browser2ver"], "Version of second browser", metaname="VERSION")
+ cmd.AddArgument(
+ ["-b1n", "--browser1name"], "Optional name for first browser (used in "
+ "directory to hold intermediate files)", metaname="NAME")
+ cmd.AddArgument(
+ ["-b2n", "--browser2name"], "Optional name for second browser (used in "
+ "directory to hold intermediate files)", metaname="NAME")
+ cmd.AddArgument(
+ ["-o", "--outdir"], "Directory to store scrape files", metaname="DIR")
+ cmd.AddArgument(
+ ["-u", "--url"], "URL to compare")
+ cmd.AddArgument(
+ ["-l", "--list"], "List of URLs to compare", type="readfile")
+ cmd.AddMutualExclusion(["--url", "--list"])
+ cmd.AddArgument(
+ ["-s", "--startline"], "First line of URL list", type="int")
+ cmd.AddArgument(
+ ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
+ cmd.AddArgument(
+ ["-c", "--count"], "Number of lines of URL file to use", type="int")
+ cmd.AddDependency("--startline", "--list")
+ cmd.AddRequiredGroup(["--url", "--list"])
+ cmd.AddDependency("--endline", "--list")
+ cmd.AddDependency("--count", "--list")
+ cmd.AddMutualExclusion(["--count", "--endline"])
+ cmd.AddDependency("--count", "--startline")
+ cmd.AddArgument(
+ ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
+ "finish loading",
+ type="int", default=60)
+ cmd.AddArgument(
+ ["-log", "--logfile"], "File to write output", type="string", required=True)
+ cmd.AddArgument(
+ ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
+ cmd.AddArgument(
+ ["-m", "--maskdir"], "Path that holds masks to use for comparison")
+ cmd.AddArgument(
+ ["-d", "--diffdir"], "Path to hold the difference of comparisons that fail")
+
+
+def ValidateCompare2(command):
+ """Validate the arguments to compare2. Raises ParseError if failed."""
+ executables = [".exe", ".com", ".bat"]
+ if (os.path.splitext(command["--browser1"])[1].lower() not in executables or
+ os.path.splitext(command["--browser2"])[1].lower() not in executables):
+ raise command_line.ParseError("Browser filename must be an executable")
+
+
+def ExecuteCompare2(command):
+ """Executes the Compare2 command."""
+ if command["--url"]:
+ url_list = [command["--url"]]
+ else:
+ startline = command["--startline"]
+ if command["--count"]:
+ endline = startline+command["--count"]
+ else:
+ endline = command["--endline"]
+ url_list = [url.strip() for url in
+ open(command["--list"], "r").readlines()[startline:endline]]
+
+ log_file = open(command["--logfile"], "w")
+
+ outdir = command["--outdir"]
+ if not outdir: outdir = tempfile.gettempdir()
+
+ scrape_info_list = []
+
+ class ScrapeInfo(object):
+ """Helper class to hold information about a scrape."""
+ __slots__ = ["browser_path", "scraper", "outdir", "result"]
+
+ for index in xrange(1, 3):
+ scrape_info = ScrapeInfo()
+ scrape_info.browser_path = command["--browser%d" % index]
+ scrape_info.scraper = scrapers.GetScraper(
+ (command["--browser"], command["--browser%dver" % index]))
+
+ if command["--browser%dname" % index]:
+ scrape_info.outdir = os.path.join(outdir,
+ command["--browser%dname" % index])
+ else:
+ scrape_info.outdir = os.path.join(outdir, str(index))
+
+ drivers.windowing.PreparePath(scrape_info.outdir)
+ scrape_info_list.append(scrape_info)
+
+ compare = operators.GetOperator("equals_with_mask")
+
+ for url in url_list:
+ success = True
+
+ for scrape_info in scrape_info_list:
+ scrape_info.result = scrape_info.scraper.Scrape(
+ [url], scrape_info.outdir, command["--size"], (0, 0),
+ command["--timeout"], path=scrape_info.browser_path)
+
+ if not scrape_info.result:
+ scrape_info.result = "success"
+ else:
+ success = False
+
+ result = "unknown"
+
+ if success:
+ result = "equal"
+
+ file1 = drivers.windowing.URLtoFilename(
+ url, scrape_info_list[0].outdir, ".bmp")
+ file2 = drivers.windowing.URLtoFilename(
+ url, scrape_info_list[1].outdir, ".bmp")
+
+ comparison_result = compare.Compare(file1, file2,
+ maskdir=command["--maskdir"])
+
+ if comparison_result is not None:
+ result = "not-equal"
+
+ if command["--diffdir"]:
+ comparison_result[1].save(
+ drivers.windowing.URLtoFilename(url, command["--diffdir"], ".bmp"))
+
+ # TODO(jhaas): maybe use the logging module rather than raw file writes
+ log_file.write("%s %s %s %s\n" % (url,
+ scrape_info_list[0].result,
+ scrape_info_list[1].result,
+ result))
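How compare2 gets wired into a parser is not shown in this excerpt (site_compare.py appears in the diffstat but not in this diff), so the following is only a hedged sketch. It assumes tools/site_compare is on sys.path and that the pywin32-based drivers import cleanly (Windows); the browser paths are placeholders. The flag names come straight from CreateCommand above.

  import command_line
  from commands import compare2

  cmdline = command_line.CommandLine()
  compare2.CreateCommand(cmdline)

  # Parse and validate only; drop execute=False to actually run ExecuteCompare2.
  # --browser1/--browser2 are "readfile" arguments, so the placeholder paths
  # below must point at real executables for parsing to succeed.
  cmdline.ParseCommandLine(
      ["compare2",
       "--browser1=c:\\old\\chrome.exe", "--browser2=c:\\new\\chrome.exe",
       "--url=http://www.example.com/", "--logfile=compare2.log"],
      execute=False)

When run for real, ExecuteCompare2 writes one "url scrape1-result scrape2-result comparison" line per URL to the log file, as shown at the end of the function above.
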
diff --git a/tools/site_compare/commands/maskmaker.py b/tools/site_compare/commands/maskmaker.py
new file mode 100644
index 0000000..95bdeb45
--- /dev/null
+++ b/tools/site_compare/commands/maskmaker.py
@@ -0,0 +1,298 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Component for automatically creating masks of changing areas of a website.
+
+Works by repeated invocation of a browser and scraping of the resulting page.
+Areas that differ will be added to the auto-generated mask. The mask generator
+considers the mask complete when further scrapes fail to produce any differences
+in the mask.
+"""
+
+import os # Functions for walking the directory tree
+import tempfile # Get a temporary directory to hold intermediates
+import time # Used for sleep() and naming masks by time
+
+import command_line
+import drivers
+from PIL import Image
+from PIL import ImageChops
+import scrapers
+
+
+def CreateCommand(cmdline):
+ """Inserts the command and arguments into a command line for parsing."""
+ cmd = cmdline.AddCommand(
+ ["maskmaker"],
+ "Automatically generates a mask from a list of URLs",
+ ValidateMaskmaker,
+ ExecuteMaskmaker)
+
+ cmd.AddArgument(
+ ["-bp", "--browserpath"], "Full path to browser's executable",
+ type="readfile", metaname="PATH")
+ cmd.AddArgument(
+ ["-b", "--browser"], "Which browser to use", type="string",
+ default="chrome")
+ cmd.AddArgument(
+ ["-bv", "--browserver"], "Version of the browser", metaname="VERSION")
+ cmd.AddArgument(
+ ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR",
+ required=True)
+ cmd.AddArgument(
+ ["-u", "--url"], "URL to compare")
+ cmd.AddArgument(
+ ["-l", "--list"], "List of URLs to compare", type="readfile")
+ cmd.AddMutualExclusion(["--url", "--list"])
+ cmd.AddArgument(
+ ["-s", "--startline"], "First line of URL list", type="int")
+ cmd.AddArgument(
+ ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
+ cmd.AddArgument(
+ ["-c", "--count"], "Number of lines of URL file to use", type="int")
+ cmd.AddDependency("--startline", "--list")
+ cmd.AddRequiredGroup(["--url", "--list"])
+ cmd.AddDependency("--endline", "--list")
+ cmd.AddDependency("--count", "--list")
+ cmd.AddMutualExclusion(["--count", "--endline"])
+ cmd.AddDependency("--count", "--startline")
+ cmd.AddArgument(
+ ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
+ "finish loading",
+ type="int", default=60)
+ cmd.AddArgument(
+ ["-w", "--wait"],
+ "Amount of time (in seconds) to wait between successive scrapes",
+ type="int", default=60)
+ cmd.AddArgument(
+ ["-sc", "--scrapes"],
+ "Number of successive scrapes which must result in no change to a mask "
+ "before mask creation is considered complete", type="int", default=10)
+ cmd.AddArgument(
+ ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
+ cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes")
+ cmd.AddArgument(
+ ["-gu", "--giveup"],
+ "Number of times to scrape before giving up", type="int", default=50)
+ cmd.AddArgument(
+ ["-th", "--threshhold"],
+ "Percentage of different pixels (0-100) above which the scrape will be"
+ "discarded and the mask not updated.", type="int", default=100)
+ cmd.AddArgument(
+ ["--er", "--errors"],
+ "Number of times a scrape can fail before giving up on the URL.",
+ type="int", default=1)
+
+
+def ValidateMaskmaker(command):
+ """Validate the arguments to maskmaker. Raises ParseError if failed."""
+ executables = [".exe", ".com", ".bat"]
+ if command["--browserpath"]:
+ if os.path.splitext(command["--browserpath"])[1].lower() not in executables:
+ raise command_line.ParseError("Browser filename must be an executable")
+
+
+def ExecuteMaskmaker(command):
+ """Performs automatic mask generation."""
+
+ # Get the list of URLs to generate masks for
+ class MaskmakerURL(object):
+ """Helper class for holding information about a URL passed to maskmaker."""
+ __slots__ = ['url', 'consecutive_successes', 'errors']
+ def __init__(self, url):
+ self.url = url
+ self.consecutive_successes = 0
+ self.errors = 0
+
+ if command["--url"]:
+ url_list = [MaskmakerURL(command["--url"])]
+ else:
+ startline = command["--startline"]
+ if command["--count"]:
+ endline = startline+command["--count"]
+ else:
+ endline = command["--endline"]
+ url_list = [MaskmakerURL(url.strip()) for url in
+ open(command["--list"], "r").readlines()[startline:endline]]
+
+ complete_list = []
+ error_list = []
+
+ outdir = command["--outdir"]
+ scrapes = command["--scrapes"]
+ errors = command["--errors"]
+ size = command["--size"]
+ scrape_pass = 0
+
+ scrapedir = command["--scrapedir"]
+ if not scrapedir: scrapedir = tempfile.gettempdir()
+
+ # Get the scraper
+ scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))
+
+ # Repeatedly iterate through the list of URLs until either every URL has
+ # a successful mask or too many errors, or we've exceeded the giveup limit
+ while url_list and scrape_pass < command["--giveup"]:
+ # Scrape each URL
+ for url in url_list:
+ print "Processing %r..." % url.url
+ mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp")
+
+ # Load the existing mask. This is in a loop so we can try to recover
+ # from error conditions
+ while True:
+ try:
+ mask = Image.open(mask_filename)
+ if mask.size != size:
+ print " %r already exists and is the wrong size! (%r vs %r)" % (
+ mask_filename, mask.size, size)
+ mask_filename = "%s_%r%s" % (
+ mask_filename[:-4], size, mask_filename[-4:])
+ print " Trying again as %r..." % mask_filename
+ continue
+ break
+ except IOError:
+ print " %r does not exist, creating" % mask_filename
+ mask = Image.new("1", size, 1)
+ mask.save(mask_filename)
+
+ # Find the stored scrape path
+ mask_scrape_dir = os.path.join(
+ scrapedir, os.path.splitext(os.path.basename(mask_filename))[0])
+ drivers.windowing.PreparePath(mask_scrape_dir)
+
+ # Find the baseline image
+ mask_scrapes = os.listdir(mask_scrape_dir)
+ mask_scrapes.sort()
+
+ if not mask_scrapes:
+ print " No baseline image found, mask will not be updated"
+ baseline = None
+ else:
+ baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0]))
+
+ mask_scrape_filename = os.path.join(mask_scrape_dir,
+ time.strftime("%y%m%d-%H%M%S.bmp"))
+
+ # Do the scrape
+ result = scraper.Scrape(
+ [url.url], mask_scrape_dir, size, (0, 0),
+ command["--timeout"], path=command["--browserpath"],
+ filename=mask_scrape_filename)
+
+ if result:
+ # Return value other than None means an error
+ print " Scrape failed with error '%r'" % result
+ url.errors += 1
+ if url.errors >= errors:
+ print " ** Exceeded maximum error count for this URL, giving up"
+ continue
+
+ # Load the new scrape
+ scrape = Image.open(mask_scrape_filename)
+
+ # Calculate the difference between the new scrape and the baseline,
+ # subject to the current mask
+ if baseline:
+ diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
+ mask.convert(scrape.mode))
+
+ # If the difference is none, there's nothing to update
+ if max(diff.getextrema()) == (0, 0):
+ print " Scrape identical to baseline, no change in mask"
+ url.consecutive_successes += 1
+ if url.consecutive_successes >= scrapes:
+ print " ** No change for %r scrapes, done!" % scrapes
+ else:
+ # convert the difference to black and white, then change all
+ # black pixels (where the scrape and the baseline were identical)
+ # to white, all others (where the scrape and the baseline differed)
+ # to black.
+ #
+ # Since the below command is a little unclear, here's how it works.
+ # 1. convert("L") converts the RGB image to grayscale
+ # 2. point() maps grayscale values (or the individual channels)
+ # of an RGB image) to different ones. Because it operates on
+ # individual channels, the grayscale conversion from step 1
+ # is necessary.
+ # 3. The "1" second parameter to point() outputs the result as
+ # a monochrome bitmap. If the original RGB image were converted
+ # directly to monochrome, PIL would dither it.
+ diff = diff.convert("L").point([255]+[0]*255, "1")
+
+ # count the number of different pixels
+ diff_pixels = diff.getcolors()[0][0]
+
+ # is this too much?
+ diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1])
+ if diff_pixel_percent > command["--threshhold"]:
+ print (" Scrape differed from baseline by %.2f percent, ignoring"
+ % diff_pixel_percent)
+ else:
+ print " Scrape differed in %d pixels, updating mask" % diff_pixels
+ mask = ImageChops.multiply(mask, diff)
+ mask.save(mask_filename)
+
+ # reset the number of consecutive "good" scrapes
+ url.consecutive_successes = 0
+
+ # Remove URLs whose mask is deemed done
+ complete_list.extend(
+ [url for url in url_list if url.consecutive_successes >= scrapes])
+ error_list.extend(
+ [url for url in url_list if url.errors >= errors])
+ url_list = [
+ url for url in url_list if
+ url.consecutive_successes < scrapes and
+ url.errors < errors]
+
+ scrape_pass += 1
+ print "**Done with scrape pass %d\n" % scrape_pass
+
+ if scrape_pass >= command["--giveup"]:
+ print "**Exceeded giveup threshhold. Giving up."
+ else:
+ print "Waiting %d seconds..." % command["--wait"]
+ time.sleep(command["--wait"])
+
+ print
+ print "*** MASKMAKER COMPLETE ***"
+ print "Summary report:"
+ print " %d masks successfully generated" % len(complete_list)
+ for url in complete_list:
+ print " ", url.url
+ print " %d masks failed with too many errors" % len(error_list)
+ for url in error_list:
+ print " ", url.url
+ if scrape_pass >= command["--giveup"]:
+ print (" %d masks were not completed before "
+ "reaching the giveup threshhold" % len(url_list))
+ for url in url_list:
+ print " ", url.url
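The convert()/point()/multiply() idiom that the comments inside ExecuteMaskmaker explain can be exercised on its own. Below is a minimal standalone sketch, not part of the commit, using two tiny synthetic images in place of real scrapes; the pixel values and image size are arbitrary.

  from PIL import Image
  from PIL import ImageChops

  size = (4, 4)
  baseline = Image.new("RGB", size, (255, 255, 255))
  scrape = Image.new("RGB", size, (255, 255, 255))
  scrape.putpixel((1, 1), (0, 0, 0))      # one pixel differs from the baseline
  mask = Image.new("1", size, 1)          # start with an all-white (all-pass) mask

  diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                             mask.convert(scrape.mode))
  # Grayscale, then map 0 (identical) -> white and everything else -> black,
  # producing a monochrome bitmap without dithering.
  diff = diff.convert("L").point([255] + [0] * 255, "1")
  mask = ImageChops.multiply(mask, diff)  # once masked out, a pixel stays masked out

  print mask.getpixel((1, 1))   # 0: the differing pixel is now excluded
  print mask.getpixel((0, 0))   # 255: unchanged pixels remain included
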
diff --git a/tools/site_compare/commands/measure.py b/tools/site_compare/commands/measure.py
new file mode 100644
index 0000000..477db57
--- /dev/null
+++ b/tools/site_compare/commands/measure.py
@@ -0,0 +1,78 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Command for measuring how long pages take to load in a browser.
+
+Prerequisites:
+ 1. The command_line package from tools/site_compare
+ 2. Either the IE BHO or Firefox extension (or both)
+
+Installation:
+ 1. Build the IE BHO, or call regsvr32 on a prebuilt binary
+ 2. Add a file called "measurepageloadtimeextension@google.com" to
+ the default Firefox profile directory under extensions, containing
+ the path to the Firefox extension root
+
+Invoke with the command line arguments as documented within
+the command line.
+"""
+
+import command_line
+import win32process
+
+from drivers import windowing
+from utils import browser_iterate
+
+def CreateCommand(cmdline):
+ """Inserts the command and arguments into a command line for parsing."""
+ cmd = cmdline.AddCommand(
+ ["measure"],
+ "Measures how long a series of URLs takes to load in one or more browsers.",
+ None,
+ ExecuteMeasure)
+
+ browser_iterate.SetupIterationCommandLine(cmd)
+ cmd.AddArgument(
+ ["-log", "--logfile"], "File to write output", type="string", required=True)
+
+
+def ExecuteMeasure(command):
+ """Executes the Measure command."""
+
+ def LogResult(url, proc, wnd, result):
+ """Write the result of the browse to the log file."""
+ log_file.write(result)
+
+ log_file = open(command["--logfile"], "w")
+
+ browser_iterate.Iterate(command, LogResult)
+
+ # Close the log file and return. We're done.
+ log_file.close()
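Both measure above and scrape below hand a callback to browser_iterate.Iterate(), which drives the browser over each URL and passes the URL, the browser process, the window handle, and the result string to that callback. A hedged sketch of a custom consumer follows (browser_iterate itself is utils/browser_iterate.py in the diffstat, not shown here; the commented-out call assumes a 'command' object prepared with SetupIterationCommandLine as in CreateCommand above).

  from utils import browser_iterate

  def PrintResult(url, proc, wnd, result):
    """Print each browse result instead of logging it to a file."""
    print "%s -> %s" % (url, result)

  # browser_iterate.Iterate(command, PrintResult)
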
diff --git a/tools/site_compare/commands/scrape.py b/tools/site_compare/commands/scrape.py
new file mode 100644
index 0000000..a9b3398
--- /dev/null
+++ b/tools/site_compare/commands/scrape.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Command for scraping images from a URL or list of URLs.
+
+Prerequisites:
+ 1. The command_line package from tools/site_compare
+ 2. Either the IE BHO or Firefox extension (or both)
+
+Installation:
+ 1. Build the IE BHO, or call regsvr32 on a prebuilt binary
+ 2. Add a file called "measurepageloadtimeextension@google.com" to
+ the default Firefox profile directory under extensions, containing
+ the path to the Firefox extension root
+
+Invoke with the command line arguments as documented within
+the command line.
+"""
+
+import command_line
+
+from drivers import windowing
+from utils import browser_iterate
+
+def CreateCommand(cmdline):
+ """Inserts the command and arguments into a command line for parsing."""
+ cmd = cmdline.AddCommand(
+ ["scrape"],
+ "Scrapes an image from a URL or series of URLs.",
+ None,
+ ExecuteScrape)
+
+ browser_iterate.SetupIterationCommandLine(cmd)
+ cmd.AddArgument(
+ ["-log", "--logfile"], "File to write text output", type="string")
+ cmd.AddArgument(
+ ["-out", "--outdir"], "Directory to store scrapes", type="string", required=True)
+
+
+def ExecuteScrape(command):
+ """Executes the Scrape command."""
+
+ def ScrapeResult(url, proc, wnd, result):
+ """Capture and save the scrape."""
+ if log_file: log_file.write(result)
+
+ # Scrape the page
+ image = windowing.ScrapeWindow(wnd)
+ filename = windowing.URLtoFilename(url, command["--outdir"], ".bmp")
+ image.save(filename)
+
+ if command["--logfile"]: log_file = open(command["--logfile"], "w")
+ else: log_file = None
+
+ browser_iterate.Iterate(command, ScrapeResult)
+
+ # Close the log file and return. We're done.
+ if log_file: log_file.close()
diff --git a/tools/site_compare/commands/timeload.py b/tools/site_compare/commands/timeload.py
new file mode 100644
index 0000000..a983173
--- /dev/null
+++ b/tools/site_compare/commands/timeload.py
@@ -0,0 +1,170 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""SiteCompare command to time page loads
+
+Loads a series of URLs in a series of browsers (and browser versions)
+and measures how long the page takes to load in each. Outputs a
+comma-delimited file. The first line is "URL" followed by the browser names;
+each additional line is a URL followed by comma-delimited times (in seconds),
+or the string "timeout" or "crashed".
+
+"""
+
+import os # Functions for walking the directory tree
+import tempfile # Get a temporary directory to hold intermediates
+
+import command_line
+import drivers # Functions for driving keyboard/mouse/windows, OS-specific
+import operators # Functions that, given two bitmaps as input, produce
+ # output depending on the performance of an operation
+import scrapers # Functions that know how to capture a render from
+ # particular browsers
+
+
+def CreateCommand(cmdline):
+ """Inserts the command and arguments into a command line for parsing."""
+ cmd = cmdline.AddCommand(
+ ["timeload"],
+ "Measures how long a series of URLs takes to load in one or more browsers.",
+ None,
+ ExecuteTimeLoad)
+
+ cmd.AddArgument(
+ ["-b", "--browsers"], "List of browsers to use. Comma-separated",
+ type="string", required=True)
+ cmd.AddArgument(
+ ["-bp", "--browserpaths"], "List of paths to browsers. Comma-separated",
+ type="string", required=False)
+ cmd.AddArgument(
+ ["-bv", "--browserversions"], "List of versions of browsers. Comma-separated",
+ type="string", required=False)
+ cmd.AddArgument(
+ ["-u", "--url"], "URL to time")
+ cmd.AddArgument(
+ ["-l", "--list"], "List of URLs to time", type="readfile")
+ cmd.AddMutualExclusion(["--url", "--list"])
+ cmd.AddArgument(
+ ["-s", "--startline"], "First line of URL list", type="int")
+ cmd.AddArgument(
+ ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
+ cmd.AddArgument(
+ ["-c", "--count"], "Number of lines of URL file to use", type="int")
+ cmd.AddDependency("--startline", "--list")
+ cmd.AddRequiredGroup(["--url", "--list"])
+ cmd.AddDependency("--endline", "--list")
+ cmd.AddDependency("--count", "--list")
+ cmd.AddMutualExclusion(["--count", "--endline"])
+ cmd.AddDependency("--count", "--startline")
+ cmd.AddArgument(
+ ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
+ "finish loading",
+ type="int", default=60)
+ cmd.AddArgument(
+ ["-log", "--logfile"], "File to write output", type="string", required=True)
+ cmd.AddArgument(
+ ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
+
+
+def ExecuteTimeLoad(command):
+ """Executes the TimeLoad command."""
+ browsers = command["--browsers"].split(",")
+ num_browsers = len(browsers)
+
+ if command["--browserversions"]:
+ browser_versions = command["--browserversions"].split(",")
+ else:
+ browser_versions = [None] * num_browsers
+
+ if command["--browserpaths"]:
+ browser_paths = command["--browserpaths"].split(",")
+ else:
+ browser_paths = [None] * num_browsers
+
+ if len(browser_versions) != num_browsers:
+ raise ValueError(
+ "--browserversions must be same length as --browsers")
+ if len(browser_paths) != num_browsers:
+ raise ValueError(
+ "--browserpaths must be same length as --browsers")
+
+ unknown = [b for b in browsers if b not in ("chrome", "ie", "firefox")]
+ if unknown:
+ raise ValueError("unknown browsers: %r" % unknown)
+
+ scraper_list = []
+
+ for b in xrange(num_browsers):
+ version = browser_versions[b]
+ if not version: version = None
+
+ scraper = scrapers.GetScraper( (browsers[b], version) )
+ if not scraper:
+ raise ValueError("could not find scraper for (%r, %r)" %
+ (browsers[b], version))
+ scraper_list.append(scraper)
+
+ if command["--url"]:
+ url_list = [command["--url"]]
+ else:
+ startline = command["--startline"]
+ if command["--count"]:
+ endline = startline+command["--count"]
+ else:
+ endline = command["--endline"]
+ url_list = [url.strip() for url in
+ open(command["--list"], "r").readlines()[startline:endline]]
+
+ log_file = open(command["--logfile"], "w")
+
+ log_file.write("URL")
+ for b in xrange(num_browsers):
+ log_file.write(",%s" % browsers[b])
+
+ if browser_versions[b]: log_file.write(" %s" % browser_versions[b])
+ log_file.write("\n")
+
+ results = {}
+ for url in url_list:
+ results[url] = [None] * num_browsers
+
+ for b in xrange(num_browsers):
+ result = scraper_list[b].Time(url_list, command["--size"],
+ command["--timeout"],
+ path=browser_paths[b])
+
+ for (url, time) in result:
+ results[url][b] = time
+
+ # output the results
+ for url in url_list:
+ log_file.write(url)
+ for b in xrange(num_browsers):
+ log_file.write(",%r" % results[url][b])
+ log_file.write("\n")  # end the row for this URL (outside the browser loop)
+
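+# Illustrative output (added, not part of the original change), for browsers
+# "chrome 0.1.101.0" and "firefox":
+#
+#   URL,chrome 0.1.101.0,firefox
+#   http://www.google.com,1.2,2.4
+#   http://www.example.com,'timeout',3.1
+#
+# Because rows are written with %r, float times may carry extra digits and
+# string results such as "timeout" or "crashed" appear quoted.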
diff --git a/tools/site_compare/drivers/__init__.py b/tools/site_compare/drivers/__init__.py
new file mode 100644
index 0000000..befc1353
--- /dev/null
+++ b/tools/site_compare/drivers/__init__.py
@@ -0,0 +1,15 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2007 Google Inc. All Rights Reserved.
+
+"""Imports a set of drivers appropriate to the current OS."""
+
+__author__ = 'jhaas@google.com (Jonathan Haas)'
+
+import sys
+
+platform_dir = sys.platform
+
+keyboard = __import__(platform_dir+".keyboard", globals(), locals(), [''])
+mouse = __import__(platform_dir+".mouse", globals(), locals(), [''])
+windowing = __import__(platform_dir+".windowing", globals(), locals(), [''])
diff --git a/tools/site_compare/drivers/win32/__init__.py b/tools/site_compare/drivers/win32/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tools/site_compare/drivers/win32/__init__.py
diff --git a/tools/site_compare/drivers/win32/keyboard.py b/tools/site_compare/drivers/win32/keyboard.py
new file mode 100644
index 0000000..5888318
--- /dev/null
+++ b/tools/site_compare/drivers/win32/keyboard.py
@@ -0,0 +1,223 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""SiteCompare module for simulating keyboard input.
+
+This module contains functions that can be used to simulate a user
+pressing keys on a keyboard. Support is provided for formatted strings
+including special characters to represent modifier keys like CTRL and ALT
+"""
+
+import time # for sleep
+import win32api # for keybd_event and VkKeyCode
+import win32con # Windows constants
+
+# TODO(jhaas): Ask the readability guys if this would be acceptable:
+#
+# from win32con import VK_SHIFT, VK_CONTROL, VK_MENU, VK_LWIN, KEYEVENTF_KEYUP
+#
+# This is a violation of the style guide but having win32con. everywhere
+# is just plain ugly, and win32con is a huge import for just a handful of
+# constants
+
+
+def PressKey(down, key):
+ """Presses or unpresses a key.
+
+ Uses keybd_event to simulate either depressing or releasing
+ a key
+
+ Args:
+ down: Whether the key is to be pressed or released
+ key: Virtual key code of key to press or release
+ """
+
+ # keybd_event injects key events at a very low level (it's the
+ # Windows API keyboard device drivers call) so this is a very
+ # reliable way of simulating user input
+ win32api.keybd_event(key, 0, (not down) * win32con.KEYEVENTF_KEYUP)
+
+
+def TypeKey(key, keystroke_time=0):
+ """Simulate a keypress of a virtual key.
+
+ Args:
+ key: which key to press
+ keystroke_time: length of time (in seconds) to "hold down" the key
+ Note that zero works just fine
+
+ Returns:
+ None
+ """
+
+ # This just wraps a pair of PressKey calls with an intervening delay
+ PressKey(True, key)
+ time.sleep(keystroke_time)
+ PressKey(False, key)
+
+
+def TypeString(string_to_type,
+ use_modifiers=False,
+ keystroke_time=0,
+ time_between_keystrokes=0):
+ """Simulate typing a string on the keyboard.
+
+ Args:
+ string_to_type: the string to print
+ use_modifiers: specifies whether the following modifier characters
+ should be active:
+ {abc}: type characters with ALT held down
+ [abc]: type characters with CTRL held down
+ \ escapes {}[] and treats these values as literal
+ standard escape sequences are valid even if use_modifiers is false
+ \p is "pause" for one second, useful when driving menus
+ \1-\9 is F-key, \0 is F10
+
+ TODO(jhaas): support for explicit control of SHIFT, support for
+ nonprintable keys (F-keys, ESC, arrow keys, etc),
+ support for explicit control of left vs. right ALT or SHIFT,
+ support for Windows key
+
+ keystroke_time: length of time (in seconds) to "hold down" the key
+ time_between_keystrokes: length of time (seconds) to pause between keys
+
+ Returns:
+ None
+ """
+
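+ # Illustrative examples (added comments, not part of the original):
+ #   TypeString("[ac]", use_modifiers=True) sends Ctrl+A then Ctrl+C
+ #   TypeString("{f}x", use_modifiers=True) sends Alt+F, releases ALT, then x
+ #   TypeString(r"\p\1") pauses one second and then presses F1
+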
+ shift_held = win32api.GetAsyncKeyState(win32con.VK_SHIFT ) < 0
+ ctrl_held = win32api.GetAsyncKeyState(win32con.VK_CONTROL) < 0
+ alt_held = win32api.GetAsyncKeyState(win32con.VK_MENU ) < 0
+
+ next_escaped = False
+ escape_chars = {
+ 'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', 'v': '\v'}
+
+ for char in string_to_type:
+ vk = None
+ handled = False
+
+ # Check to see if this is the start or end of a modified block (that is,
+ # {abc} for ALT-modified keys or [abc] for CTRL-modified keys
+ if use_modifiers and not next_escaped:
+ handled = True
+ if char == "{" and not alt_held:
+ alt_held = True
+ PressKey(True, win32con.VK_MENU)
+ elif char == "}" and alt_held:
+ alt_held = False
+ PressKey(False, win32con.VK_MENU)
+ elif char == "[" and not ctrl_held:
+ ctrl_held = True
+ PressKey(True, win32con.VK_CONTROL)
+ elif char == "]" and ctrl_held:
+ ctrl_held = False
+ PressKey(False, win32con.VK_CONTROL)
+ else:
+ handled = False
+
+ # If this is an explicitly-escaped character, replace it with the
+ # appropriate code
+ if next_escaped and char in escape_chars: char = escape_chars[char]
+
+ # If this is \p, pause for one second.
+ if next_escaped and char == 'p':
+ time.sleep(1)
+ next_escaped = False
+ handled = True
+
+ # If this is \(d), press F key
+ if next_escaped and char.isdigit():
+ fkey = int(char)
+ if not fkey: fkey = 10
+ next_escaped = False
+ vk = win32con.VK_F1 + fkey - 1
+
+ # If this is the backslash, the next character is escaped
+ if not next_escaped and char == "\\":
+ next_escaped = True
+ handled = True
+
+ # If we make it here, it's not a special character, or it's an
+ # escaped special character which should be treated as a literal
+ if not handled:
+ next_escaped = False
+ if not vk: vk = win32api.VkKeyScan(char)
+
+ # VkKeyScan() returns the scan code in the low byte. The upper
+ # byte specifies modifiers necessary to produce the given character
+ # from the given scan code. The only one we're concerned with at the
+ # moment is Shift. Determine the shift state and compare it to the
+ # current state... if it differs, press or release the shift key.
+ new_shift_held = bool(vk & (1<<8))
+
+ if new_shift_held != shift_held:
+ PressKey(new_shift_held, win32con.VK_SHIFT)
+ shift_held = new_shift_held
+
+ # Type the key with the specified length, then wait the specified delay
+ TypeKey(vk & 0xFF, keystroke_time)
+ time.sleep(time_between_keystrokes)
+
+ # Release the modifier keys, if held
+ if shift_held: PressKey(False, win32con.VK_SHIFT)
+ if ctrl_held: PressKey(False, win32con.VK_CONTROL)
+ if alt_held: PressKey(False, win32con.VK_MENU)
+
+if __name__ == "__main__":
+ # We're being invoked rather than imported. Let's do some tests
+
+ # Press command-R to bring up the Run dialog
+ PressKey(True, win32con.VK_LWIN)
+ TypeKey(ord('R'))
+ PressKey(False, win32con.VK_LWIN)
+
+ # Wait a sec to make sure it comes up
+ time.sleep(1)
+
+ # Invoke Notepad through the Run dialog
+ TypeString("wordpad\n")
+
+ # Wait another sec, then start typing
+ time.sleep(1)
+ TypeString("This is a test of SiteCompare's Keyboard.py module.\n\n")
+ TypeString("There should be a blank line above and below this one.\n\n")
+ TypeString("This line has control characters to make "
+ "[b]boldface text[b] and [i]italic text[i] and normal text.\n\n",
+ use_modifiers=True)
+ TypeString(r"This line should be typed with a visible delay between "
+ "characters. When it ends, there should be a 3-second pause, "
+ "then the menu will select File/Exit, then another 3-second "
+ "pause, then No to exit without saving. Ready?\p\p\p{f}x\p\p\pn",
+ use_modifiers=True,
+ keystroke_time=0.05,
+ time_between_keystrokes=0.05)
+
+ \ No newline at end of file
diff --git a/tools/site_compare/drivers/win32/mouse.py b/tools/site_compare/drivers/win32/mouse.py
new file mode 100644
index 0000000..9475f2d
--- /dev/null
+++ b/tools/site_compare/drivers/win32/mouse.py
@@ -0,0 +1,243 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""SiteCompare module for simulating mouse input.
+
+This module contains functions that can be used to simulate a user
+navigating using a pointing device. This includes mouse movement,
+clicking with any button, and dragging.
+"""
+
+import time # for sleep
+
+import win32api # for mouse_event
+import win32con # Windows constants
+import win32gui # for window functions
+
+
+def ScreenToMouse(pt):
+ """Convert a value in screen coordinates to mouse coordinates.
+
+ Mouse coordinates are specified as a percentage of screen dimensions,
+ normalized to 16 bits. 0 represents the far left/top of the screen,
+ 65535 represents the far right/bottom. This function assumes that
+ the size of the screen is fixed after the first call and does not change.
+
+ Args:
+ pt: the point of the coords to convert
+
+ Returns:
+ the converted point
+ """
+
+ # Initialize the screen dimensions on first execution. Note that this
+ # function assumes that the screen dimensions do not change during run.
+ if not ScreenToMouse._SCREEN_DIMENSIONS:
+ desktop = win32gui.GetClientRect(win32gui.GetDesktopWindow())
+ ScreenToMouse._SCREEN_DIMENSIONS = (desktop[2], desktop[3])
+
+ return ((65535 * pt[0]) / ScreenToMouse._SCREEN_DIMENSIONS[0],
+ (65535 * pt[1]) / ScreenToMouse._SCREEN_DIMENSIONS[1])
+
+ScreenToMouse._SCREEN_DIMENSIONS = None
+
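+# Worked example (added for clarity, not part of the original): on a 1600x1200
+# desktop, ScreenToMouse((800, 600)) evaluates to
+# ((65535*800)/1600, (65535*600)/1200) == (32767, 32767), i.e. mid-screen.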
+
+def PressButton(down, button='left'):
+ """Simulate a mouse button press or release at the current mouse location.
+
+ Args:
+ down: whether the button is pressed or released
+ button: which button is pressed
+
+ Returns:
+ None
+ """
+
+ # Put the mouse_event flags in a convenient dictionary by button
+ flags = {
+ 'left': (win32con.MOUSEEVENTF_LEFTUP, win32con.MOUSEEVENTF_LEFTDOWN),
+ 'middle': (win32con.MOUSEEVENTF_MIDDLEUP, win32con.MOUSEEVENTF_MIDDLEDOWN),
+ 'right': (win32con.MOUSEEVENTF_RIGHTUP, win32con.MOUSEEVENTF_RIGHTDOWN)
+ }
+
+ # hit the button
+ win32api.mouse_event(flags[button][down], 0, 0)
+
+
+def ClickButton(button='left', click_time=0):
+ """Press and release a mouse button at the current mouse location.
+
+ Args:
+ button: which button to click
+ click_time: duration between press and release
+
+ Returns:
+ None
+ """
+ PressButton(True, button)
+ time.sleep(click_time)
+ PressButton(False, button)
+
+
+def DoubleClickButton(button='left', click_time=0, time_between_clicks=0):
+ """Double-click a mouse button at the current mouse location.
+
+ Args:
+ button: which button to click
+ click_time: duration between press and release
+ time_between_clicks: time to pause between clicks
+
+ Returns:
+ None
+ """
+ ClickButton(button, click_time)
+ time.sleep(time_between_clicks)
+ ClickButton(button, click_time)
+
+
+def MoveToLocation(pos, duration=0, tick=0.01):
+ """Move the mouse cursor to a specified location, taking the specified time.
+
+ Args:
+ pos: position (in screen coordinates) to move to
+ duration: amount of time the move should take
+ tick: amount of time between successive moves of the mouse
+
+ Returns:
+ None
+ """
+ # calculate the number of moves to reach the destination
+ num_steps = (duration/tick)+1
+
+ # get the current and final mouse position in mouse coords
+ current_location = ScreenToMouse(win32gui.GetCursorPos())
+ end_location = ScreenToMouse(pos)
+
+ # Calculate the step size
+ step_size = ((end_location[0]-current_location[0])/num_steps,
+ (end_location[1]-current_location[1])/num_steps)
+ step = 0
+
+ while step < num_steps:
+ # Move the mouse one step
+ current_location = (current_location[0]+step_size[0],
+ current_location[1]+step_size[1])
+
+ # Coerce the coords to int to avoid a warning from pywin32
+ win32api.mouse_event(
+ win32con.MOUSEEVENTF_MOVE|win32con.MOUSEEVENTF_ABSOLUTE,
+ int(current_location[0]), int(current_location[1]))
+
+ step += 1
+ time.sleep(tick)
+
+
+def ClickAtLocation(pos, button='left', click_time=0):
+ """Simulate a mouse click in a particular location, in screen coordinates.
+
+ Args:
+ pos: position in screen coordinates (x,y)
+ button: which button to click
+ click_time: duration of the click
+
+ Returns:
+ None
+ """
+ MoveToLocation(pos)
+ ClickButton(button, click_time)
+
+
+def ClickInWindow(hwnd, offset=None, button='left', click_time=0):
+ """Simulate a user mouse click in the center of a window.
+
+ Args:
+ hwnd: handle of the window to click in
+ offset: where to click, defaults to dead center
+ button: which button to click
+ click_time: duration of the click
+
+ Returns:
+ Nothing
+ """
+
+ rect = win32gui.GetClientRect(hwnd)
+ if offset is None: offset = (rect[2]/2, rect[3]/2)
+
+ # get the screen coordinates of the window's center
+ pos = win32gui.ClientToScreen(hwnd, offset)
+
+ ClickAtLocation(pos, button, click_time)
+
+
+def DoubleClickInWindow(
+ hwnd, offset=None, button='left', click_time=0, time_between_clicks=0.1):
+ """Simulate a user mouse double click in the center of a window.
+
+ Args:
+ hwnd: handle of the window to click in
+ offset: where to click, defaults to dead center
+ button: which button to click
+ click_time: duration of the clicks
+ time_between_clicks: length of time to pause between clicks
+
+ Returns:
+ Nothing
+ """
+ ClickInWindow(hwnd, offset, button, click_time)
+ time.sleep(time_between_clicks)
+ ClickInWindow(hwnd, offset, button, click_time)
+
+if __name__ == "__main__":
+ # We're being invoked rather than imported. Let's do some tests
+
+ screen_size = win32gui.GetClientRect(win32gui.GetDesktopWindow())
+ screen_size = (screen_size[2], screen_size[3])
+
+ # move the mouse (instantly) to the upper right corner
+ MoveToLocation((screen_size[0], 0))
+
+ # move the mouse (over five seconds) to the lower left corner
+ MoveToLocation((0, screen_size[1]), 5)
+
+ # click the left mouse button. This will open up the Start menu
+ # if the taskbar is at the bottom
+
+ ClickButton()
+
+ # wait a bit, then click the right button to open the context menu
+ time.sleep(3)
+ ClickButton('right')
+
+ # move the mouse away and then click the left button to dismiss the
+ # context menu
+ MoveToLocation((screen_size[0]/2, screen_size[1]/2), 3)
+ MoveToLocation((0, 0), 3)
+ ClickButton()
+ \ No newline at end of file
diff --git a/tools/site_compare/drivers/win32/windowing.py b/tools/site_compare/drivers/win32/windowing.py
new file mode 100644
index 0000000..94ec511
--- /dev/null
+++ b/tools/site_compare/drivers/win32/windowing.py
@@ -0,0 +1,386 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""SiteCompare module for invoking, locating, and manipulating windows.
+
+This module is a catch-all wrapper for operating system UI functionality
+that doesn't belong in other modules. It contains functions for finding
+particular windows, scraping their contents, and invoking processes to
+create them.
+"""
+
+import os
+import string
+import time
+
+import PIL.ImageGrab
+import pywintypes
+import win32event
+import win32gui
+import win32process
+
+
+def FindChildWindows(hwnd, path):
+ """Find a set of windows through a path specification.
+
+ Args:
+ hwnd: Handle of the parent window
+ path: Path to the window to find. Has the following form:
+ "foo/bar/baz|foobar/|foobarbaz"
+ The slashes specify the "path" to the child window.
+ The text before a pipe is the window class; the text after the
+ pipe (if present) is the window title.
+ * is a wildcard and will find all child windows at that level
+
+ Returns:
+ A list of the windows that were found
+ """
+ windows_to_check = [hwnd]
+
+ # The strategy will be to take windows_to_check and use it
+ # to find a list of windows that match the next specification
+ # in the path, then repeat with the list of found windows as the
+ # new list of windows to check
+ for segment in path.split("/"):
+ windows_found = []
+ check_values = segment.split("|")
+
+ # check_values is now a list with the first element being
+ # the window class, the second being the window caption.
+ # If the class is absent (or wildcarded) set it to None
+ if check_values[0] == "*" or not check_values[0]: check_values[0] = None
+
+ # If the window caption is also absent, force it to None as well
+ if len(check_values) == 1: check_values.append(None)
+
+ # Loop through the list of windows to check
+ for window_check in windows_to_check:
+ window_found = None
+ while window_found != 0: # lint complains, but 0 != None
+ if window_found is None: window_found = 0
+ try:
+ # Look for the next sibling (or first sibling if window_found is 0)
+ # of window_check with the specified caption and/or class
+ window_found = win32gui.FindWindowEx(
+ window_check, window_found, check_values[0], check_values[1])
+ except pywintypes.error, e:
+ # FindWindowEx() raises error 2 if not found
+ if e[0] == 2:
+ window_found = 0
+ else:
+ raise e
+
+ # If FindWindowEx struck gold, add to our list of windows found
+ if window_found: windows_found.append(window_found)
+
+ # The windows we found become the windows to check for the next segment
+ windows_to_check = windows_found
+
+ return windows_found
+
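+# Illustrative uses (added comments, not part of the original):
+#   FindChildWindows(0, "Chrome_XPFrame") - all top-level windows of class
+#     Chrome_XPFrame (a parent handle of 0 searches top-level windows)
+#   FindChildWindows(hwnd, "*|Save As") - any direct child of hwnd titled
+#     "Save As", regardless of its window class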
+
+def FindChildWindow(hwnd, path):
+ """Find a window through a path specification.
+
+ This method is a simple wrapper for FindChildWindows() for the
+ case (the majority case) where you expect to find a single window
+
+ Args:
+ hwnd: Handle of the parent window
+ path: Path to the window to find. See FindChildWindows()
+
+ Returns:
+ The window that was found
+ """
+ return FindChildWindows(hwnd, path)[0]
+
+
+def ScrapeWindow(hwnd, rect=None):
+ """Scrape a visible window and return its contents as a bitmap.
+
+ Args:
+ hwnd: handle of the window to scrape
+ rect: rectangle to scrape in client coords, defaults to the whole thing
+ If specified, it's a 4-tuple of (left, top, right, bottom)
+
+ Returns:
+ An Image containing the scraped data
+ """
+ # Activate the window
+ SetForegroundWindow(hwnd)
+
+ # If no rectangle was specified, use the full client rectangle
+ if not rect: rect = win32gui.GetClientRect(hwnd)
+
+ upper_left = win32gui.ClientToScreen(hwnd, (rect[0], rect[1]))
+ lower_right = win32gui.ClientToScreen(hwnd, (rect[2], rect[3]))
+ rect = upper_left+lower_right
+
+ return PIL.ImageGrab.grab(rect)
+
+
+def SetForegroundWindow(hwnd):
+ """Bring a window to the foreground."""
+ win32gui.SetForegroundWindow(hwnd)
+
+
+def InvokeAndWait(path, cmdline="", timeout=10, tick=1.):
+ """Invoke an application and wait for it to bring up a window.
+
+ Args:
+ path: full path to the executable to invoke
+ cmdline: command line to pass to executable
+ timeout: how long (in seconds) to wait before giving up
+ tick: length of time to wait between checks
+
+ Returns:
+ A tuple of handles to the process and the application's window,
+ or (process handle, None) if it timed out waiting for the window to appear
+ """
+
+ def EnumWindowProc(hwnd, ret):
+ """Internal enumeration func, checks for visibility and proper PID."""
+ if win32gui.IsWindowVisible(hwnd): # don't bother even checking hidden wnds
+ pid = win32process.GetWindowThreadProcessId(hwnd)[1]
+ if pid == ret[0]:
+ ret[1] = hwnd
+ return 0 # 0 means stop enumeration
+ return 1 # 1 means continue enumeration
+
+ # We don't need to change anything about the startupinfo structure
+ # (the default is quite sufficient) but we need to create it just the
+ # same.
+ sinfo = win32process.STARTUPINFO()
+
+ proc = win32process.CreateProcess(
+ path, # path to new process's executable
+ cmdline, # application's command line
+ None, # process security attributes (default)
+ None, # thread security attributes (default)
+ False, # inherit parent's handles
+ 0, # creation flags
+ None, # environment variables
+ None, # directory
+ sinfo) # default startup info
+
+ # CreateProcess returns (prochandle, threadhandle, pid, tid). At
+ # some point we may care about the other members, but for now, all
+ # we're after is the pid
+ pid = proc[2]
+
+ # Enumeration APIs can take an arbitrary integer, usually a pointer,
+ # to be passed to the enumeration function. We'll pass a pointer to
+ # a structure containing the PID we're looking for, and an empty out
+ # parameter to hold the found window ID
+ ret = [pid, None]
+
+ tries_until_timeout = timeout/tick
+ num_tries = 0
+
+ # Enumerate top-level windows, look for one with our PID
+ while num_tries < tries_until_timeout and ret[1] is None:
+ try:
+ win32gui.EnumWindows(EnumWindowProc, ret)
+ except pywintypes.error, e:
+ # error 0 isn't an error, it just meant the enumeration was
+ # terminated early
+ if e[0]: raise e
+
+ time.sleep(tick)
+ num_tries += 1
+
+ # TODO(jhaas): Should we throw an exception if we timeout? Or is returning
+ # a window ID of None sufficient?
+ return (proc[0], ret[1])
+
+
+def WaitForProcessExit(proc, timeout=None):
+ """Waits for a given process to terminate.
+
+ Args:
+ proc: handle to process
+ timeout: timeout (in seconds). None = wait indefinitely
+
+ Returns:
+ True if process ended, False if timed out
+ """
+ if timeout is None:
+ timeout = win32event.INFINITE
+ else:
+ # convert sec to msec
+ timeout *= 1000
+
+ return (win32event.WaitForSingleObject(proc, timeout) ==
+ win32event.WAIT_OBJECT_0)
+
+
+def WaitForThrobber(hwnd, rect=None, timeout=20, tick=0.1, done=10):
+ """Wait for a browser's "throbber" (loading animation) to complete.
+
+ Args:
+ hwnd: window containing the throbber
+ rect: rectangle of the throbber, in client coords. If None, whole window
+ timeout: if the throbber is still throbbing after this long, give up
+ tick: how often to check the throbber
+ done: how long the throbber must be unmoving to be considered done
+
+ Returns:
+ Number of seconds waited, -1 if timed out
+ """
+ if not rect: rect = win32gui.GetClientRect(hwnd)
+
+ # last_throbber will hold the results of the preceding scrape;
+ # we'll compare it against the current scrape to see if we're throbbing
+ last_throbber = ScrapeWindow(hwnd, rect)
+ start_clock = time.clock()
+ timeout_clock = start_clock + timeout
+ last_changed_clock = start_clock
+
+ while time.clock() < timeout_clock:
+ time.sleep(tick)
+
+ current_throbber = ScrapeWindow(hwnd, rect)
+ if current_throbber.tostring() != last_throbber.tostring():
+ last_throbber = current_throbber
+ last_changed_clock = time.clock()
+ else:
+ if time.clock() - last_changed_clock > done:
+ return last_changed_clock - start_clock
+
+ return -1
+
+
+def MoveAndSizeWindow(wnd, position=None, size=None, child=None):
+ """Moves and/or resizes a window.
+
+ Repositions and resizes a window. If a child window is provided,
+ the parent window is resized so the child window has the given size
+
+ Args:
+ wnd: handle of the frame window
+ position: new location for the frame window
+ size: new size for the frame window (or the child window)
+ child: handle of the child window
+
+ Returns:
+ None
+ """
+ rect = win32gui.GetWindowRect(wnd)
+
+ if position is None: position = (rect[0], rect[1])
+ if size is None:
+ size = (rect[2]-rect[0], rect[3]-rect[1])
+ elif child is not None:
+ child_rect = win32gui.GetWindowRect(child)
+ slop = (rect[2]-rect[0]-child_rect[2]+child_rect[0],
+ rect[3]-rect[1]-child_rect[3]+child_rect[1])
+ size = (size[0]+slop[0], size[1]+slop[1])
+
+ win32gui.MoveWindow(wnd, # window to move
+ position[0], # new x coord
+ position[1], # new y coord
+ size[0], # new width
+ size[1], # new height
+ True) # repaint?
+
+
+def EndProcess(proc, code=0):
+ """Ends a process.
+
+ Wraps the OS TerminateProcess call for platform-independence
+
+ Args:
+ proc: process ID
+ code: process exit code
+
+ Returns:
+ None
+ """
+ win32process.TerminateProcess(proc, code)
+
+
+def URLtoFilename(url, path=None, extension=None):
+ """Converts a URL to a filename, given a path.
+
+ This in theory could cause collisions if two URLs differ only
+ in unprintable characters (e.g. http://www.foo.com/?bar and
+ http://www.foo.com/:bar). In practice this shouldn't be a problem.
+
+ Args:
+ url: The URL to convert
+ path: path to the directory to store the file
+ extension: string to append to filename
+
+ Returns:
+ filename
+ """
+ trans = string.maketrans(r'\/:*?"<>|', '_________')
+
+ if path is None: path = ""
+ if extension is None: extension = ""
+ if len(path) > 0 and path[-1] != '\\': path += '\\'
+ url = url.translate(trans)
+ return "%s%s%s" % (path, url, extension)
+
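+# Worked example (added for clarity, not part of the original):
+#   URLtoFilename("http://www.foo.com/?bar", r"c:\scrapes", ".bmp")
+# returns r"c:\scrapes\http___www.foo.com__bar.bmp"; each of the characters
+# \ / : * ? " < > | is replaced with an underscore.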
+
+def PreparePath(path):
+ """Ensures that a given path exists, making subdirectories if necessary.
+
+ Args:
+ path: fully-qualified path of directory to ensure exists
+
+ Returns:
+ None
+ """
+ try:
+ os.makedirs(path)
+ except OSError, e:
+ if e[0] != 17: raise e # error 17: path already exists
+
+if __name__ == "__main__":
+ PreparePath(r"c:\sitecompare\scrapes\ie7")
+ # We're being invoked rather than imported. Let's do some tests
+
+ # Hardcode IE's location for the purpose of this test
+ (proc, wnd) = InvokeAndWait(
+ r"c:\program files\internet explorer\iexplore.exe")
+
+ # Find the browser pane in the IE window
+ browser = FindChildWindow(
+ wnd, "TabWindowClass/Shell DocObject View/Internet Explorer_Server")
+
+ # Move and size the window
+ MoveAndSizeWindow(wnd, (0, 0), (1024, 768), browser)
+
+ # Take a screenshot
+ i = ScrapeWindow(browser)
+
+ i.show()
+
+ EndProcess(proc, 0)
diff --git a/tools/site_compare/operators/__init__.py b/tools/site_compare/operators/__init__.py
new file mode 100644
index 0000000..02eac07
--- /dev/null
+++ b/tools/site_compare/operators/__init__.py
@@ -0,0 +1,26 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2007 Google Inc. All Rights Reserved.
+
+"""Selects the appropriate operator."""
+
+__author__ = 'jhaas@google.com (Jonathan Haas)'
+
+
+def GetOperator(operator):
+ """Given an operator by name, returns its module.
+
+ Args:
+ operator: string describing the comparison
+
+ Returns:
+ module
+ """
+
+ # TODO(jhaas): come up with a happy way of integrating multiple operators
+ # with different, possibly divergent and possibly convergent, requirements.
+
+ module = __import__(operator, globals(), locals(), [''])
+
+ return module
+
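+# Illustrative usage (added, not part of the original): each operator module
+# in this package exposes Compare(file1, file2, **kwargs), so a caller can do:
+#
+#   operator = GetOperator("equals")
+#   result = operator.Compare("baseline.bmp", "test.bmp")
+#   if result is not None:
+#     errorstring, diff_image = result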
diff --git a/tools/site_compare/operators/equals.py b/tools/site_compare/operators/equals.py
new file mode 100644
index 0000000..2771401
--- /dev/null
+++ b/tools/site_compare/operators/equals.py
@@ -0,0 +1,66 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Compare two images for equality."""
+
+from PIL import Image
+from PIL import ImageChops
+
+
+def Compare(file1, file2, **kwargs):
+ """Compares two images to see if they're identical.
+
+ Args:
+ file1: path to first image to compare
+ file2: path to second image to compare
+ kwargs: unused for this operator
+
+ Returns:
+ None if the images are identical
+ A tuple of (errorstring, image) if they're not
+ """
+ kwargs = kwargs # unused parameter
+
+ im1 = Image.open(file1)
+ im2 = Image.open(file2)
+
+ if im1.size != im2.size:
+ return ("The images are of different size (%s vs %s)" %
+ (im1.size, im2.size), im1)
+
+ diff = ImageChops.difference(im1, im2)
+
+ if max(diff.getextrema()) != (0, 0):
+ return ("The images differ", diff)
+ else:
+ return None
+
+
+ \ No newline at end of file
diff --git a/tools/site_compare/operators/equals_with_mask.py b/tools/site_compare/operators/equals_with_mask.py
new file mode 100644
index 0000000..574457a
--- /dev/null
+++ b/tools/site_compare/operators/equals_with_mask.py
@@ -0,0 +1,86 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Compare two images for equality, subject to a mask."""
+
+from PIL import Image
+from PIL import ImageChops
+
+import os.path
+
+
+def Compare(file1, file2, **kwargs):
+ """Compares two images to see if they're identical subject to a mask.
+
+ An optional directory containing masks is supplied. If a mask exists
+ which matches file1's name, areas under the mask where it's black
+ are ignored.
+
+ Args:
+ file1: path to first image to compare
+ file2: path to second image to compare
+ kwargs: ["maskdir"] contains the directory holding the masks
+
+ Returns:
+ None if the images are identical
+ A tuple of (errorstring, image) if they're not
+ """
+
+ maskdir = None
+ if "maskdir" in kwargs:
+ maskdir = kwargs["maskdir"]
+
+ im1 = Image.open(file1)
+ im2 = Image.open(file2)
+
+ if im1.size != im2.size:
+ return ("The images are of different size (%r vs %r)" %
+ (im1.size, im2.size), im1)
+
+ diff = ImageChops.difference(im1, im2)
+
+ if maskdir:
+ maskfile = os.path.join(maskdir, os.path.basename(file1))
+ if os.path.exists(maskfile):
+ mask = Image.open(maskfile)
+
+ if mask.size != im1.size:
+ return ("The mask is of a different size than the images (%r vs %r)" %
+ (mask.size, im1.size), mask)
+
+ diff = ImageChops.multiply(diff, mask.convert(diff.mode))
+
+ if max(diff.getextrema()) != (0, 0):
+ return ("The images differ", diff)
+ else:
+ return None
+
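+# Note on masks (added, not part of the original): because the mask is
+# multiplied into the difference image, pixels that are black (0) in the mask
+# zero out any difference at that location, so those regions are ignored.
+# Masks are normally produced by the maskmaker command, but one can also be
+# made by hand by painting the regions to ignore black on a white image of
+# the same size as the scrapes and saving it in maskdir under the same
+# filename as the corresponding scrape of file1.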
+
+ \ No newline at end of file
diff --git a/tools/site_compare/scrapers/__init__.py b/tools/site_compare/scrapers/__init__.py
new file mode 100644
index 0000000..08790aa
--- /dev/null
+++ b/tools/site_compare/scrapers/__init__.py
@@ -0,0 +1,34 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2007 Google Inc. All Rights Reserved.
+
+"""Selects the appropriate scraper for a given browser and version."""
+
+__author__ = 'jhaas@google.com (Jonathan Haas)'
+
+import types
+
+# TODO(jhaas): unify all optional scraper parameters into kwargs
+
+def GetScraper(browser):
+ """Given a browser and an optional version, returns the scraper module.
+
+ Args:
+ browser: either a string (browser name) or a tuple (name, version)
+
+ Returns:
+ module
+ """
+
+ if type(browser) == types.StringType: browser = (browser, None)
+
+ package = __import__(browser[0], globals(), locals(), [''])
+ module = package.GetScraper(browser[1])
+ if browser[1] is not None: module.version = browser[1]
+
+ return module
+
+# if invoked rather than imported, do some tests
+if __name__ == "__main__":
+ print GetScraper("IE")
+ \ No newline at end of file
diff --git a/tools/site_compare/scrapers/chrome/__init__.py b/tools/site_compare/scrapers/chrome/__init__.py
new file mode 100644
index 0000000..2ba76c4
--- /dev/null
+++ b/tools/site_compare/scrapers/chrome/__init__.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2007 Google Inc. All Rights Reserved.
+
+"""Selects the appropriate scraper for Chrome."""
+
+__author__ = 'jhaas@google.com (Jonathan Haas)'
+
+def GetScraper(version):
+ """Returns the scraper module for the given version.
+
+ Args:
+ version: version string of Chrome, or None for most recent
+
+ Returns:
+ scrape module for given version
+ """
+ if version is None:
+ version = "0.1.101.0"
+
+ parsed_version = [int(x) for x in version.split(".")]
+
+ # Treat the version as a tuple so that, e.g., 0.1.101.0 compares greater
+ # than 0.1.97.0; anything newer than 0.1.97.0 uses the newer scraper.
+ if parsed_version > [0, 1, 97, 0]:
+ scraper_version = "chrome011010"
+ else:
+ scraper_version = "chrome01970"
+
+ return __import__(scraper_version, globals(), locals(), [''])
+
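+# Illustrative behavior (added, not part of the original): GetScraper(None)
+# and GetScraper("0.1.99.0") return the chrome011010 module, while
+# GetScraper("0.1.97.0") returns chrome01970.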
+# if invoked rather than imported, test
+if __name__ == "__main__":
+ version = "0.1.101.0"
+
+ print GetScraper(version).version
+ \ No newline at end of file
diff --git a/tools/site_compare/scrapers/chrome/chrome011010.py b/tools/site_compare/scrapers/chrome/chrome011010.py
new file mode 100644
index 0000000..0b75ff8
--- /dev/null
+++ b/tools/site_compare/scrapers/chrome/chrome011010.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does scraping for versions of Chrome from 0.1.101.0 up."""
+
+from drivers import windowing
+
+import chromebase
+
+# Default version
+version = "0.1.101.0"
+
+
+def GetChromeRenderPane(wnd):
+ return windowing.FindChildWindow(wnd, "Chrome_TabContents")
+
+
+def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
+ """Invoke a browser, send it to a series of URLs, and save its output.
+
+ Args:
+ urls: list of URLs to scrape
+ outdir: directory to place output
+ size: size of browser window to use
+ pos: position of browser window
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ None if succeeded, else an error code
+ """
+ chromebase.GetChromeRenderPane = GetChromeRenderPane
+
+ return chromebase.Scrape(urls, outdir, size, pos, timeout, kwargs)
+
+
+def Time(urls, size, timeout, **kwargs):
+ """Forwards the Time command to chromebase."""
+ chromebase.GetChromeRenderPane = GetChromeRenderPane
+
+ return chromebase.Time(urls, size, timeout, kwargs)
diff --git a/tools/site_compare/scrapers/chrome/chrome01970.py b/tools/site_compare/scrapers/chrome/chrome01970.py
new file mode 100644
index 0000000..bf43095
--- /dev/null
+++ b/tools/site_compare/scrapers/chrome/chrome01970.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does scraping for versions of Chrome up to 0.1.97.0."""
+
+from drivers import windowing
+
+import chromebase
+
+# Default version
+version = "0.1.97.0"
+
+
+def GetChromeRenderPane(wnd):
+ return windowing.FindChildWindow(wnd, "Chrome_BrowserWindow")
+
+
+def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
+ """Invoke a browser, send it to a series of URLs, and save its output.
+
+ Args:
+ urls: list of URLs to scrape
+ outdir: directory to place output
+ size: size of browser window to use
+ pos: position of browser window
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ None if succeeded, else an error code
+ """
+ chromebase.GetChromeRenderPane = GetChromeRenderPane
+
+ return chromebase.Scrape(urls, outdir, size, pos, timeout, kwargs)
+
+
+def Time(urls, size, timeout, **kwargs):
+ """Forwards the Time command to chromebase."""
+ chromebase.GetChromeRenderPane = GetChromeRenderPane
+
+ return chromebase.Time(urls, size, timeout, kwargs)
+ \ No newline at end of file
diff --git a/tools/site_compare/scrapers/chrome/chromebase.py b/tools/site_compare/scrapers/chrome/chromebase.py
new file mode 100644
index 0000000..4825049
--- /dev/null
+++ b/tools/site_compare/scrapers/chrome/chromebase.py
@@ -0,0 +1,217 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does scraping for all currently-known versions of Chrome"""
+
+import pywintypes
+import types
+
+from drivers import keyboard
+from drivers import mouse
+from drivers import windowing
+
+# TODO: this has moved, use some logic to find it. For now,
+# expects a subst k:.
+DEFAULT_PATH = r"k:\chrome.exe"
+
+def InvokeBrowser(path):
+ """Invoke the Chrome browser.
+
+ Args:
+ path: full path to browser
+
+ Returns:
+ A tuple of (main window, process handle, address bar, render pane)
+ """
+
+ # Reuse an existing instance of the browser if we can find one. This
+ # may not work correctly, especially if the window is behind other windows.
+
+ # TODO(jhaas): make this work with Vista
+ wnds = windowing.FindChildWindows(0, "Chrome_XPFrame")
+ if len(wnds):
+ wnd = wnds[0]
+ proc = None
+ else:
+ # Invoke Chrome
+ (proc, wnd) = windowing.InvokeAndWait(path)
+
+ # Get windows we'll need
+ address_bar = windowing.FindChildWindow(wnd, "Chrome_AutocompleteEdit")
+ render_pane = GetChromeRenderPane(wnd)
+
+ return (wnd, proc, address_bar, render_pane)
+
+
+def Scrape(urls, outdir, size, pos, timeout, kwargs):
+ """Invoke a browser, send it to a series of URLs, and save its output.
+
+ Args:
+ urls: list of URLs to scrape
+ outdir: directory to place output
+ size: size of browser window to use
+ pos: position of browser window
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ None if success, else an error string
+ """
+ if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
+ else: path = DEFAULT_PATH
+
+ (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)
+
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)
+
+ # Visit each URL we're given
+ if type(urls) in types.StringTypes: urls = [urls]
+
+ timedout = False
+
+ for url in urls:
+ # Double-click in the address bar, type the name, and press Enter
+ mouse.ClickInWindow(address_bar)
+ keyboard.TypeString(url, 0.1)
+ keyboard.TypeString("\n")
+
+ # Wait for the page to finish loading
+ load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)
+ timedout = load_time < 0
+
+ if timedout:
+ break
+
+ # Scrape the page
+ image = windowing.ScrapeWindow(render_pane)
+
+ # Save to disk
+ if "filename" in kwargs:
+ if callable(kwargs["filename"]):
+ filename = kwargs["filename"](url)
+ else:
+ filename = kwargs["filename"]
+ else:
+ filename = windowing.URLtoFilename(url, outdir, ".bmp")
+ image.save(filename)
+
+ if proc:
+ windowing.SetForegroundWindow(wnd)
+
+ # Send Alt-F4, then wait for process to end
+ keyboard.TypeString(r"{\4}", use_modifiers=True)
+ if not windowing.WaitForProcessExit(proc, timeout):
+ windowing.EndProcess(proc)
+ return "crashed"
+
+ if timedout:
+ return "timeout"
+
+ return None
+
+
+def Time(urls, size, timeout, kwargs):
+ """Measure how long it takes to load each of a series of URLs
+
+ Args:
+ urls: list of URLs to time
+ size: size of browser window to use
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ A list of tuples (url, time). "time" can be "crashed" or "timeout"
+ """
+ if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
+ else: path = DEFAULT_PATH
+ proc = None
+
+ # Visit each URL we're given
+ if type(urls) in types.StringTypes: urls = [urls]
+
+ ret = []
+ for url in urls:
+ try:
+ # Invoke the browser if necessary
+ if not proc:
+ (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)
+
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)
+
+ # Double-click in the address bar, type the name, and press Enter
+ mouse.ClickInWindow(address_bar)
+ keyboard.TypeString(url, 0.1)
+ keyboard.TypeString("\n")
+
+ # Wait for the page to finish loading
+ load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)
+
+ timedout = load_time < 0
+
+ if timedout:
+ load_time = "timeout"
+
+ # Send an alt-F4 to make the browser close; if this times out,
+ # we've probably got a crash
+ windowing.SetForegroundWindow(wnd)
+
+ keyboard.TypeString(r"{\4}", use_modifiers=True)
+ if not windowing.WaitForProcessExit(proc, timeout):
+ windowing.EndProcess(proc)
+ load_time = "crashed"
+ proc = None
+ except pywintypes.error:
+ proc = None
+ load_time = "crashed"
+
+ ret.append( (url, load_time) )
+
+ if proc:
+ windowing.SetForegroundWindow(wnd)
+ keyboard.TypeString(r"{\4}", use_modifiers=True)
+ if not windowing.WaitForProcessExit(proc, timeout):
+ windowing.EndProcess(proc)
+
+ return ret
+
+
+if __name__ == "__main__":
+ # We're being invoked rather than imported, so run some tests
+ path = r"c:\sitecompare\scrapes\chrome\0.1.97.0"
+ windowing.PreparePath(path)
+
+ # Scrape three sites and save the results
+ Scrape([
+ "http://www.microsoft.com",
+ "http://www.google.com",
+ "http://www.sun.com"],
+ path, (1024, 768), (0, 0))
diff --git a/tools/site_compare/scrapers/firefox/__init__.py b/tools/site_compare/scrapers/firefox/__init__.py
new file mode 100644
index 0000000..255dc4b
--- /dev/null
+++ b/tools/site_compare/scrapers/firefox/__init__.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2007 Google Inc. All Rights Reserved.
+
+"""Selects the appropriate scraper for Firefox."""
+
+__author__ = 'jhaas@google.com (Jonathan Haas)'
+
+
+def GetScraper(version):
+ """Returns the scraper module for the given version.
+
+ Args:
+ version: version string of Firefox, or None for most recent
+
+ Returns:
+ scrape module for given version
+ """
+
+ # Pychecker will warn that the parameter is unused; we only
+ # support one version of Firefox at this time
+
+ # We only have one version of the Firefox scraper for now
+ return __import__("firefox2", globals(), locals(), [''])
+
+# if invoked rather than imported, test
+if __name__ == "__main__":
+ version = "2.0.0.6"
+
+ print GetScraper("2.0.0.6").version
+ \ No newline at end of file
diff --git a/tools/site_compare/scrapers/firefox/firefox2.py b/tools/site_compare/scrapers/firefox/firefox2.py
new file mode 100644
index 0000000..d91534e
--- /dev/null
+++ b/tools/site_compare/scrapers/firefox/firefox2.py
@@ -0,0 +1,269 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does scraping for Firefox 2.0."""
+
+import pywintypes
+import time
+import types
+
+from drivers import keyboard
+from drivers import mouse
+from drivers import windowing
+
+# Default version
+version = "2.0.0.6"
+
+DEFAULT_PATH = r"c:\program files\mozilla firefox\firefox.exe"
+
+# TODO(jhaas): the Firefox scraper is a bit rickety at the moment. Known
+# issues: 1) won't work if the default profile puts toolbars in different
+# locations, 2) uses sleep() statements rather than more robust checks,
+# 3) fails badly if an existing Firefox window is open when the scrape
+# is invoked. This needs to be fortified at some point.
+
+def GetBrowser(path):
+ """Invoke the Firefox browser and return the process and window.
+
+ Args:
+ path: full path to browser
+
+ Returns:
+    A tuple of (process handle, main window, render pane)
+ """
+ if not path: path = DEFAULT_PATH
+
+ # Invoke Firefox
+ (proc, wnd) = windowing.InvokeAndWait(path)
+
+ # Get the content pane
+ render_pane = windowing.FindChildWindow(
+ wnd,
+ "MozillaWindowClass/MozillaWindowClass/MozillaWindowClass")
+
+ return (proc, wnd, render_pane)
+
+
+def InvokeBrowser(path):
+ """Invoke the Firefox browser.
+
+ Args:
+ path: full path to browser
+
+ Returns:
+ A tuple of (main window, process handle, render pane)
+ """
+ # Reuse an existing instance of the browser if we can find one. This
+ # may not work correctly, especially if the window is behind other windows.
+ wnds = windowing.FindChildWindows(0, "MozillaUIWindowClass")
+ if len(wnds):
+ wnd = wnds[0]
+ proc = None
+ else:
+ # Invoke Firefox
+ (proc, wnd) = windowing.InvokeAndWait(path)
+
+ # Get the content pane
+ render_pane = windowing.FindChildWindow(
+ wnd,
+ "MozillaWindowClass/MozillaWindowClass/MozillaWindowClass")
+
+ return (wnd, proc, render_pane)
+
+
+def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
+ """Invoke a browser, send it to a series of URLs, and save its output.
+
+ Args:
+ urls: list of URLs to scrape
+ outdir: directory to place output
+ size: size of browser window to use
+ pos: position of browser window
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ None if success, else an error string
+ """
+ if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
+ else: path = DEFAULT_PATH
+
+ (wnd, proc, render_pane) = InvokeBrowser(path)
+
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)
+
+ time.sleep(3)
+
+ # Firefox is a bit of a pain: it doesn't use standard edit controls,
+ # and it doesn't display a throbber when there's no tab. Let's make
+ # sure there's at least one tab, then select the first one
+
+ mouse.ClickInWindow(wnd)
+ keyboard.TypeString("[t]", True)
+ mouse.ClickInWindow(wnd, (30, 115))
+ time.sleep(2)
+
+ timedout = False
+
+ # Visit each URL we're given
+ if type(urls) in types.StringTypes: urls = [urls]
+
+ for url in urls:
+
+ # Use keyboard shortcuts
+ keyboard.TypeString("{d}", True)
+ keyboard.TypeString(url)
+ keyboard.TypeString("\n")
+
+ # Wait for the page to finish loading
+ load_time = windowing.WaitForThrobber(wnd, (10, 96, 26, 112), timeout)
+ timedout = load_time < 0
+
+ if timedout:
+ break
+
+ # Scrape the page
+ image = windowing.ScrapeWindow(render_pane)
+
+ # Save to disk
+ if "filename" in kwargs:
+ if callable(kwargs["filename"]):
+ filename = kwargs["filename"](url)
+ else:
+ filename = kwargs["filename"]
+ else:
+ filename = windowing.URLtoFilename(url, outdir, ".bmp")
+ image.save(filename)
+
+ # Close all the tabs, cheesily
+ mouse.ClickInWindow(wnd)
+
+ while len(windowing.FindChildWindows(0, "MozillaUIWindowClass")):
+ keyboard.TypeString("[w]", True)
+ time.sleep(1)
+
+ if timedout:
+ return "timeout"
+
+
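+# A minimal, hedged sketch of the "filename" keyword handled in Scrape() above:
+# it may be a plain string or a callable that maps each URL to an output path.
+# The output directory and lambda below are illustrative only.
+#
+#   Scrape(["http://www.google.com"], r"c:\scrapes", (1024, 768), (0, 0),
+#          filename=lambda url: windowing.URLtoFilename(url, r"c:\scrapes",
+#                                                        ".bmp"))
+
+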
+def Time(urls, size, timeout, **kwargs):
+ """Measure how long it takes to load each of a series of URLs
+
+ Args:
+ urls: list of URLs to time
+ size: size of browser window to use
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ A list of tuples (url, time). "time" can be "crashed" or "timeout"
+ """
+ if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
+ else: path = DEFAULT_PATH
+ proc = None
+
+ # Visit each URL we're given
+ if type(urls) in types.StringTypes: urls = [urls]
+
+ ret = []
+ for url in urls:
+ try:
+ # Invoke the browser if necessary
+ if not proc:
+ (wnd, proc, render_pane) = InvokeBrowser(path)
+
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)
+
+ time.sleep(3)
+
+ # Firefox is a bit of a pain: it doesn't use standard edit controls,
+ # and it doesn't display a throbber when there's no tab. Let's make
+ # sure there's at least one tab, then select the first one
+
+ mouse.ClickInWindow(wnd)
+ keyboard.TypeString("[t]", True)
+ mouse.ClickInWindow(wnd, (30, 115))
+ time.sleep(2)
+
+ # Use keyboard shortcuts
+ keyboard.TypeString("{d}", True)
+ keyboard.TypeString(url)
+ keyboard.TypeString("\n")
+
+ # Wait for the page to finish loading
+ load_time = windowing.WaitForThrobber(wnd, (10, 96, 26, 112), timeout)
+ timedout = load_time < 0
+
+ if timedout:
+ load_time = "timeout"
+
+ # Try to close the browser; if this fails it's probably a crash
+ mouse.ClickInWindow(wnd)
+
+ count = 0
+ while (len(windowing.FindChildWindows(0, "MozillaUIWindowClass"))
+ and count < 5):
+ keyboard.TypeString("[w]", True)
+ time.sleep(1)
+ count = count + 1
+
+ if len(windowing.FindChildWindows(0, "MozillaUIWindowClass")):
+ windowing.EndProcess(proc)
+ load_time = "crashed"
+
+ proc = None
+ except pywintypes.error:
+ proc = None
+ load_time = "crashed"
+
+ ret.append( (url, load_time) )
+
+ if proc:
+ count = 0
+ while (len(windowing.FindChildWindows(0, "MozillaUIWindowClass"))
+ and count < 5):
+ keyboard.TypeString("[w]", True)
+ time.sleep(1)
+ count = count + 1
+ return ret
+
+
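+# A hedged example of consuming the (url, time) tuples returned by Time();
+# the URL and timeout below are illustrative.
+#
+#   for url, load_time in Time(["http://www.google.com"], (1024, 768), 60):
+#     # load_time is a numeric load time, or the string "timeout"/"crashed"
+#     print "%s: %s" % (url, load_time)
+
+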
+if __name__ == "__main__":
+ # We're being invoked rather than imported, so run some tests
+ path = r"c:\sitecompare\scrapes\Firefox\2.0.0.6"
+ windowing.PreparePath(path)
+
+ # Scrape three sites and save the results
+ Scrape(
+ ["http://www.microsoft.com", "http://www.google.com",
+ "http://www.sun.com"],
+ path, (1024, 768), (0, 0))
diff --git a/tools/site_compare/scrapers/ie/__init__.py b/tools/site_compare/scrapers/ie/__init__.py
new file mode 100644
index 0000000..4b8949b
--- /dev/null
+++ b/tools/site_compare/scrapers/ie/__init__.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2007 Google Inc. All Rights Reserved.
+
+"""Selects the appropriate scraper for Internet Explorer."""
+
+__author__ = 'jhaas@google.com (Jonathan Haas)'
+
+
+def GetScraper(version):
+ """Returns the scraper module for the given version.
+
+ Args:
+ version: version string of IE, or None for most recent
+
+ Returns:
+    scraper module for the given version
+ """
+
+  # Pychecker will warn that the parameter is unused; we only have one
+  # version of the IE scraper at this time, so the version is ignored.
+ return __import__("ie7", globals(), locals(), [''])
+
+# if invoked rather than imported, test
+if __name__ == "__main__":
+ version = "7.0.5370.1"
+
+ print GetScraper(version).version
+ \ No newline at end of file
diff --git a/tools/site_compare/scrapers/ie/ie7.py b/tools/site_compare/scrapers/ie/ie7.py
new file mode 100644
index 0000000..a0475e0
--- /dev/null
+++ b/tools/site_compare/scrapers/ie/ie7.py
@@ -0,0 +1,230 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does scraping for all known versions of IE."""
+
+import pywintypes
+import time
+import types
+
+from drivers import keyboard
+from drivers import mouse
+from drivers import windowing
+
+# Default version
+version = "7.0.5730.1"
+
+DEFAULT_PATH = r"c:\program files\internet explorer\iexplore.exe"
+
+def GetBrowser(path):
+ """Invoke the IE browser and return the process, frame, and content window.
+
+ Args:
+ path: full path to browser
+
+ Returns:
+    A tuple of (process handle, main window, render pane)
+ """
+ if not path: path = DEFAULT_PATH
+
+ (iewnd, ieproc, address_bar, render_pane, tab_window) = InvokeBrowser(path)
+ return (ieproc, iewnd, render_pane)
+
+
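+# A hedged example of consuming GetBrowser()'s return value; passing None for
+# the path falls back to DEFAULT_PATH, and the window size is illustrative.
+#
+#   (proc, frame, render_pane) = GetBrowser(None)
+#   windowing.MoveAndSizeWindow(frame, (0, 0), (1024, 768), render_pane)
+
+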
+def InvokeBrowser(path):
+ """Invoke the IE browser.
+
+ Args:
+ path: full path to browser
+
+ Returns:
+ A tuple of (main window, process handle, address bar,
+ render_pane, tab_window)
+ """
+ # Invoke IE
+ (ieproc, iewnd) = windowing.InvokeAndWait(path)
+
+ # Get windows we'll need
+ for tries in xrange(10):
+ try:
+ address_bar = windowing.FindChildWindow(
+ iewnd, "WorkerW|Navigation Bar/ReBarWindow32/"
+ "Address Band Root/ComboBoxEx32/ComboBox/Edit")
+ render_pane = windowing.FindChildWindow(
+ iewnd, "TabWindowClass/Shell DocObject View")
+ tab_window = windowing.FindChildWindow(
+ iewnd, "CommandBarClass/ReBarWindow32/TabBandClass/DirectUIHWND")
+ except IndexError:
+ time.sleep(1)
+ continue
+ break
+
+ return (iewnd, ieproc, address_bar, render_pane, tab_window)
+
+
+def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
+ """Invoke a browser, send it to a series of URLs, and save its output.
+
+ Args:
+ urls: list of URLs to scrape
+ outdir: directory to place output
+ size: size of browser window to use
+ pos: position of browser window
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ None if success, else an error string
+ """
+ path = r"c:\program files\internet explorer\iexplore.exe"
+
+ if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
+
+ (iewnd, ieproc, address_bar, render_pane, tab_window) = (
+ InvokeBrowser(path) )
+
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane)
+
+ # Visit each URL we're given
+ if type(urls) in types.StringTypes: urls = [urls]
+
+ timedout = False
+
+ for url in urls:
+
+ # Double-click in the address bar, type the name, and press Enter
+ mouse.DoubleClickInWindow(address_bar)
+ keyboard.TypeString(url)
+ keyboard.TypeString("\n")
+
+ # Wait for the page to finish loading
+ load_time = windowing.WaitForThrobber(
+ tab_window, (6, 8, 22, 24), timeout)
+ timedout = load_time < 0
+
+ if timedout:
+ break
+
+ # Scrape the page
+ image = windowing.ScrapeWindow(render_pane)
+
+ # Save to disk
+ if "filename" in kwargs:
+ if callable(kwargs["filename"]):
+ filename = kwargs["filename"](url)
+ else:
+ filename = kwargs["filename"]
+ else:
+ filename = windowing.URLtoFilename(url, outdir, ".bmp")
+ image.save(filename)
+
+ windowing.EndProcess(ieproc)
+
+ if timedout:
+ return "timeout"
+
+
+def Time(urls, size, timeout, **kwargs):
+ """Measure how long it takes to load each of a series of URLs
+
+ Args:
+ urls: list of URLs to time
+ size: size of browser window to use
+ timeout: amount of time to wait for page to load
+ kwargs: miscellaneous keyword args
+
+ Returns:
+ A list of tuples (url, time). "time" can be "crashed" or "timeout"
+ """
+ if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
+ else: path = DEFAULT_PATH
+ proc = None
+
+ # Visit each URL we're given
+ if type(urls) in types.StringTypes: urls = [urls]
+
+ ret = []
+ for url in urls:
+ try:
+ # Invoke the browser if necessary
+ if not proc:
+ (wnd, proc, address_bar, render_pane, tab_window) = InvokeBrowser(path)
+
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)
+
+ # Double-click in the address bar, type the name, and press Enter
+ mouse.DoubleClickInWindow(address_bar)
+ keyboard.TypeString(url)
+ keyboard.TypeString("\n")
+
+ # Wait for the page to finish loading
+ load_time = windowing.WaitForThrobber(
+ tab_window, (6, 8, 22, 24), timeout)
+ timedout = load_time < 0
+
+ if timedout:
+ load_time = "timeout"
+
+ # Send an alt-F4 to make the browser close; if this times out,
+ # we've probably got a crash
+ keyboard.TypeString(r"{\4}", use_modifiers=True)
+ if not windowing.WaitForProcessExit(proc, timeout):
+ windowing.EndProcess(proc)
+ load_time = "crashed"
+ proc = None
+ except pywintypes.error:
+ load_time = "crashed"
+ proc = None
+
+ ret.append( (url, load_time) )
+
+ # Send an alt-F4 to make the browser close; if this times out,
+ # we've probably got a crash
+ if proc:
+ keyboard.TypeString(r"{\4}", use_modifiers=True)
+ if not windowing.WaitForProcessExit(proc, timeout):
+ windowing.EndProcess(proc)
+
+ return ret
+
+
+if __name__ == "__main__":
+ # We're being invoked rather than imported, so run some tests
+ path = r"c:\sitecompare\scrapes\ie7\7.0.5380.11"
+ windowing.PreparePath(path)
+
+ # Scrape three sites and save the results
+ Scrape(
+ ["http://www.microsoft.com",
+ "http://www.google.com",
+ "http://www.sun.com"],
+ path, (1024, 768), (0, 0))
diff --git a/tools/site_compare/site_compare.py b/tools/site_compare/site_compare.py
new file mode 100644
index 0000000..8acfdcf
--- /dev/null
+++ b/tools/site_compare/site_compare.py
@@ -0,0 +1,202 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""SiteCompare component to handle bulk scrapes.
+
+Invokes a list of browsers and sends them to a list of URLs,
+saving the rendered results to a specified directory, then
+performs comparison operations on the resulting bitmaps and
+saves the results
+"""
+
+
+# This line is necessary to work around a QEMU bug
+import _imaging
+
+import os # Functions for walking the directory tree
+import types # Runtime type-checking
+
+import command_line # command-line parsing
+import drivers # Functions for driving keyboard/mouse/windows, OS-specific
+import operators  # Functions that, given two bitmaps as input, produce
+                   # output based on the result of a comparison operation
+import scrapers # Functions that know how to capture a render from
+ # particular browsers
+
+import commands.compare2 # compare one page in two versions of same browser
+import commands.maskmaker # generate a mask based on repeated scrapes
+import commands.measure # measure length of time a page takes to load
+import commands.scrape # scrape a URL or series of URLs to a bitmap
+
+# The timeload command is obsolete (too flaky); it may be reinstated
+# later, but for now it's been superseded by "measure".
+# import commands.timeload # measure length of time a page takes to load
+
+def Scrape(browsers, urls, window_size=(1024, 768),
+ window_pos=(0, 0), timeout=20, save_path=None, **kwargs):
+ """Invoke one or more browsers over one or more URLs, scraping renders.
+
+ Args:
+ browsers: browsers to invoke with optional version strings
+ urls: URLs to visit
+ window_size: size of the browser window to display
+ window_pos: location of browser window
+ timeout: time (in seconds) to wait for page to load
+ save_path: root of save path, automatically appended with browser and
+ version
+ kwargs: miscellaneous keyword args, passed to scraper
+ Returns:
+ None
+
+ @TODO(jhaas): more parameters, or perhaps an indefinite dictionary
+ parameter, for things like length of time to wait for timeout, speed
+ of mouse clicks, etc. Possibly on a per-browser, per-URL, or
+ per-browser-per-URL basis
+ """
+
+ if type(browsers) in types.StringTypes: browsers = [browsers]
+
+ if save_path is None:
+ # default save path is "scrapes" off the current root
+ save_path = os.path.join(os.path.split(__file__)[0], "Scrapes")
+
+ for browser in browsers:
+ # Browsers should be tuples of (browser, version)
+ if type(browser) in types.StringTypes: browser = (browser, None)
+ scraper = scrapers.GetScraper(browser)
+
+ full_path = os.path.join(save_path, browser[0], scraper.version)
+ drivers.windowing.PreparePath(full_path)
+
+    scraper.Scrape(urls, full_path, window_size, window_pos, timeout, **kwargs)
+
+
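+# A hedged usage sketch for Scrape(); the browser names, URLs, and save path
+# below are illustrative rather than shipped defaults.
+#
+#   Scrape(["chrome", "firefox"],
+#          ["http://www.google.com", "http://www.example.com"],
+#          window_size=(1024, 768),
+#          save_path=r"c:\sitecompare\scrapes")
+
+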
+def Compare(base, compare, ops, root_path=None, out_path=None):
+ """Compares a series of scrapes using a series of operators.
+
+ Args:
+ base: (browser, version) tuple of version to consider the baseline
+ compare: (browser, version) tuple of version to compare to
+ ops: list of operators plus operator arguments
+ root_path: root of the scrapes
+ out_path: place to put any output from the operators
+
+ Returns:
+ None
+
+ @TODO(jhaas): this method will likely change, to provide a robust and
+ well-defined way of chaining operators, applying operators conditionally,
+ and full-featured scripting of the operator chain. There also needs
+ to be better definition of the output; right now it's to stdout and
+ a log.txt file, with operator-dependent images saved for error output
+ """
+ if root_path is None:
+ # default save path is "scrapes" off the current root
+ root_path = os.path.join(os.path.split(__file__)[0], "Scrapes")
+
+ if out_path is None:
+ out_path = os.path.join(os.path.split(__file__)[0], "Compares")
+
+ if type(base) in types.StringTypes: base = (base, None)
+ if type(compare) in types.StringTypes: compare = (compare, None)
+ if type(ops) in types.StringTypes: ops = [ops]
+
+ base_dir = os.path.join(root_path, base[0])
+ compare_dir = os.path.join(root_path, compare[0])
+
+  if base[1] is None:
+    # base defaults to earliest capture
+    base = (base[0], min(os.listdir(base_dir)))
+
+  if compare[1] is None:
+    # compare defaults to latest capture
+    compare = (compare[0], max(os.listdir(compare_dir)))
+
+ out_path = os.path.join(out_path, base[0], base[1], compare[0], compare[1])
+ drivers.windowing.PreparePath(out_path)
+
+ # TODO(jhaas): right now we're just dumping output to a log file
+ # (and the console), which works as far as it goes but isn't nearly
+ # robust enough. Change this after deciding exactly what we want to
+ # change it to.
+ out_file = open(os.path.join(out_path, "log.txt"), "w")
+ description_string = ("Comparing %s %s to %s %s" %
+ (base[0], base[1], compare[0], compare[1]))
+ out_file.write(description_string)
+ print description_string
+
+ base_dir = os.path.join(base_dir, base[1])
+ compare_dir = os.path.join(compare_dir, compare[1])
+
+ for filename in os.listdir(base_dir):
+ out_file.write("%s: " % filename)
+
+ if not os.path.isfile(os.path.join(compare_dir, filename)):
+ out_file.write("Does not exist in target directory\n")
+ print "File %s does not exist in target directory" % filename
+ continue
+
+ base_filename = os.path.join(base_dir, filename)
+ compare_filename = os.path.join(compare_dir, filename)
+
+ for op in ops:
+ if type(op) in types.StringTypes: op = (op, None)
+
+ module = operators.GetOperator(op[0])
+
+ ret = module.Compare(base_filename, compare_filename)
+ if ret is None:
+ print "%s: OK" % (filename,)
+ out_file.write("OK\n")
+ else:
+ print "%s: %s" % (filename, ret[0])
+ out_file.write("%s\n" % (ret[0]))
+ ret[1].save(os.path.join(out_path, filename))
+
+ out_file.close()
+
+
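+# A hedged usage sketch for Compare(); the version strings and the
+# "equals_with_mask" operator name are illustrative.
+#
+#   Compare(("chrome", "0.1.97.0"), ("chrome", "0.1.101.0"),
+#           ["equals_with_mask"])
+
+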
+def main():
+ """Main executable. Parse the command line and invoke the command."""
+ cmdline = command_line.CommandLine()
+
+  # The two commands below are currently unstable, so they have been disabled
+ # commands.compare2.CreateCommand(cmdline)
+ # commands.maskmaker.CreateCommand(cmdline)
+ commands.measure.CreateCommand(cmdline)
+ commands.scrape.CreateCommand(cmdline)
+
+ cmdline.ParseCommandLine()
+
+
+
+if __name__ == "__main__":
+ main()
+ \ No newline at end of file
diff --git a/tools/site_compare/utils/__init__.py b/tools/site_compare/utils/__init__.py
new file mode 100644
index 0000000..69f2237
--- /dev/null
+++ b/tools/site_compare/utils/__init__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2007 Google Inc. All Rights Reserved.
+
+"""Utilities for site_compare."""
+
+__author__ = 'jhaas@google.com (Jonathan Haas)'
diff --git a/tools/site_compare/utils/browser_iterate.py b/tools/site_compare/utils/browser_iterate.py
new file mode 100644
index 0000000..65ba24f
--- /dev/null
+++ b/tools/site_compare/utils/browser_iterate.py
@@ -0,0 +1,225 @@
+#!/usr/bin/python2.4
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Utility to use a browser to visit multiple URLs.
+
+Prerequisites:
+ 1. The command_line package from tools/site_compare
+ 2. Either the IE BHO or Firefox extension (or both)
+
+Installation:
+ 1. Build the IE BHO, or call regsvr32 on a prebuilt binary
+ 2. Add a file called "measurepageloadtimeextension@google.com" to
+ the default Firefox profile directory under extensions, containing
+ the path to the Firefox extension root
+
+Invoke with the command line arguments as documented within
+the command line.
+"""
+
+import command_line
+import pywintypes
+import scrapers
+import socket
+import time
+
+from drivers import windowing
+
+# Constants
+MAX_URL = 1024
+PORT = 42492
+
+def SetupIterationCommandLine(cmd):
+ """Adds the necessary flags for iteration to a command.
+
+ Args:
+ cmd: an object created by cmdline.AddCommand
+ """
+ cmd.AddArgument(
+ ["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
+ type="string", required=True)
+ cmd.AddArgument(
+ ["-b1v", "--browserver"], "Version of browser", metaname="VERSION")
+ cmd.AddArgument(
+ ["-p", "--browserpath"], "Path to browser.",
+ type="string", required=False)
+ cmd.AddArgument(
+ ["-u", "--url"], "URL to visit")
+ cmd.AddArgument(
+ ["-l", "--list"], "File containing list of URLs to visit", type="readfile")
+ cmd.AddMutualExclusion(["--url", "--list"])
+ cmd.AddArgument(
+ ["-s", "--startline"], "First line of URL list", type="int")
+ cmd.AddArgument(
+ ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
+ cmd.AddArgument(
+ ["-c", "--count"], "Number of lines of URL file to use", type="int")
+ cmd.AddDependency("--startline", "--list")
+ cmd.AddRequiredGroup(["--url", "--list"])
+ cmd.AddDependency("--endline", "--list")
+ cmd.AddDependency("--count", "--list")
+ cmd.AddMutualExclusion(["--count", "--endline"])
+ cmd.AddDependency("--count", "--startline")
+ cmd.AddArgument(
+ ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
+ "finish loading",
+ type="int", default=300)
+ cmd.AddArgument(
+ ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
+
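+
+# A hedged example command line using the flags registered above, assuming the
+# enclosing command is site_compare.py's "measure" command; the URL file and
+# numeric values are illustrative.
+#
+#   python site_compare.py measure --browser firefox --list urls.txt \
+#       --startline 1 --count 50 --timeout 60
+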
+
+def Iterate(command, iteration_func):
+ """Iterates over a list of URLs, calling a function on each.
+
+ Args:
+ command: the command line containing the iteration flags
+    iteration_func: called for each URL with (url, proc, wnd, response)
+ """
+
+ # Retrieve the browser scraper to use to invoke the browser
+ scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))
+
+ def AttachToBrowser(path, timeout):
+ """Invoke the browser process and connect to the socket."""
+ (proc, frame, wnd) = scraper.GetBrowser(path)
+
+ if not wnd: raise ValueError("Could not invoke browser.")
+
+ # Try to connect the socket. If it fails, wait and try
+ # again. Do this for ten seconds
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
+
+ for attempt in xrange(10):
+ try:
+ s.connect(("localhost", PORT))
+ except socket.error:
+ time.sleep(1)
+ continue
+ break
+
+ try:
+ s.getpeername()
+ except socket.error:
+ raise ValueError("Could not connect to browser")
+
+ if command["--size"]:
+ # Resize and reposition the frame
+ windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)
+
+ s.settimeout(timeout)
+
+ Iterate.proc = proc
+ Iterate.wnd = wnd
+ Iterate.s = s
+
+ def DetachFromBrowser():
+ """Close the socket and kill the process if necessary."""
+ if Iterate.s:
+ Iterate.s.close()
+ Iterate.s = None
+
+ if Iterate.proc:
+ if not windowing.WaitForProcessExit(Iterate.proc, 0):
+ try:
+ windowing.EndProcess(Iterate.proc)
+ windowing.WaitForProcessExit(Iterate.proc, 0)
+ except pywintypes.error:
+ # Exception here most likely means the process died on its own
+ pass
+ Iterate.proc = None
+
+ if command["--browserpath"]:
+ browser = command["--browserpath"]
+ else:
+ browser = None
+
+ # Read the URLs from the file
+ if command["--url"]:
+ url_list = [command["--url"]]
+ else:
+ startline = command["--startline"]
+ if command["--count"]:
+ endline = startline+command["--count"]
+ else:
+ endline = command["--endline"]
+
+    url_list = []
+    url_file = open(command["--list"], "r")
+
+    for line in xrange(startline-1):
+      url_file.readline()
+
+    for line in xrange(endline-startline):
+      url_list.append(url_file.readline().strip())
+
+    url_file.close()
+
+ timeout = command["--timeout"]
+
+ # Loop through the URLs and send them through the socket
+ Iterate.s = None
+ Iterate.proc = None
+ Iterate.wnd = None
+
+ for url in url_list:
+ # Invoke the browser if necessary
+ if not Iterate.proc:
+ AttachToBrowser(browser, timeout)
+ # Send the URL and wait for a response
+ Iterate.s.send(url + "\n")
+
+ response = ""
+
+ while (response.find("\n") < 0):
+
+ try:
+ recv = Iterate.s.recv(MAX_URL)
+ response = response + recv
+
+ # Workaround for an oddity: when Firefox closes
+ # gracefully, somehow Python doesn't detect it.
+ # (Telnet does)
+ if not recv:
+ raise socket.error
+
+ except socket.timeout:
+ response = url + ",hang\n"
+ DetachFromBrowser()
+ except socket.error:
+ # If there was a socket error, it's probably a crash
+ response = url + ",crash\n"
+ DetachFromBrowser()
+
+ # If we received a timeout response, restart the browser
+ if response[-9:] == ",timeout\n":
+ DetachFromBrowser()
+
+ # Invoke the iteration function
+ iteration_func(url, Iterate.proc, Iterate.wnd, response)
+
+ # We're done
+ DetachFromBrowser()
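+
+
+# A hedged sketch of a per-URL callback for Iterate(); the argument order
+# matches the iteration_func call above, and the printing is illustrative.
+#
+#   def PrintResult(url, proc, wnd, response):
+#     # response is a newline-terminated string; hang and crash cases are
+#     # reported as "<url>,hang\n" and "<url>,crash\n"
+#     print response.strip()
+#
+#   # then, with a command parsed by command_line:
+#   # Iterate(command, PrintResult)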