diff options
author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
---|---|---|
committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
commit | 920c091ac3ee15079194c82ae8a7a18215f3f23c (patch) | |
tree | d28515d1e7732e2b6d077df1b4855ace3f4ac84f /tools/site_compare | |
parent | ae2c20f398933a9e86c387dcc465ec0f71065ffc (diff) | |
download | chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.zip chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.gz chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.bz2 |
Add tools to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/site_compare')
27 files changed, 4118 insertions, 0 deletions
diff --git a/tools/site_compare/command_line.py b/tools/site_compare/command_line.py new file mode 100644 index 0000000..b99a1c9 --- /dev/null +++ b/tools/site_compare/command_line.py @@ -0,0 +1,823 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Parse a command line, retrieving a command and its arguments. + +Supports the concept of command line commands, each with its own set +of arguments. Supports dependent arguments and mutually exclusive arguments. 
+Basically, a better optparse. I took heed of epg's WHINE() in gvn.cmdline +and dumped optparse in favor of something better. +""" + +import os.path +import re +import string +import sys +import textwrap +import types + + +def IsString(var): + """Little helper function to see if a variable is a string.""" + return type(var) in types.StringTypes + + +class ParseError(Exception): + """Encapsulates errors from parsing, string arg is description.""" + pass + + +class Command(object): + """Implements a single command.""" + + def __init__(self, names, helptext, validator=None, impl=None): + """Initializes Command from names and helptext, plus optional callables. + + Args: + names: command name, or list of synonyms + helptext: brief string description of the command + validator: callable for custom argument validation + Should raise ParseError if it wants + impl: callable to be invoked when command is called + """ + self.names = names + self.validator = validator + self.helptext = helptext + self.impl = impl + self.args = [] + self.required_groups = [] + self.arg_dict = {} + self.positional_args = [] + self.cmdline = None + + class Argument(object): + """Encapsulates an argument to a command.""" + VALID_TYPES = ['string', 'readfile', 'int', 'flag', 'coords'] + TYPES_WITH_VALUES = ['string', 'readfile', 'int', 'coords'] + + def __init__(self, names, helptext, type, metaname, + required, default, positional): + """Command-line argument to a command. + + Args: + names: argument name, or list of synonyms + helptext: brief description of the argument + type: type of the argument. 
Valid values include: + string - a string + readfile - a file which must exist and be available + for reading + int - an integer + flag - an optional flag (bool) + coords - (x,y) where x and y are ints + metaname: Name to display for value in help, inferred if not + specified + required: True if argument must be specified + default: Default value if not specified + positional: Argument specified by location, not name + + Raises: + ValueError: the argument name is invalid for some reason + """ + if type not in Command.Argument.VALID_TYPES: + raise ValueError("Invalid type: %r" % type) + + if required and default is not None: + raise ValueError("required and default are mutually exclusive") + + if required and type == 'flag': + raise ValueError("A required flag? Give me a break.") + + if metaname and type not in Command.Argument.TYPES_WITH_VALUES: + raise ValueError("Type %r can't have a metaname" % type) + + # If no metaname is provided, infer it: use the alphabetical characters + # of the last provided name + if not metaname and type in Command.Argument.TYPES_WITH_VALUES: + metaname = ( + names[-1].lstrip(string.punctuation + string.whitespace).upper()) + + self.names = names + self.helptext = helptext + self.type = type + self.required = required + self.default = default + self.positional = positional + self.metaname = metaname + + self.mutex = [] # arguments that are mutually exclusive with + # this one + self.depends = [] # arguments that must be present for this + # one to be valid + self.present = False # has this argument been specified? + + def AddDependency(self, arg): + """Makes this argument dependent on another argument. + + Args: + arg: name of the argument this one depends on + """ + if arg not in self.depends: + self.depends.append(arg) + + def AddMutualExclusion(self, arg): + """Makes this argument invalid if another is specified. + + Args: + arg: name of the mutually exclusive argument. 
+ """ + if arg not in self.mutex: + self.mutex.append(arg) + + def GetUsageString(self): + """Returns a brief string describing the argument's usage.""" + if not self.positional: + string = self.names[0] + if self.type in Command.Argument.TYPES_WITH_VALUES: + string += "="+self.metaname + else: + string = self.metaname + + if not self.required: + string = "["+string+"]" + + return string + + def GetNames(self): + """Returns a string containing a list of the arg's names.""" + if self.positional: + return self.metaname + else: + return ", ".join(self.names) + + def GetHelpString(self, width=80, indent=5, names_width=20, gutter=2): + """Returns a help string including help for all the arguments.""" + names = [" "*indent + line +" "*(names_width-len(line)) for line in + textwrap.wrap(self.GetNames(), names_width)] + + helpstring = textwrap.wrap(self.helptext, width-indent-names_width-gutter) + + if len(names) < len(helpstring): + names += [" "*(indent+names_width)]*(len(helpstring)-len(names)) + + if len(helpstring) < len(names): + helpstring += [""]*(len(names)-len(helpstring)) + + return "\n".join([name_line + " "*gutter + help_line for + name_line, help_line in zip(names, helpstring)]) + + def __repr__(self): + if self.present: + string = '= %r' % self.value + else: + string = "(absent)" + + return "Argument %s '%s'%s" % (self.type, self.names[0], string) + + # end of nested class Argument + + def AddArgument(self, names, helptext, type="string", metaname=None, + required=False, default=None, positional=False): + """Command-line argument to a command. 
+ + Args: + names: argument name, or list of synonyms + helptext: brief description of the argument + type: type of the argument + metaname: Name to display for value in help, inferred if not + required: True if argument must be specified + default: Default value if not specified + positional: Argument specified by location, not name + + Raises: + ValueError: the argument already exists or is invalid + + Returns: + The newly-created argument + """ + if IsString(names): names = [names] + + names = [name.lower() for name in names] + + for name in names: + if name in self.arg_dict: + raise ValueError("%s is already an argument"%name) + + if (positional and required and + [arg for arg in self.args if arg.positional] and + not [arg for arg in self.args if arg.positional][-1].required): + raise ValueError( + "A required positional argument may not follow an optional one.") + + arg = Command.Argument(names, helptext, type, metaname, + required, default, positional) + + self.args.append(arg) + + for name in names: + self.arg_dict[name] = arg + + return arg + + def GetArgument(self, name): + """Return an argument from a name.""" + return self.arg_dict[name.lower()] + + def AddMutualExclusion(self, args): + """Specifies that a list of arguments are mutually exclusive.""" + if len(args) < 2: + raise ValueError("At least two arguments must be specified.") + + args = [arg.lower() for arg in args] + + for index in xrange(len(args)-1): + for index2 in xrange(index+1, len(args)): + self.arg_dict[args[index]].AddMutualExclusion(self.arg_dict[args[index2]]) + + def AddDependency(self, dependent, depends_on): + """Specifies that one argument may only be present if another is. 
+ + Args: + dependent: the name of the dependent argument + depends_on: the name of the argument on which it depends + """ + self.arg_dict[dependent.lower()].AddDependency( + self.arg_dict[depends_on.lower()]) + + def AddMutualDependency(self, args): + """Specifies that a list of arguments are all mutually dependent.""" + if len(args) < 2: + raise ValueError("At least two arguments must be specified.") + + args = [arg.lower() for arg in args] + + for (arg1, arg2) in [(arg1, arg2) for arg1 in args for arg2 in args]: + if arg1 == arg2: continue + self.arg_dict[arg1].AddDependency(self.arg_dict[arg2]) + + def AddRequiredGroup(self, args): + """Specifies that at least one of the named arguments must be present.""" + if len(args) < 2: + raise ValueError("At least two arguments must be in a required group.") + + args = [self.arg_dict[arg.lower()] for arg in args] + + self.required_groups.append(args) + + def ParseArguments(self): + """Given a command line, parse and validate the arguments.""" + + # reset all the arguments before we parse + for arg in self.args: + arg.present = False + arg.value = None + + self.parse_errors = [] + + # look for arguments remaining on the command line + while len(self.cmdline.rargs): + try: + self.ParseNextArgument() + except ParseError, e: + self.parse_errors.append(e.args[0]) + + # after all the arguments are parsed, check for problems + for arg in self.args: + if not arg.present and arg.required: + self.parse_errors.append("'%s': required parameter was missing" + % arg.names[0]) + + if not arg.present and arg.default: + arg.present = True + arg.value = arg.default + + if arg.present: + for mutex in arg.mutex: + if mutex.present: + self.parse_errors.append( + "'%s', '%s': arguments are mutually exclusive" % + (arg.argstr, mutex.argstr)) + + for depend in arg.depends: + if not depend.present: + self.parse_errors.append("'%s': '%s' must be specified as well" % + (arg.argstr, depend.names[0])) + + # check for required groups + for group in 
self.required_groups: + if not [arg for arg in group if arg.present]: + self.parse_errors.append("%s: at least one must be present" % + (", ".join(["'%s'" % arg.names[-1] for arg in group]))) + + # if we have any validators, invoke them + if not self.parse_errors and self.validator: + try: + self.validator(self) + except ParseError, e: + self.parse_errors.append(e.args[0]) + + # Helper methods so you can treat the command like a dict + def __getitem__(self, key): + arg = self.arg_dict[key.lower()] + + if arg.type == 'flag': + return arg.present + else: + return arg.value + + def __iter__(self): + return [arg for arg in self.args if arg.present].__iter__() + + def ArgumentPresent(self, key): + """Tests if an argument exists and has been specified.""" + return key.lower() in self.arg_dict and self.arg_dict[key.lower()].present + + def __contains__(self, key): + return self.ArgumentPresent(key) + + def ParseNextArgument(self): + """Find the next argument in the command line and parse it.""" + arg = None + value = None + argstr = self.cmdline.rargs.pop(0) + + # First check: is this a literal argument? + if argstr.lower() in self.arg_dict: + arg = self.arg_dict[argstr.lower()] + if arg.type in Command.Argument.TYPES_WITH_VALUES: + if len(self.cmdline.rargs): + value = self.cmdline.rargs.pop(0) + + # Second check: is this of the form "arg=val" or "arg:val"? + if arg is None: + delimiter_pos = -1 + + for delimiter in [':', '=']: + pos = argstr.find(delimiter) + if pos >= 0: + if delimiter_pos < 0 or pos < delimiter_pos: + delimiter_pos = pos + + if delimiter_pos >= 0: + testarg = argstr[:delimiter_pos] + testval = argstr[delimiter_pos+1:] + + if testarg.lower() in self.arg_dict: + arg = self.arg_dict[testarg.lower()] + argstr = testarg + value = testval + + # Third check: does this begin an argument? 
+ if arg is None: + for key in self.arg_dict.iterkeys(): + if (len(key) < len(argstr) and + self.arg_dict[key].type in Command.Argument.TYPES_WITH_VALUES and + argstr[:len(key)].lower() == key): + value = argstr[len(key):] + argstr = argstr[:len(key)] + arg = self.arg_dict[argstr] + + # Fourth check: do we have any positional arguments available? + if arg is None: + for positional_arg in [ + testarg for testarg in self.args if testarg.positional]: + if not positional_arg.present: + arg = positional_arg + value = argstr + argstr = positional_arg.names[0] + break + + # Push the retrieved argument/value onto the largs stack + if argstr: self.cmdline.largs.append(argstr) + if value: self.cmdline.largs.append(value) + + # If we've made it this far and haven't found an arg, give up + if arg is None: + raise ParseError("Unknown argument: '%s'" % argstr) + + # Convert the value, if necessary + if arg.type in Command.Argument.TYPES_WITH_VALUES and value is None: + raise ParseError("Argument '%s' requires a value" % argstr) + + if value is not None: + value = self.StringToValue(value, arg.type, argstr) + + arg.argstr = argstr + arg.value = value + arg.present = True + + # end method ParseNextArgument + + def StringToValue(self, value, type, argstr): + """Convert a string from the command line to a value type.""" + try: + if type == 'string': + pass # leave it be + + elif type == 'int': + try: + value = int(value) + except ValueError: + raise ParseError + + elif type == 'readfile': + if not os.path.isfile(value): + raise ParseError("'%s': '%s' does not exist" % (argstr, value)) + + elif type == 'coords': + try: + value = [int(val) for val in + re.match("\(\s*(\d+)\s*\,\s*(\d+)\s*\)\s*\Z", value). 
+ groups()] + except AttributeError: + raise ParseError + + else: + raise ValueError("Unknown type: '%s'" % type) + + except ParseError, e: + # The bare exception is raised in the generic case; more specific errors + # will arrive with arguments and should just be reraised + if not e.args: + e = ParseError("'%s': unable to convert '%s' to type '%s'" % + (argstr, value, type)) + raise e + + return value + + def SortArgs(self): + """Returns a method that can be passed to sort() to sort arguments.""" + + def ArgSorter(arg1, arg2): + """Helper for sorting arguments in the usage string. + + Positional arguments come first, then required arguments, + then optional arguments. Pylint demands this trivial function + have both Args: and Returns: sections, sigh. + + Args: + arg1: the first argument to compare + arg2: the second argument to compare + + Returns: + -1 if arg1 should be sorted first, +1 if it should be sorted second, + and 0 if arg1 and arg2 have the same sort level. + """ + return ((arg2.positional-arg1.positional)*2 + + (arg2.required-arg1.required)) + return ArgSorter + + def GetUsageString(self, width=80, name=None): + """Gets a string describing how the command is used.""" + if name is None: name = self.names[0] + + initial_indent = "Usage: %s %s " % (self.cmdline.prog, name) + subsequent_indent = " " * len(initial_indent) + + sorted_args = self.args[:] + sorted_args.sort(self.SortArgs()) + + return textwrap.fill( + " ".join([arg.GetUsageString() for arg in sorted_args]), width, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent) + + def GetHelpString(self, width=80): + """Returns a list of help strings for all this command's arguments.""" + sorted_args = self.args[:] + sorted_args.sort(self.SortArgs()) + + return "\n".join([arg.GetHelpString(width) for arg in sorted_args]) + + # end class Command + + +class CommandLine(object): + """Parse a command line, extracting a command and its arguments.""" + + def __init__(self): + self.commands = 
[] + self.cmd_dict = {} + + # Add the help command to the parser + help_cmd = self.AddCommand(["help", "--help", "-?", "-h"], + "Displays help text for a command", + ValidateHelpCommand, + DoHelpCommand) + + help_cmd.AddArgument( + "command", "Command to retrieve help for", positional=True) + help_cmd.AddArgument( + "--width", "Width of the output", type='int', default=80) + + self.Exit = sys.exit # override this if you don't want the script to halt + # on error or on display of help + + self.out = sys.stdout # override these if you want to redirect + self.err = sys.stderr # output or error messages + + def AddCommand(self, names, helptext, validator=None, impl=None): + """Add a new command to the parser. + + Args: + names: command name, or list of synonyms + helptext: brief string description of the command + validator: method to validate a command's arguments + impl: callable to be invoked when command is called + + Raises: + ValueError: raised if command already added + + Returns: + The new command + """ + if IsString(names): names = [names] + + for name in names: + if name in self.cmd_dict: + raise ValueError("%s is already a command"%name) + + cmd = Command(names, helptext, validator, impl) + cmd.cmdline = self + + self.commands.append(cmd) + for name in names: + self.cmd_dict[name.lower()] = cmd + + return cmd + + def GetUsageString(self): + """Returns simple usage instructions.""" + return "Type '%s help' for usage." % self.prog + + def ParseCommandLine(self, argv=None, prog=None, execute=True): + """Does the work of parsing a command line. 
+ + Args: + argv: list of arguments, defaults to sys.args[1:] + prog: name of the command, defaults to the base name of the script + execute: if false, just parse, don't invoke the 'impl' member + + Returns: + The command that was executed + """ + if argv is None: argv = sys.argv[1:] + if prog is None: prog = os.path.basename(sys.argv[0]).split('.')[0] + + # Store off our parameters, we may need them someday + self.argv = argv + self.prog = prog + + # We shouldn't be invoked without arguments, that's just lame + if not len(argv): + self.out.writelines(self.GetUsageString()) + self.Exit() + return None # in case the client overrides Exit + + # Is it a valid command? + self.command_string = argv[0].lower() + if not self.command_string in self.cmd_dict: + self.err.write("Unknown command: '%s'\n\n" % self.command_string) + self.out.write(self.GetUsageString()) + self.Exit() + return None # in case the client overrides Exit + + self.command = self.cmd_dict[self.command_string] + + # "rargs" = remaining (unparsed) arguments + # "largs" = already parsed, "left" of the read head + self.rargs = argv[1:] + self.largs = [] + + # let the command object do the parsing + self.command.ParseArguments() + + if self.command.parse_errors: + # there were errors, output the usage string and exit + self.err.write(self.command.GetUsageString()+"\n\n") + self.err.write("\n".join(self.command.parse_errors)) + self.err.write("\n\n") + + self.Exit() + + elif execute and self.command.impl: + self.command.impl(self.command) + + return self.command + + def __getitem__(self, key): + return self.cmd_dict[key] + + def __iter__(self): + return self.cmd_dict.__iter__() + + +def ValidateHelpCommand(command): + """Checks to make sure an argument to 'help' is a valid command.""" + if 'command' in command and command['command'] not in command.cmdline: + raise ParseError("'%s': unknown command" % command['command']) + + +def DoHelpCommand(command): + """Executed when the command is 'help'.""" + out = 
command.cmdline.out + width = command['--width'] + + if 'command' not in command: + out.write(command.GetUsageString()) + out.write("\n\n") + + indent = 5 + gutter = 2 + + command_width = ( + max([len(cmd.names[0]) for cmd in command.cmdline.commands]) + gutter) + + for cmd in command.cmdline.commands: + cmd_name = cmd.names[0] + + initial_indent = (" "*indent + cmd_name + " "* + (command_width+gutter-len(cmd_name))) + subsequent_indent = " "*(indent+command_width+gutter) + + out.write(textwrap.fill(cmd.helptext, width, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent)) + out.write("\n") + + out.write("\n") + + else: + help_cmd = command.cmdline[command['command']] + + out.write(textwrap.fill(help_cmd.helptext, width)) + out.write("\n\n") + out.write(help_cmd.GetUsageString(width=width)) + out.write("\n\n") + out.write(help_cmd.GetHelpString(width=width)) + out.write("\n") + + command.cmdline.Exit() + +if __name__ == "__main__": + # If we're invoked rather than imported, run some tests + cmdline = CommandLine() + + # Since we're testing, override Exit() + def TestExit(): + pass + cmdline.Exit = TestExit + + # Actually, while we're at it, let's override error output too + cmdline.err = open(os.path.devnull, "w") + + test = cmdline.AddCommand(["test", "testa", "testb"], "test command") + test.AddArgument(["-i", "--int", "--integer", "--optint", "--optionalint"], + "optional integer parameter", type='int') + test.AddArgument("--reqint", "required integer parameter", type='int', + required=True) + test.AddArgument("pos1", "required positional argument", positional=True, + required=True) + test.AddArgument("pos2", "optional positional argument", positional=True) + test.AddArgument("pos3", "another optional positional arg", + positional=True) + + # mutually dependent arguments + test.AddArgument("--mutdep1", "mutually dependent parameter 1") + test.AddArgument("--mutdep2", "mutually dependent parameter 2") + test.AddArgument("--mutdep3", "mutually 
dependent parameter 3") + test.AddMutualDependency(["--mutdep1", "--mutdep2", "--mutdep3"]) + + # mutually exclusive arguments + test.AddArgument("--mutex1", "mutually exclusive parameter 1") + test.AddArgument("--mutex2", "mutually exclusive parameter 2") + test.AddArgument("--mutex3", "mutually exclusive parameter 3") + test.AddMutualExclusion(["--mutex1", "--mutex2", "--mutex3"]) + + # dependent argument + test.AddArgument("--dependent", "dependent argument") + test.AddDependency("--dependent", "--int") + + # other argument types + test.AddArgument("--file", "filename argument", type='readfile') + test.AddArgument("--coords", "coordinate argument", type='coords') + test.AddArgument("--flag", "flag argument", type='flag') + + test.AddArgument("--req1", "part of a required group", type='flag') + test.AddArgument("--req2", "part 2 of a required group", type='flag') + + test.AddRequiredGroup(["--req1", "--req2"]) + + # a few failure cases + exception_cases = """ + test.AddArgument("failpos", "can't have req'd pos arg after opt", + positional=True, required=True) ++++ + test.AddArgument("--int", "this argument already exists") ++++ + test.AddDependency("--int", "--doesntexist") ++++ + test.AddMutualDependency(["--doesntexist", "--mutdep2"]) ++++ + test.AddMutualExclusion(["--doesntexist", "--mutex2"]) ++++ + test.AddArgument("--reqflag", "required flag", required=True, type='flag') ++++ + test.AddRequiredGroup(["--req1", "--doesntexist"]) +""" + for exception_case in exception_cases.split("+++"): + try: + exception_case = exception_case.strip() + exec exception_case # yes, I'm using exec, it's just for a test. + except ValueError: + # this is expected + pass + except KeyError: + # ...and so is this + pass + else: + print ("FAILURE: expected an exception for '%s'" + " and didn't get it" % exception_case) + + # Let's do some parsing! 
first, the minimal success line: + MIN = "test --reqint 123 param1 --req1 " + + # tuples of (command line, expected error count) + test_lines = [ + ("test --int 3 foo --req1", 1), # missing required named parameter + ("test --reqint 3 --req1", 1), # missing required positional parameter + (MIN, 0), # success! + ("test param1 --reqint 123 --req1", 0), # success, order shouldn't matter + ("test param1 --reqint 123 --req2", 0), # success, any of required group ok + (MIN+"param2", 0), # another positional parameter is okay + (MIN+"param2 param3", 0), # and so are three + (MIN+"param2 param3 param4", 1), # but four are just too many + (MIN+"--int", 1), # where's the value? + (MIN+"--int 456", 0), # this is fine + (MIN+"--int456", 0), # as is this + (MIN+"--int:456", 0), # and this + (MIN+"--int=456", 0), # and this + (MIN+"--file c:\\windows\\system32\\kernel32.dll", 0), # yup + (MIN+"--file c:\\thisdoesntexist", 1), # nope + (MIN+"--mutdep1 a", 2), # no! + (MIN+"--mutdep2 b", 2), # also no! + (MIN+"--mutdep3 c", 2), # dream on! + (MIN+"--mutdep1 a --mutdep2 b", 2), # almost! + (MIN+"--mutdep1 a --mutdep2 b --mutdep3 c", 0), # yes + (MIN+"--mutex1 a", 0), # yes + (MIN+"--mutex2 b", 0), # yes + (MIN+"--mutex3 c", 0), # fine + (MIN+"--mutex1 a --mutex2 b", 1), # not fine + (MIN+"--mutex1 a --mutex2 b --mutex3 c", 3), # even worse + (MIN+"--dependent 1", 1), # no + (MIN+"--dependent 1 --int 2", 0), # ok + (MIN+"--int abc", 1), # bad type + (MIN+"--coords abc", 1), # also bad + (MIN+"--coords (abc)", 1), # getting warmer + (MIN+"--coords (abc,def)", 1), # missing something + (MIN+"--coords (123)", 1), # ooh, so close + (MIN+"--coords (123,def)", 1), # just a little farther + (MIN+"--coords (123,456)", 0), # finally! 
+ ("test --int 123 --reqint=456 foo bar --coords(42,88) baz --req1", 0) + ] + + badtests = 0 + + for (test, expected_failures) in test_lines: + cmdline.ParseCommandLine([x.strip() for x in test.strip().split(" ")]) + + if not len(cmdline.command.parse_errors) == expected_failures: + print "FAILED:\n issued: '%s'\n expected: %d\n received: %d\n\n" % ( + test, expected_failures, len(cmdline.command.parse_errors)) + badtests += 1 + + print "%d failed out of %d tests" % (badtests, len(test_lines)) + + cmdline.ParseCommandLine(["help", "test"]) + diff --git a/tools/site_compare/commands/__init__.py b/tools/site_compare/commands/__init__.py new file mode 100644 index 0000000..a699508 --- /dev/null +++ b/tools/site_compare/commands/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/Python2.4 + diff --git a/tools/site_compare/commands/compare2.py b/tools/site_compare/commands/compare2.py new file mode 100644 index 0000000..6dc00c7 --- /dev/null +++ b/tools/site_compare/commands/compare2.py @@ -0,0 +1,196 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare command to invoke the same page in two versions of a browser. + +Does the easiest compatibility test: equality comparison between two different +versions of the same browser. Invoked with a series of command line options +that specify which URLs to check, which browser to use, where to store results, +etc. 
+""" + +import os # Functions for walking the directory tree +import tempfile # Get a temporary directory to hold intermediates + +import command_line +import drivers # Functions for driving keyboard/mouse/windows, OS-specific +import operators # Functions that, given two bitmaps as input, produce + # output depending on the performance of an operation +import scrapers # Functions that know how to capture a render from + # particular browsers + + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["compare2"], + "Compares the output of two browsers on the same URL or list of URLs", + ValidateCompare2, + ExecuteCompare2) + + cmd.AddArgument( + ["-b1", "--browser1"], "Full path to first browser's executable", + type="readfile", metaname="PATH", required=True) + cmd.AddArgument( + ["-b2", "--browser2"], "Full path to second browser's executable", + type="readfile", metaname="PATH", required=True) + cmd.AddArgument( + ["-b", "--browser"], "Which browser to use", type="string", + default="chrome") + cmd.AddArgument( + ["-b1v", "--browser1ver"], "Version of first browser", metaname="VERSION") + cmd.AddArgument( + ["-b2v", "--browser2ver"], "Version of second browser", metaname="VERSION") + cmd.AddArgument( + ["-b1n", "--browser1name"], "Optional name for first browser (used in " + "directory to hold intermediate files)", metaname="NAME") + cmd.AddArgument( + ["-b2n", "--browser2name"], "Optional name for second browser (used in " + "directory to hold intermediate files)", metaname="NAME") + cmd.AddArgument( + ["-o", "--outdir"], "Directory to store scrape files", metaname="DIR") + cmd.AddArgument( + ["-u", "--url"], "URL to compare") + cmd.AddArgument( + ["-l", "--list"], "List of URLs to compare", type="readfile") + cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last 
line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=60) + cmd.AddArgument( + ["-log", "--logfile"], "File to write output", type="string", required=True) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + cmd.AddArgument( + ["-m", "--maskdir"], "Path that holds masks to use for comparison") + cmd.AddArgument( + ["-d", "--diffdir"], "Path to hold the difference of comparisons that fail") + + +def ValidateCompare2(command): + """Validate the arguments to compare2. 
Raises ParseError if failed.""" + executables = [".exe", ".com", ".bat"] + if (os.path.splitext(command["--browser1"])[1].lower() not in executables or + os.path.splitext(command["--browser2"])[1].lower() not in executables): + raise command_line.ParseError("Browser filename must be an executable") + + +def ExecuteCompare2(command): + """Executes the Compare2 command.""" + if command["--url"]: + url_list = [command["--url"]] + else: + startline = command["--startline"] + if command["--count"]: + endline = startline+command["--count"] + else: + endline = command["--endline"] + url_list = [url.strip() for url in + open(command["--list"], "r").readlines()[startline:endline]] + + log_file = open(command["--logfile"], "w") + + outdir = command["--outdir"] + if not outdir: outdir = tempfile.gettempdir() + + scrape_info_list = [] + + class ScrapeInfo(object): + """Helper class to hold information about a scrape.""" + __slots__ = ["browser_path", "scraper", "outdir", "result"] + + for index in xrange(1, 3): + scrape_info = ScrapeInfo() + scrape_info.browser_path = command["--browser%d" % index] + scrape_info.scraper = scrapers.GetScraper( + (command["--browser"], command["--browser%dver" % index])) + + if command["--browser%dname" % index]: + scrape_info.outdir = os.path.join(outdir, + command["--browser%dname" % index]) + else: + scrape_info.outdir = os.path.join(outdir, str(index)) + + drivers.windowing.PreparePath(scrape_info.outdir) + scrape_info_list.append(scrape_info) + + compare = operators.GetOperator("equals_with_mask") + + for url in url_list: + success = True + + for scrape_info in scrape_info_list: + scrape_info.result = scrape_info.scraper.Scrape( + [url], scrape_info.outdir, command["--size"], (0, 0), + command["--timeout"], path=scrape_info.browser_path) + + if not scrape_info.result: + scrape_info.result = "success" + else: + success = False + + result = "unknown" + + if success: + result = "equal" + + file1 = drivers.windowing.URLtoFilename( + url, 
scrape_info_list[0].outdir, ".bmp") + file2 = drivers.windowing.URLtoFilename( + url, scrape_info_list[1].outdir, ".bmp") + + comparison_result = compare.Compare(file1, file2, + maskdir=command["--maskdir"]) + + if comparison_result is not None: + result = "not-equal" + + if command["--diffdir"]: + comparison_result[1].save( + drivers.windowing.URLtoFilename(url, command["--diffdir"], ".bmp")) + + # TODO(jhaas): maybe use the logging module rather than raw file writes + log_file.write("%s %s %s %s\n" % (url, + scrape_info_list[0].result, + scrape_info_list[1].result, + result)) diff --git a/tools/site_compare/commands/maskmaker.py b/tools/site_compare/commands/maskmaker.py new file mode 100644 index 0000000..95bdeb45 --- /dev/null +++ b/tools/site_compare/commands/maskmaker.py @@ -0,0 +1,298 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Component for automatically creating masks of changing areas of a website. + +Works by repeated invokation of a browser and scraping of the resulting page. +Areas that differ will be added to the auto-generated mask. The mask generator +considers the mask complete when further scrapes fail to produce any differences +in the mask. +""" + +import os # Functions for walking the directory tree +import tempfile # Get a temporary directory to hold intermediates +import time # Used for sleep() and naming masks by time + +import command_line +import drivers +from PIL import Image +from PIL import ImageChops +import scrapers + + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["maskmaker"], + "Automatically generates a mask from a list of URLs", + ValidateMaskmaker, + ExecuteMaskmaker) + + cmd.AddArgument( + ["-bp", "--browserpath"], "Full path to browser's executable", + type="readfile", metaname="PATH") + cmd.AddArgument( + ["-b", "--browser"], "Which browser to use", type="string", + default="chrome") + cmd.AddArgument( + ["-bv", "--browserver"], "Version of the browser", metaname="VERSION") + cmd.AddArgument( + ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR", + required=True) + cmd.AddArgument( + ["-u", "--url"], "URL to compare") + cmd.AddArgument( + ["-l", "--list"], "List of URLs to compare", type="readfile") + 
cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=60) + cmd.AddArgument( + ["-w", "--wait"], + "Amount of time (in seconds) to wait between successive scrapes", + type="int", default=60) + cmd.AddArgument( + ["-sc", "--scrapes"], + "Number of successive scrapes which must result in no change to a mask " + "before mask creation is considered complete", type="int", default=10) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes") + cmd.AddArgument( + ["-gu", "--giveup"], + "Number of times to scrape before giving up", type="int", default=50) + cmd.AddArgument( + ["-th", "--threshhold"], + "Percentage of different pixels (0-100) above which the scrape will be" + "discarded and the mask not updated.", type="int", default=100) + cmd.AddArgument( + ["--er", "--errors"], + "Number of times a scrape can fail before giving up on the URL.", + type="int", default=1) + + +def ValidateMaskmaker(command): + """Validate the arguments to maskmaker. 
Raises ParseError if failed.""" + executables = [".exe", ".com", ".bat"] + if command["--browserpath"]: + if os.path.splitext(command["--browserpath"])[1].lower() not in executables: + raise command_line.ParseError("Browser filename must be an executable") + + +def ExecuteMaskmaker(command): + """Performs automatic mask generation.""" + + # Get the list of URLs to generate masks for + class MaskmakerURL(object): + """Helper class for holding information about a URL passed to maskmaker.""" + __slots__ = ['url', 'consecutive_successes', 'errors'] + def __init__(self, url): + self.url = url + self.consecutive_successes = 0 + self.errors = 0 + + if command["--url"]: + url_list = [MaskmakerURL(command["--url"])] + else: + startline = command["--startline"] + if command["--count"]: + endline = startline+command["--count"] + else: + endline = command["--endline"] + url_list = [MaskmakerURL(url.strip()) for url in + open(command["--list"], "r").readlines()[startline:endline]] + + complete_list = [] + error_list = [] + + outdir = command["--outdir"] + scrapes = command["--scrapes"] + errors = command["--errors"] + size = command["--size"] + scrape_pass = 0 + + scrapedir = command["--scrapedir"] + if not scrapedir: scrapedir = tempfile.gettempdir() + + # Get the scraper + scraper = scrapers.GetScraper((command["--browser"], command["--browserver"])) + + # Repeatedly iterate through the list of URLs until either every URL has + # a successful mask or too many errors, or we've exceeded the giveup limit + while url_list and scrape_pass < command["--giveup"]: + # Scrape each URL + for url in url_list: + print "Processing %r..." % url.url + mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp") + + # Load the existing mask. This is in a loop so we can try to recover + # from error conditions + while True: + try: + mask = Image.open(mask_filename) + if mask.size != size: + print " %r already exists and is the wrong size! 
(%r vs %r)" % ( + mask_filename, mask.size, size) + mask_filename = "%s_%r%s" % ( + mask_filename[:-4], size, mask_filename[-4:]) + print " Trying again as %r..." % mask_filename + continue + break + except IOError: + print " %r does not exist, creating" % mask_filename + mask = Image.new("1", size, 1) + mask.save(mask_filename) + + # Find the stored scrape path + mask_scrape_dir = os.path.join( + scrapedir, os.path.splitext(os.path.basename(mask_filename))[0]) + drivers.windowing.PreparePath(mask_scrape_dir) + + # Find the baseline image + mask_scrapes = os.listdir(mask_scrape_dir) + mask_scrapes.sort() + + if not mask_scrapes: + print " No baseline image found, mask will not be updated" + baseline = None + else: + baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0])) + + mask_scrape_filename = os.path.join(mask_scrape_dir, + time.strftime("%y%m%d-%H%M%S.bmp")) + + # Do the scrape + result = scraper.Scrape( + [url.url], mask_scrape_dir, size, (0, 0), + command["--timeout"], path=command["--browserpath"], + filename=mask_scrape_filename) + + if result: + # Return value other than None means an error + print " Scrape failed with error '%r'" % result + url.errors += 1 + if url.errors >= errors: + print " ** Exceeded maximum error count for this URL, giving up" + continue + + # Load the new scrape + scrape = Image.open(mask_scrape_filename) + + # Calculate the difference between the new scrape and the baseline, + # subject to the current mask + if baseline: + diff = ImageChops.multiply(ImageChops.difference(scrape, baseline), + mask.convert(scrape.mode)) + + # If the difference is none, there's nothing to update + if max(diff.getextrema()) == (0, 0): + print " Scrape identical to baseline, no change in mask" + url.consecutive_successes += 1 + if url.consecutive_successes >= scrapes: + print " ** No change for %r scrapes, done!" 
% scrapes + else: + # convert the difference to black and white, then change all + # black pixels (where the scrape and the baseline were identical) + # to white, all others (where the scrape and the baseline differed) + # to black. + # + # Since the below command is a little unclear, here's how it works. + # 1. convert("L") converts the RGB image to grayscale + # 2. point() maps grayscale values (or the individual channels) + # of an RGB image) to different ones. Because it operates on + # individual channels, the grayscale conversion from step 1 + # is necessary. + # 3. The "1" second parameter to point() outputs the result as + # a monochrome bitmap. If the original RGB image were converted + # directly to monochrome, PIL would dither it. + diff = diff.convert("L").point([255]+[0]*255, "1") + + # count the number of different pixels + diff_pixels = diff.getcolors()[0][0] + + # is this too much? + diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1]) + if diff_pixel_percent > command["--threshhold"]: + print (" Scrape differed from baseline by %.2f percent, ignoring" + % diff_pixel_percent) + else: + print " Scrape differed in %d pixels, updating mask" % diff_pixels + mask = ImageChops.multiply(mask, diff) + mask.save(mask_filename) + + # reset the number of consecutive "good" scrapes + url.consecutive_successes = 0 + + # Remove URLs whose mask is deemed done + complete_list.extend( + [url for url in url_list if url.consecutive_successes >= scrapes]) + error_list.extend( + [url for url in url_list if url.errors >= errors]) + url_list = [ + url for url in url_list if + url.consecutive_successes < scrapes and + url.errors < errors] + + scrape_pass += 1 + print "**Done with scrape pass %d\n" % scrape_pass + + if scrape_pass >= command["--giveup"]: + print "**Exceeded giveup threshhold. Giving up." + else: + print "Waiting %d seconds..." 
% command["--wait"] + time.sleep(command["--wait"]) + + print + print "*** MASKMAKER COMPLETE ***" + print "Summary report:" + print " %d masks successfully generated" % len(complete_list) + for url in complete_list: + print " ", url.url + print " %d masks failed with too many errors" % len(error_list) + for url in error_list: + print " ", url.url + if scrape_pass >= command["--giveup"]: + print (" %d masks were not completed before " + "reaching the giveup threshhold" % len(url_list)) + for url in url_list: + print " ", url.url diff --git a/tools/site_compare/commands/measure.py b/tools/site_compare/commands/measure.py new file mode 100644 index 0000000..477db57 --- /dev/null +++ b/tools/site_compare/commands/measure.py @@ -0,0 +1,78 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Command for measuring how long pages take to load in a browser. + +Prerequisites: + 1. The command_line package from tools/site_compare + 2. Either the IE BHO or Firefox extension (or both) + +Installation: + 1. Build the IE BHO, or call regsvr32 on a prebuilt binary + 2. Add a file called "measurepageloadtimeextension@google.com" to + the default Firefox profile directory under extensions, containing + the path to the Firefox extension root + +Invoke with the command line arguments as documented within +the command line. +""" + +import command_line +import win32process + +from drivers import windowing +from utils import browser_iterate + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["measure"], + "Measures how long a series of URLs takes to load in one or more browsers.", + None, + ExecuteMeasure) + + browser_iterate.SetupIterationCommandLine(cmd) + cmd.AddArgument( + ["-log", "--logfile"], "File to write output", type="string", required=True) + + +def ExecuteMeasure(command): + """Executes the Measure command.""" + + def LogResult(url, proc, wnd, result): + """Write the result of the browse to the log file.""" + log_file.write(result) + + log_file = open(command["--logfile"], "w") + + browser_iterate.Iterate(command, LogResult) + + # Close the log file and return. We're done. 
+ log_file.close() diff --git a/tools/site_compare/commands/scrape.py b/tools/site_compare/commands/scrape.py new file mode 100644 index 0000000..a9b3398 --- /dev/null +++ b/tools/site_compare/commands/scrape.py @@ -0,0 +1,85 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Command for scraping images from a URL or list of URLs. + +Prerequisites: + 1. The command_line package from tools/site_compare + 2. 
Either the IE BHO or Firefox extension (or both) + +Installation: + 1. Build the IE BHO, or call regsvr32 on a prebuilt binary + 2. Add a file called "measurepageloadtimeextension@google.com" to + the default Firefox profile directory under extensions, containing + the path to the Firefox extension root + +Invoke with the command line arguments as documented within +the command line. +""" + +import command_line + +from drivers import windowing +from utils import browser_iterate + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["scrape"], + "Scrapes an image from a URL or series of URLs.", + None, + ExecuteScrape) + + browser_iterate.SetupIterationCommandLine(cmd) + cmd.AddArgument( + ["-log", "--logfile"], "File to write text output", type="string") + cmd.AddArgument( + ["-out", "--outdir"], "Directory to store scrapes", type="string", required=True) + + +def ExecuteScrape(command): + """Executes the Scrape command.""" + + def ScrapeResult(url, proc, wnd, result): + """Capture and save the scrape.""" + if log_file: log_file.write(result) + + # Scrape the page + image = windowing.ScrapeWindow(wnd) + filename = windowing.URLtoFilename(url, command["--outdir"], ".bmp") + image.save(filename) + + if command["--logfile"]: log_file = open(command["--logfile"], "w") + else: log_file = None + + browser_iterate.Iterate(command, ScrapeResult) + + # Close the log file and return. We're done. + if log_file: log_file.close() diff --git a/tools/site_compare/commands/timeload.py b/tools/site_compare/commands/timeload.py new file mode 100644 index 0000000..a983173 --- /dev/null +++ b/tools/site_compare/commands/timeload.py @@ -0,0 +1,170 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare command to time page loads + +Loads a series of URLs in a series of browsers (and browser versions) +and measures how long the page takes to load in each. Outputs a +comma-delimited file. The first line is "URL,[browser names", each +additional line is a URL follored by comma-delimited times (in seconds), +or the string "timeout" or "crashed". 
+ +""" + +import os # Functions for walking the directory tree +import tempfile # Get a temporary directory to hold intermediates + +import command_line +import drivers # Functions for driving keyboard/mouse/windows, OS-specific +import operators # Functions that, given two bitmaps as input, produce + # output depending on the performance of an operation +import scrapers # Functions that know how to capture a render from + # particular browsers + + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["timeload"], + "Measures how long a series of URLs takes to load in one or more browsers.", + None, + ExecuteTimeLoad) + + cmd.AddArgument( + ["-b", "--browsers"], "List of browsers to use. Comma-separated", + type="string", required=True) + cmd.AddArgument( + ["-bp", "--browserpaths"], "List of paths to browsers. Comma-separated", + type="string", required=False) + cmd.AddArgument( + ["-bv", "--browserversions"], "List of versions of browsers. 
Comma-separated", + type="string", required=False) + cmd.AddArgument( + ["-u", "--url"], "URL to time") + cmd.AddArgument( + ["-l", "--list"], "List of URLs to time", type="readfile") + cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=60) + cmd.AddArgument( + ["-log", "--logfile"], "File to write output", type="string", required=True) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + + +def ExecuteTimeLoad(command): + """Executes the TimeLoad command.""" + browsers = command["--browsers"].split(",") + num_browsers = len(browsers) + + if command["--browserversions"]: + browser_versions = command["--browserversions"].split(",") + else: + browser_versions = [None] * num_browsers + + if command["--browserpaths"]: + browser_paths = command["--browserpaths"].split(",") + else: + browser_paths = [None] * num_browsers + + if len(browser_versions) != num_browsers: + raise ValueError( + "--browserversions must be same length as --browser_paths") + if len(browser_paths) != num_browsers: + raise ValueError( + "--browserversions must be same length as --browser_paths") + + if [b for b in browsers if b not in ["chrome", "ie", "firefox"]]: + raise ValueError("unknown browsers: %r" % b) + + scraper_list = [] + + for b in xrange(num_browsers): + version = browser_versions[b] + if 
not version: version = None + + scraper = scrapers.GetScraper( (browsers[b], version) ) + if not scraper: + raise ValueError("could not find scraper for (%r, %r)" % + (browsers[b], version)) + scraper_list.append(scraper) + + if command["--url"]: + url_list = [command["--url"]] + else: + startline = command["--startline"] + if command["--count"]: + endline = startline+command["--count"] + else: + endline = command["--endline"] + url_list = [url.strip() for url in + open(command["--list"], "r").readlines()[startline:endline]] + + log_file = open(command["--logfile"], "w") + + log_file.write("URL") + for b in xrange(num_browsers): + log_file.write(",%s" % browsers[b]) + + if browser_versions[b]: log_file.write(" %s" % browser_versions[b]) + log_file.write("\n") + + results = {} + for url in url_list: + results[url] = [None] * num_browsers + + for b in xrange(num_browsers): + result = scraper_list[b].Time(url_list, command["--size"], + command["--timeout"], + path=browser_paths[b]) + + for (url, time) in result: + results[url][b] = time + + # output the results + for url in url_list: + log_file.write(url) + for b in xrange(num_browsers): + log_file.write(",%r" % results[url][b]) + diff --git a/tools/site_compare/drivers/__init__.py b/tools/site_compare/drivers/__init__.py new file mode 100644 index 0000000..befc1353 --- /dev/null +++ b/tools/site_compare/drivers/__init__.py @@ -0,0 +1,15 @@ +#!/usr/bin/python2.4 +# +# Copyright 2007 Google Inc. All Rights Reserved. 
+ +"""Imports a set of drivers appropriate to the current OS.""" + +__author__ = 'jhaas@google.com (Jonathan Haas)' + +import sys + +platform_dir = sys.platform + +keyboard = __import__(platform_dir+".keyboard", globals(), locals(), ['']) +mouse = __import__(platform_dir+".mouse", globals(), locals(), ['']) +windowing = __import__(platform_dir+".windowing", globals(), locals(), ['']) diff --git a/tools/site_compare/drivers/win32/__init__.py b/tools/site_compare/drivers/win32/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tools/site_compare/drivers/win32/__init__.py diff --git a/tools/site_compare/drivers/win32/keyboard.py b/tools/site_compare/drivers/win32/keyboard.py new file mode 100644 index 0000000..5888318 --- /dev/null +++ b/tools/site_compare/drivers/win32/keyboard.py @@ -0,0 +1,223 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare module for simulating keyboard input. + +This module contains functions that can be used to simulate a user +pressing keys on a keyboard. Support is provided for formatted strings +including special characters to represent modifier keys like CTRL and ALT +""" + +import time # for sleep +import win32api # for keybd_event and VkKeyCode +import win32con # Windows constants + +# TODO(jhaas): Ask the readability guys if this would be acceptable: +# +# from win32con import VK_SHIFT, VK_CONTROL, VK_MENU, VK_LWIN, KEYEVENTF_KEYUP +# +# This is a violation of the style guide but having win32con. everywhere +# is just plain ugly, and win32con is a huge import for just a handful of +# constants + + +def PressKey(down, key): + """Presses or unpresses a key. + + Uses keybd_event to simulate either depressing or releasing + a key + + Args: + down: Whether the key is to be pressed or released + key: Virtual key code of key to press or release + """ + + # keybd_event injects key events at a very low level (it's the + # Windows API keyboard device drivers call) so this is a very + # reliable way of simulating user input + win32api.keybd_event(key, 0, (not down) * win32con.KEYEVENTF_KEYUP) + + +def TypeKey(key, keystroke_time=0): + """Simulate a keypress of a virtual key. 
+ + Args: + key: which key to press + keystroke_time: length of time (in seconds) to "hold down" the key + Note that zero works just fine + + Returns: + None + """ + + # This just wraps a pair of PressKey calls with an intervening delay + PressKey(True, key) + time.sleep(keystroke_time) + PressKey(False, key) + + +def TypeString(string_to_type, + use_modifiers=False, + keystroke_time=0, + time_between_keystrokes=0): + """Simulate typing a string on the keyboard. + + Args: + string_to_type: the string to print + use_modifiers: specifies whether the following modifier characters + should be active: + {abc}: type characters with ALT held down + [abc]: type characters with CTRL held down + \ escapes {}[] and treats these values as literal + standard escape sequences are valid even if use_modifiers is false + \p is "pause" for one second, useful when driving menus + \1-\9 is F-key, \0 is F10 + + TODO(jhaas): support for explicit control of SHIFT, support for + nonprintable keys (F-keys, ESC, arrow keys, etc), + support for explicit control of left vs. 
right ALT or SHIFT, + support for Windows key + + keystroke_time: length of time (in secondes) to "hold down" the key + time_between_keystrokes: length of time (seconds) to pause between keys + + Returns: + None + """ + + shift_held = win32api.GetAsyncKeyState(win32con.VK_SHIFT ) < 0 + ctrl_held = win32api.GetAsyncKeyState(win32con.VK_CONTROL) < 0 + alt_held = win32api.GetAsyncKeyState(win32con.VK_MENU ) < 0 + + next_escaped = False + escape_chars = { + 'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', 'v': '\v'} + + for char in string_to_type: + vk = None + handled = False + + # Check to see if this is the start or end of a modified block (that is, + # {abc} for ALT-modified keys or [abc] for CTRL-modified keys + if use_modifiers and not next_escaped: + handled = True + if char == "{" and not alt_held: + alt_held = True + PressKey(True, win32con.VK_MENU) + elif char == "}" and alt_held: + alt_held = False + PressKey(False, win32con.VK_MENU) + elif char == "[" and not ctrl_held: + ctrl_held = True + PressKey(True, win32con.VK_CONTROL) + elif char == "]" and ctrl_held: + ctrl_held = False + PressKey(False, win32con.VK_CONTROL) + else: + handled = False + + # If this is an explicitly-escaped character, replace it with the + # appropriate code + if next_escaped and char in escape_chars: char = escape_chars[char] + + # If this is \p, pause for one second. 
+ if next_escaped and char == 'p': + time.sleep(1) + next_escaped = False + handled = True + + # If this is \(d), press F key + if next_escaped and char.isdigit(): + fkey = int(char) + if not fkey: fkey = 10 + next_escaped = False + vk = win32con.VK_F1 + fkey - 1 + + # If this is the backslash, the next character is escaped + if not next_escaped and char == "\\": + next_escaped = True + handled = True + + # If we make it here, it's not a special character, or it's an + # escaped special character which should be treated as a literal + if not handled: + next_escaped = False + if not vk: vk = win32api.VkKeyScan(char) + + # VkKeyScan() returns the scan code in the low byte. The upper + # byte specifies modifiers necessary to produce the given character + # from the given scan code. The only one we're concerned with at the + # moment is Shift. Determine the shift state and compare it to the + # current state... if it differs, press or release the shift key. + new_shift_held = bool(vk & (1<<8)) + + if new_shift_held != shift_held: + PressKey(new_shift_held, win32con.VK_SHIFT) + shift_held = new_shift_held + + # Type the key with the specified length, then wait the specified delay + TypeKey(vk & 0xFF, keystroke_time) + time.sleep(time_between_keystrokes) + + # Release the modifier keys, if held + if shift_held: PressKey(False, win32con.VK_SHIFT) + if ctrl_held: PressKey(False, win32con.VK_CONTROL) + if alt_held: PressKey(False, win32con.VK_MENU) + +if __name__ == "__main__": + # We're being invoked rather than imported. 
Let's do some tests
+
+  # Press command-R to bring up the Run dialog
+  PressKey(True, win32con.VK_LWIN)
+  TypeKey(ord('R'))
+  PressKey(False, win32con.VK_LWIN)
+
+  # Wait a sec to make sure it comes up
+  time.sleep(1)
+
+  # Invoke Wordpad through the Run dialog
+  TypeString("wordpad\n")
+
+  # Wait another sec, then start typing
+  time.sleep(1)
+  TypeString("This is a test of SiteCompare's Keyboard.py module.\n\n")
+  TypeString("There should be a blank line above and below this one.\n\n")
+  TypeString("This line has control characters to make "
+             "[b]boldface text[b] and [i]italic text[i] and normal text.\n\n",
+             use_modifiers=True)
+  TypeString(r"This line should be typed with a visible delay between "
+             "characters. When it ends, there should be a 3-second pause, "
+             "then the menu will select File/Exit, then another 3-second "
+             "pause, then No to exit without saving. Ready?\p\p\p{f}x\p\p\pn",
+             use_modifiers=True,
+             keystroke_time=0.05,
+             time_between_keystrokes=0.05)
+
\ No newline at end of file diff --git a/tools/site_compare/drivers/win32/mouse.py b/tools/site_compare/drivers/win32/mouse.py new file mode 100644 index 0000000..9475f2d --- /dev/null +++ b/tools/site_compare/drivers/win32/mouse.py @@ -0,0 +1,243 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare module for simulating mouse input. + +This module contains functions that can be used to simulate a user +navigating using a pointing device. 
This includes mouse movement, +clicking with any button, and dragging. +""" + +import time # for sleep + +import win32api # for mouse_event +import win32con # Windows constants +import win32gui # for window functions + + +def ScreenToMouse(pt): + """Convert a value in screen coordinates to mouse coordinates. + + Mouse coordinates are specified as a percentage of screen dimensions, + normalized to 16 bits. 0 represents the far left/top of the screen, + 65535 represents the far right/bottom. This function assumes that + the size of the screen is fixed at module load time and does not change + + Args: + pt: the point of the coords to convert + + Returns: + the converted point + """ + + # Initialize the screen dimensions on first execution. Note that this + # function assumes that the screen dimensions do not change during run. + if not ScreenToMouse._SCREEN_DIMENSIONS: + desktop = win32gui.GetClientRect(win32gui.GetDesktopWindow()) + ScreenToMouse._SCREEN_DIMENSIONS = (desktop[2], desktop[3]) + + return ((65535 * pt[0]) / ScreenToMouse._SCREEN_DIMENSIONS[0], + (65535 * pt[1]) / ScreenToMouse._SCREEN_DIMENSIONS[1]) + +ScreenToMouse._SCREEN_DIMENSIONS = None + + +def PressButton(down, button='left'): + """Simulate a mouse button press or release at the current mouse location. + + Args: + down: whether the button is pressed or released + button: which button is pressed + + Returns: + None + """ + + # Put the mouse_event flags in a convenient dictionary by button + flags = { + 'left': (win32con.MOUSEEVENTF_LEFTUP, win32con.MOUSEEVENTF_LEFTDOWN), + 'middle': (win32con.MOUSEEVENTF_MIDDLEUP, win32con.MOUSEEVENTF_MIDDLEDOWN), + 'right': (win32con.MOUSEEVENTF_RIGHTUP, win32con.MOUSEEVENTF_RIGHTDOWN) + } + + # hit the button + win32api.mouse_event(flags[button][down], 0, 0) + + +def ClickButton(button='left', click_time=0): + """Press and release a mouse button at the current mouse location. 
+ + Args: + button: which button to click + click_time: duration between press and release + + Returns: + None + """ + PressButton(True, button) + time.sleep(click_time) + PressButton(False, button) + + +def DoubleClickButton(button='left', click_time=0, time_between_clicks=0): + """Double-click a mouse button at the current mouse location. + + Args: + button: which button to click + click_time: duration between press and release + time_between_clicks: time to pause between clicks + + Returns: + None + """ + ClickButton(button, click_time) + time.sleep(time_between_clicks) + ClickButton(button, click_time) + + +def MoveToLocation(pos, duration=0, tick=0.01): + """Move the mouse cursor to a specified location, taking the specified time. + + Args: + pos: position (in screen coordinates) to move to + duration: amount of time the move should take + tick: amount of time between successive moves of the mouse + + Returns: + None + """ + # calculate the number of moves to reach the destination + num_steps = (duration/tick)+1 + + # get the current and final mouse position in mouse coords + current_location = ScreenToMouse(win32gui.GetCursorPos()) + end_location = ScreenToMouse(pos) + + # Calculate the step size + step_size = ((end_location[0]-current_location[0])/num_steps, + (end_location[1]-current_location[1])/num_steps) + step = 0 + + while step < num_steps: + # Move the mouse one step + current_location = (current_location[0]+step_size[0], + current_location[1]+step_size[1]) + + # Coerce the coords to int to avoid a warning from pywin32 + win32api.mouse_event( + win32con.MOUSEEVENTF_MOVE|win32con.MOUSEEVENTF_ABSOLUTE, + int(current_location[0]), int(current_location[1])) + + step += 1 + time.sleep(tick) + + +def ClickAtLocation(pos, button='left', click_time=0): + """Simulate a mouse click in a particular location, in screen coordinates. 
+ + Args: + pos: position in screen coordinates (x,y) + button: which button to click + click_time: duration of the click + + Returns: + None + """ + MoveToLocation(pos) + ClickButton(button, click_time) + + +def ClickInWindow(hwnd, offset=None, button='left', click_time=0): + """Simulate a user mouse click in the center of a window. + + Args: + hwnd: handle of the window to click in + offset: where to click, defaults to dead center + button: which button to click + click_time: duration of the click + + Returns: + Nothing + """ + + rect = win32gui.GetClientRect(hwnd) + if offset is None: offset = (rect[2]/2, rect[3]/2) + + # get the screen coordinates of the window's center + pos = win32gui.ClientToScreen(hwnd, offset) + + ClickAtLocation(pos, button, click_time) + + +def DoubleClickInWindow( + hwnd, offset=None, button='left', click_time=0, time_between_clicks=0.1): + """Simulate a user mouse double click in the center of a window. + + Args: + hwnd: handle of the window to click in + offset: where to click, defaults to dead center + button: which button to click + click_time: duration of the clicks + time_between_clicks: length of time to pause between clicks + + Returns: + Nothing + """ + ClickInWindow(hwnd, offset, button, click_time) + time.sleep(time_between_clicks) + ClickInWindow(hwnd, offset, button, click_time) + +if __name__ == "__main__": + # We're being invoked rather than imported. Let's do some tests + + screen_size = win32gui.GetClientRect(win32gui.GetDesktopWindow()) + screen_size = (screen_size[2], screen_size[3]) + + # move the mouse (instantly) to the upper right corner + MoveToLocation((screen_size[0], 0)) + + # move the mouse (over five seconds) to the lower left corner + MoveToLocation((0, screen_size[1]), 5) + + # click the left mouse button. 
This will open up the Start menu + # if the taskbar is at the bottom + + ClickButton() + + # wait a bit, then click the right button to open the context menu + time.sleep(3) + ClickButton('right') + + # move the mouse away and then click the left button to dismiss the + # context menu + MoveToLocation((screen_size[0]/2, screen_size[1]/2), 3) + MoveToLocation((0, 0), 3) + ClickButton() +
\ No newline at end of file diff --git a/tools/site_compare/drivers/win32/windowing.py b/tools/site_compare/drivers/win32/windowing.py new file mode 100644 index 0000000..94ec511 --- /dev/null +++ b/tools/site_compare/drivers/win32/windowing.py @@ -0,0 +1,386 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare module for invoking, locating, and manipulating windows. 
+ +This module is a catch-all wrapper for operating system UI functionality +that doesn't belong in other modules. It contains functions for finding +particular windows, scraping their contents, and invoking processes to +create them. +""" + +import os +import string +import time + +import PIL.ImageGrab +import pywintypes +import win32event +import win32gui +import win32process + + +def FindChildWindows(hwnd, path): + """Find a set of windows through a path specification. + + Args: + hwnd: Handle of the parent window + path: Path to the window to find. Has the following form: + "foo/bar/baz|foobar/|foobarbaz" + The slashes specify the "path" to the child window. + The text is the window class, a pipe (if present) is a title. + * is a wildcard and will find all child windows at that level + + Returns: + A list of the windows that were found + """ + windows_to_check = [hwnd] + + # The strategy will be to take windows_to_check and use it + # to find a list of windows that match the next specification + # in the path, then repeat with the list of found windows as the + # new list of windows to check + for segment in path.split("/"): + windows_found = [] + check_values = segment.split("|") + + # check_values is now a list with the first element being + # the window class, the second being the window caption. 
+ # If the class is absent (or wildcarded) set it to None + if check_values[0] == "*" or not check_values[0]: check_values[0] = None + + # If the window caption is also absent, force it to None as well + if len(check_values) == 1: check_values.append(None) + + # Loop through the list of windows to check + for window_check in windows_to_check: + window_found = None + while window_found != 0: # lint complains, but 0 != None + if window_found is None: window_found = 0 + try: + # Look for the next sibling (or first sibling if window_found is 0) + # of window_check with the specified caption and/or class + window_found = win32gui.FindWindowEx( + window_check, window_found, check_values[0], check_values[1]) + except pywintypes.error, e: + # FindWindowEx() raises error 2 if not found + if e[0] == 2: + window_found = 0 + else: + raise e + + # If FindWindowEx struck gold, add to our list of windows found + if window_found: windows_found.append(window_found) + + # The windows we found become the windows to check for the next segment + windows_to_check = windows_found + + return windows_found + + +def FindChildWindow(hwnd, path): + """Find a window through a path specification. + + This method is a simple wrapper for FindChildWindows() for the + case (the majority case) where you expect to find a single window + + Args: + hwnd: Handle of the parent window + path: Path to the window to find. See FindChildWindows() + + Returns: + The window that was found + """ + return FindChildWindows(hwnd, path)[0] + + +def ScrapeWindow(hwnd, rect=None): + """Scrape a visible window and return its contents as a bitmap. 
+ + Args: + hwnd: handle of the window to scrape + rect: rectangle to scrape in client coords, defaults to the whole thing + If specified, it's a 4-tuple of (left, top, right, bottom) + + Returns: + An Image containing the scraped data + """ + # Activate the window + SetForegroundWindow(hwnd) + + # If no rectangle was specified, use the fill client rectangle + if not rect: rect = win32gui.GetClientRect(hwnd) + + upper_left = win32gui.ClientToScreen(hwnd, (rect[0], rect[1])) + lower_right = win32gui.ClientToScreen(hwnd, (rect[2], rect[3])) + rect = upper_left+lower_right + + return PIL.ImageGrab.grab(rect) + + +def SetForegroundWindow(hwnd): + """Bring a window to the foreground.""" + win32gui.SetForegroundWindow(hwnd) + + +def InvokeAndWait(path, cmdline="", timeout=10, tick=1.): + """Invoke an application and wait for it to bring up a window. + + Args: + path: full path to the executable to invoke + cmdline: command line to pass to executable + timeout: how long (in seconds) to wait before giving up + tick: length of time to wait between checks + + Returns: + A tuple of handles to the process and the application's window, + or (None, None) if it timed out waiting for the process + """ + + def EnumWindowProc(hwnd, ret): + """Internal enumeration func, checks for visibility and proper PID.""" + if win32gui.IsWindowVisible(hwnd): # don't bother even checking hidden wnds + pid = win32process.GetWindowThreadProcessId(hwnd)[1] + if pid == ret[0]: + ret[1] = hwnd + return 0 # 0 means stop enumeration + return 1 # 1 means continue enumeration + + # We don't need to change anything about the startupinfo structure + # (the default is quite sufficient) but we need to create it just the + # same. 
+ sinfo = win32process.STARTUPINFO() + + proc = win32process.CreateProcess( + path, # path to new process's executable + cmdline, # application's command line + None, # process security attributes (default) + None, # thread security attributes (default) + False, # inherit parent's handles + 0, # creation flags + None, # environment variables + None, # directory + sinfo) # default startup info + + # Create process returns (prochandle, pid, threadhandle, tid). At + # some point we may care about the other members, but for now, all + # we're after is the pid + pid = proc[2] + + # Enumeration APIs can take an arbitrary integer, usually a pointer, + # to be passed to the enumeration function. We'll pass a pointer to + # a structure containing the PID we're looking for, and an empty out + # parameter to hold the found window ID + ret = [pid, None] + + tries_until_timeout = timeout/tick + num_tries = 0 + + # Enumerate top-level windows, look for one with our PID + while num_tries < tries_until_timeout and ret[1] is None: + try: + win32gui.EnumWindows(EnumWindowProc, ret) + except pywintypes.error, e: + # error 0 isn't an error, it just meant the enumeration was + # terminated early + if e[0]: raise e + + time.sleep(tick) + num_tries += 1 + + # TODO(jhaas): Should we throw an exception if we timeout? Or is returning + # a window ID of None sufficient? + return (proc[0], ret[1]) + + +def WaitForProcessExit(proc, timeout=None): + """Waits for a given process to terminate. + + Args: + proc: handle to process + timeout: timeout (in seconds). None = wait indefinitely + + Returns: + True if process ended, False if timed out + """ + if timeout is None: + timeout = win32event.INFINITE + else: + # convert sec to msec + timeout *= 1000 + + return (win32event.WaitForSingleObject(proc, timeout) == + win32event.WAIT_OBJECT_0) + + +def WaitForThrobber(hwnd, rect=None, timeout=20, tick=0.1, done=10): + """Wait for a browser's "throbber" (loading animation) to complete. 
+ + Args: + hwnd: window containing the throbber + rect: rectangle of the throbber, in client coords. If None, whole window + timeout: if the throbber is still throbbing after this long, give up + tick: how often to check the throbber + done: how long the throbber must be unmoving to be considered done + + Returns: + Number of seconds waited, -1 if timed out + """ + if not rect: rect = win32gui.GetClientRect(hwnd) + + # last_throbber will hold the results of the preceding scrape; + # we'll compare it against the current scrape to see if we're throbbing + last_throbber = ScrapeWindow(hwnd, rect) + start_clock = time.clock() + timeout_clock = start_clock + timeout + last_changed_clock = start_clock; + + while time.clock() < timeout_clock: + time.sleep(tick) + + current_throbber = ScrapeWindow(hwnd, rect) + if current_throbber.tostring() != last_throbber.tostring(): + last_throbber = current_throbber + last_changed_clock = time.clock() + else: + if time.clock() - last_changed_clock > done: + return last_changed_clock - start_clock + + return -1 + + +def MoveAndSizeWindow(wnd, position=None, size=None, child=None): + """Moves and/or resizes a window. + + Repositions and resizes a window. 
If a child window is provided, + the parent window is resized so the child window has the given size + + Args: + wnd: handle of the frame window + position: new location for the frame window + size: new size for the frame window (or the child window) + child: handle of the child window + + Returns: + None + """ + rect = win32gui.GetWindowRect(wnd) + + if position is None: position = (rect[0], rect[1]) + if size is None: + size = (rect[2]-rect[0], rect[3]-rect[1]) + elif child is not None: + child_rect = win32gui.GetWindowRect(child) + slop = (rect[2]-rect[0]-child_rect[2]+child_rect[0], + rect[3]-rect[1]-child_rect[3]+child_rect[1]) + size = (size[0]+slop[0], size[1]+slop[1]) + + win32gui.MoveWindow(wnd, # window to move + position[0], # new x coord + position[1], # new y coord + size[0], # new width + size[1], # new height + True) # repaint? + + +def EndProcess(proc, code=0): + """Ends a process. + + Wraps the OS TerminateProcess call for platform-independence + + Args: + proc: process ID + code: process exit code + + Returns: + None + """ + win32process.TerminateProcess(proc, code) + + +def URLtoFilename(url, path=None, extension=None): + """Converts a URL to a filename, given a path. + + This in theory could cause collisions if two URLs differ only + in unprintable characters (eg. http://www.foo.com/?bar and + http://www.foo.com/:bar. In practice this shouldn't be a problem. + + Args: + url: The URL to convert + path: path to the directory to store the file + extension: string to append to filename + + Returns: + filename + """ + trans = string.maketrans(r'\/:*?"<>|', '_________') + + if path is None: path = "" + if extension is None: extension = "" + if len(path) > 0 and path[-1] != '\\': path += '\\' + url = url.translate(trans) + return "%s%s%s" % (path, url, extension) + + +def PreparePath(path): + """Ensures that a given path exists, making subdirectories if necessary. 
+ + Args: + path: fully-qualified path of directory to ensure exists + + Returns: + None + """ + try: + os.makedirs(path) + except OSError, e: + if e[0] != 17: raise e # error 17: path already exists + +if __name__ == "__main__": + PreparePath(r"c:\sitecompare\scrapes\ie7") + # We're being invoked rather than imported. Let's do some tests + + # Hardcode IE's location for the purpose of this test + (proc, wnd) = InvokeAndWait( + r"c:\program files\internet explorer\iexplore.exe") + + # Find the browser pane in the IE window + browser = FindChildWindow( + wnd, "TabWindowClass/Shell DocObject View/Internet Explorer_Server") + + # Move and size the window + MoveAndSizeWindow(wnd, (0, 0), (1024, 768), browser) + + # Take a screenshot + i = ScrapeWindow(browser) + + i.show() + + EndProcess(proc, 0) diff --git a/tools/site_compare/operators/__init__.py b/tools/site_compare/operators/__init__.py new file mode 100644 index 0000000..02eac07 --- /dev/null +++ b/tools/site_compare/operators/__init__.py @@ -0,0 +1,26 @@ +#!/usr/bin/python2.4 +# +# Copyright 2007 Google Inc. All Rights Reserved. + +"""Selects the appropriate operator.""" + +__author__ = 'jhaas@google.com (Jonathan Haas)' + + +def GetOperator(operator): + """Given an operator by name, returns its module. + + Args: + operator: string describing the comparison + + Returns: + module + """ + + # TODO(jhaas): come up with a happy way of integrating multiple operators + # with different, possibly divergent and possibly convergent, operators. + + module = __import__(operator, globals(), locals(), ['']) + + return module + diff --git a/tools/site_compare/operators/equals.py b/tools/site_compare/operators/equals.py new file mode 100644 index 0000000..2771401 --- /dev/null +++ b/tools/site_compare/operators/equals.py @@ -0,0 +1,66 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Compare two images for equality.""" + +from PIL import Image +from PIL import ImageChops + + +def Compare(file1, file2, **kwargs): + """Compares two images to see if they're identical. 
+ + Args: + file1: path to first image to compare + file2: path to second image to compare + kwargs: unused for this operator + + Returns: + None if the images are identical + A tuple of (errorstring, image) if they're not + """ + kwargs = kwargs # unused parameter + + im1 = Image.open(file1) + im2 = Image.open(file2) + + if im1.size != im2.size: + return ("The images are of different size (%s vs %s)" % + (im1.size, im2.size), im1) + + diff = ImageChops.difference(im1, im2) + + if max(diff.getextrema()) != (0, 0): + return ("The images differ", diff) + else: + return None + + +
\ No newline at end of file diff --git a/tools/site_compare/operators/equals_with_mask.py b/tools/site_compare/operators/equals_with_mask.py new file mode 100644 index 0000000..574457a --- /dev/null +++ b/tools/site_compare/operators/equals_with_mask.py @@ -0,0 +1,86 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +"""Compare two images for equality, subject to a mask.""" + +from PIL import Image +from PIL import ImageChops + +import os.path + + +def Compare(file1, file2, **kwargs): + """Compares two images to see if they're identical subject to a mask. + + An optional directory containing masks is supplied. If a mask exists + which matches file1's name, areas under the mask where it's black + are ignored. + + Args: + file1: path to first image to compare + file2: path to second image to compare + kwargs: ["maskdir"] contains the directory holding the masks + + Returns: + None if the images are identical + A tuple of (errorstring, image) if they're not + """ + + maskdir = None + if "maskdir" in kwargs: + maskdir = kwargs["maskdir"] + + im1 = Image.open(file1) + im2 = Image.open(file2) + + if im1.size != im2.size: + return ("The images are of different size (%r vs %r)" % + (im1.size, im2.size), im1) + + diff = ImageChops.difference(im1, im2) + + if maskdir: + maskfile = os.path.join(maskdir, os.path.basename(file1)) + if os.path.exists(maskfile): + mask = Image.open(maskfile) + + if mask.size != im1.size: + return ("The mask is of a different size than the images (%r vs %r)" % + (mask.size, im1.size), mask) + + diff = ImageChops.multiply(diff, mask.convert(diff.mode)) + + if max(diff.getextrema()) != (0, 0): + return ("The images differ", diff) + else: + return None + + +
\ No newline at end of file diff --git a/tools/site_compare/scrapers/__init__.py b/tools/site_compare/scrapers/__init__.py new file mode 100644 index 0000000..08790aa --- /dev/null +++ b/tools/site_compare/scrapers/__init__.py @@ -0,0 +1,34 @@ +#!/usr/bin/python2.4 +# +# Copyright 2007 Google Inc. All Rights Reserved. + +"""Selects the appropriate scraper for a given browser and version.""" + +__author__ = 'jhaas@google.com (Jonathan Haas)' + +import types + +# TODO(jhaas): unify all optional scraper parameters into kwargs + +def GetScraper(browser): + """Given a browser and an optional version, returns the scraper module. + + Args: + browser: either a string (browser name) or a tuple (name, version) + + Returns: + module + """ + + if type(browser) == types.StringType: browser = (browser, None) + + package = __import__(browser[0], globals(), locals(), ['']) + module = package.GetScraper(browser[1]) + if browser[1] is not None: module.version = browser[1] + + return module + +# if invoked rather than imported, do some tests +if __name__ == "__main__": + print GetScraper("IE") +
\ No newline at end of file diff --git a/tools/site_compare/scrapers/chrome/__init__.py b/tools/site_compare/scrapers/chrome/__init__.py new file mode 100644 index 0000000..2ba76c4 --- /dev/null +++ b/tools/site_compare/scrapers/chrome/__init__.py @@ -0,0 +1,38 @@ +#!/usr/bin/python2.4 +# +# Copyright 2007 Google Inc. All Rights Reserved. + +"""Selects the appropriate scraper for Chrome.""" + +__author__ = 'jhaas@google.com (Jonathan Haas)' + +def GetScraper(version): + """Returns the scraper module for the given version. + + Args: + version: version string of Chrome, or None for most recent + + Returns: + scrape module for given version + """ + if version is None: + version = "0.1.101.0" + + parsed_version = [int(x) for x in version.split(".")] + + if (parsed_version[0] > 0 or + parsed_version[1] > 1 or + parsed_version[2] > 97 or + parsed_version[3] > 0): + scraper_version = "chrome011010" + else: + scraper_version = "chrome01970" + + return __import__(scraper_version, globals(), locals(), ['']) + +# if invoked rather than imported, test +if __name__ == "__main__": + version = "0.1.101.0" + + print GetScraper(version).version +
\ No newline at end of file diff --git a/tools/site_compare/scrapers/chrome/chrome011010.py b/tools/site_compare/scrapers/chrome/chrome011010.py new file mode 100644 index 0000000..0b75ff8 --- /dev/null +++ b/tools/site_compare/scrapers/chrome/chrome011010.py @@ -0,0 +1,68 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Does scraping for versions of Chrome from 0.1.101.0 up."""

from drivers import windowing

import chromebase

# Default version
version = "0.1.101.0"

# Window class of the render pane in builds 0.1.101.0 and later.
_RENDER_PANE_CLASS = "Chrome_TabContents"


def GetChromeRenderPane(wnd):
  """Return the render-pane child window of the given Chrome frame."""
  return windowing.FindChildWindow(wnd, _RENDER_PANE_CLASS)


def _InstallRenderPaneHook():
  """Point chromebase at this version's render-pane locator."""
  chromebase.GetChromeRenderPane = GetChromeRenderPane


def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Args:
    urls: list of URLs to scrape
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    None if succeeded, else an error code
  """
  # Delegate to the shared chromebase implementation; note kwargs is
  # forwarded as a single positional dict, as chromebase expects.
  _InstallRenderPaneHook()
  return chromebase.Scrape(urls, outdir, size, pos, timeout, kwargs)


def Time(urls, size, timeout, **kwargs):
  """Forwards the Time command to chromebase."""
  _InstallRenderPaneHook()
  return chromebase.Time(urls, size, timeout, kwargs)
"""Does scraping for versions of Chrome up to 0.1.97.0."""

from drivers import windowing

import chromebase

# Default version
version = "0.1.97.0"

# Window class of the render pane in builds up to 0.1.97.0.
_RENDER_PANE_CLASS = "Chrome_BrowserWindow"


def GetChromeRenderPane(wnd):
  """Return the render-pane child window of the given Chrome frame."""
  return windowing.FindChildWindow(wnd, _RENDER_PANE_CLASS)


def _InstallRenderPaneHook():
  """Point chromebase at this version's render-pane locator."""
  chromebase.GetChromeRenderPane = GetChromeRenderPane


def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Args:
    urls: list of URLs to scrape
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    None if succeeded, else an error code
  """
  # Delegate to the shared chromebase implementation; note kwargs is
  # forwarded as a single positional dict, as chromebase expects.
  _InstallRenderPaneHook()
  return chromebase.Scrape(urls, outdir, size, pos, timeout, kwargs)


def Time(urls, size, timeout, **kwargs):
  """Forwards the Time command to chromebase."""
  _InstallRenderPaneHook()
  return chromebase.Time(urls, size, timeout, kwargs)
\ No newline at end of file diff --git a/tools/site_compare/scrapers/chrome/chromebase.py b/tools/site_compare/scrapers/chrome/chromebase.py new file mode 100644 index 0000000..4825049 --- /dev/null +++ b/tools/site_compare/scrapers/chrome/chromebase.py @@ -0,0 +1,217 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Does scraping for all currently-known versions of Chrome"""

import pywintypes
import types

from drivers import keyboard
from drivers import mouse
from drivers import windowing

# NOTE: GetChromeRenderPane is NOT defined in this module. The
# per-version scraper (chrome011010.py / chrome01970.py) assigns its
# own GetChromeRenderPane onto this module before calling Scrape/Time.

# TODO: this has moved, use some logic to find it. For now,
# expects a subst k:.
DEFAULT_PATH = r"k:\chrome.exe"

def InvokeBrowser(path):
  """Invoke the Chrome browser.

  Args:
    path: full path to browser

  Returns:
    A tuple of (main window, process handle, address bar, render pane)
  """

  # Reuse an existing instance of the browser if we can find one. This
  # may not work correctly, especially if the window is behind other windows.

  # TODO(jhaas): make this work with Vista
  wnds = windowing.FindChildWindows(0, "Chrome_XPFrame")
  if len(wnds):
    # proc stays None for a reused instance, so callers know not to
    # wait for (or kill) a process they did not start.
    wnd = wnds[0]
    proc = None
  else:
    # Invoke Chrome
    (proc, wnd) = windowing.InvokeAndWait(path)

  # Get windows we'll need
  address_bar = windowing.FindChildWindow(wnd, "Chrome_AutocompleteEdit")
  # GetChromeRenderPane is injected by the version-specific module; see
  # the module-level NOTE above.
  render_pane = GetChromeRenderPane(wnd)

  return (wnd, proc, address_bar, render_pane)


def Scrape(urls, outdir, size, pos, timeout, kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Args:
    urls: list of URLs to scrape
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    None if success, else an error string
  """
  # kwargs is a plain dict here, not **kwargs: the per-version wrappers
  # pass their keyword arguments through as one positional dict.
  if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
  else: path = DEFAULT_PATH

  (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

  # Resize and reposition the frame
  windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

  # Visit each URL we're given
  if type(urls) in types.StringTypes: urls = [urls]

  timedout = False

  for url in urls:
    # Double-click in the address bar, type the name, and press Enter
    mouse.ClickInWindow(address_bar)
    keyboard.TypeString(url, 0.1)
    keyboard.TypeString("\n")

    # Wait for the page to finish loading
    # (20, 16, 36, 32) is the throbber rectangle; presumably
    # window-relative pixel coordinates -- confirm in drivers.windowing.
    load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)
    timedout = load_time < 0

    if timedout:
      break

    # Scrape the page
    image = windowing.ScrapeWindow(render_pane)

    # Save to disk
    # "filename" may be a constant name or a callable that maps a URL
    # to a name; otherwise derive the name from the URL.
    if "filename" in kwargs:
      if callable(kwargs["filename"]):
        filename = kwargs["filename"](url)
      else:
        filename = kwargs["filename"]
    else:
      filename = windowing.URLtoFilename(url, outdir, ".bmp")
    image.save(filename)

  # Only close the browser if we launched it ourselves (proc is None
  # when an existing instance was reused).
  if proc:
    windowing.SetForegroundWindow(wnd)

    # Send Alt-F4, then wait for process to end
    # (r"{\4}" is presumably the keyboard driver's escape for Alt-F4 --
    # see drivers.keyboard)
    keyboard.TypeString(r"{\4}", use_modifiers=True)
    if not windowing.WaitForProcessExit(proc, timeout):
      windowing.EndProcess(proc)
      return "crashed"

  if timedout:
    return "timeout"

  return None


def Time(urls, size, timeout, kwargs):
  """Measure how long it takes to load each of a series of URLs

  Args:
    urls: list of URLs to time
    size: size of browser window to use
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    A list of tuples (url, time). "time" can be "crashed" or "timeout"
  """
  # kwargs is a plain dict here as well (see Scrape above).
  if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
  else: path = DEFAULT_PATH
  proc = None

  # Visit each URL we're given
  if type(urls) in types.StringTypes: urls = [urls]

  ret = []
  for url in urls:
    try:
      # Invoke the browser if necessary (the browser is closed after
      # every URL below, so each iteration normally relaunches it)
      if not proc:
        (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)

      # Double-click in the address bar, type the name, and press Enter
      mouse.ClickInWindow(address_bar)
      keyboard.TypeString(url, 0.1)
      keyboard.TypeString("\n")

      # Wait for the page to finish loading
      load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)

      timedout = load_time < 0

      if timedout:
        load_time = "timeout"

      # Send an alt-F4 to make the browser close; if this times out,
      # we've probably got a crash
      windowing.SetForegroundWindow(wnd)

      keyboard.TypeString(r"{\4}", use_modifiers=True)
      if not windowing.WaitForProcessExit(proc, timeout):
        windowing.EndProcess(proc)
        load_time = "crashed"
      proc = None
    except pywintypes.error:
      # Any win32 error while driving the browser is treated as a crash
      proc = None
      load_time = "crashed"

    ret.append( (url, load_time) )

  # If the last iteration left a browser running, close it too
  if proc:
    windowing.SetForegroundWindow(wnd)
    keyboard.TypeString(r"{\4}", use_modifiers=True)
    if not windowing.WaitForProcessExit(proc, timeout):
      windowing.EndProcess(proc)

  return ret


if __name__ == "__main__":
  # We're being invoked rather than imported, so run some tests
  path = r"c:\sitecompare\scrapes\chrome\0.1.97.0"
  windowing.PreparePath(path)

  # Scrape three sites and save the results
  # NOTE(review): this call omits the timeout and kwargs arguments that
  # Scrape() requires in this module (it takes them positionally, not
  # as defaults), and GetChromeRenderPane is never injected when this
  # file is run directly -- this self-test will raise if executed.
  Scrape([
    "http://www.microsoft.com",
    "http://www.google.com",
    "http://www.sun.com"],
    path, (1024, 768), (0, 0))
#!/usr/bin/python2.4
#
# Copyright 2007 Google Inc. All Rights Reserved.

"""Selects the appropriate scraper for Firefox."""

__author__ = 'jhaas@google.com (Jonathan Haas)'


def GetScraper(version):
  """Returns the scraper module for the given version.

  Args:
    version: version string of Firefox, or None for most recent

  Returns:
    scrape module for given version
  """

  # Pychecker will warn that the parameter is unused; we only
  # support one version of Firefox at this time

  # We only have one version of the Firefox scraper for now
  return __import__("firefox2", globals(), locals(), [''])

# if invoked rather than imported, test
if __name__ == "__main__":
  version = "2.0.0.6"

  print GetScraper("2.0.0.6").version
\ No newline at end of file diff --git a/tools/site_compare/scrapers/firefox/firefox2.py b/tools/site_compare/scrapers/firefox/firefox2.py new file mode 100644 index 0000000..d91534e --- /dev/null +++ b/tools/site_compare/scrapers/firefox/firefox2.py @@ -0,0 +1,269 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Does scraping for Firefox 2.0."""

import pywintypes
import time
import types

from drivers import keyboard
from drivers import mouse
from drivers import windowing

# Default version
version = "2.0.0.6"

DEFAULT_PATH = r"c:\program files\mozilla firefox\firefox.exe"

# TODO(jhaas): the Firefox scraper is a bit rickety at the moment. Known
# issues: 1) won't work if the default profile puts toolbars in different
# locations, 2) uses sleep() statements rather than more robust checks,
# 3) fails badly if an existing Firefox window is open when the scrape
# is invoked. This needs to be fortified at some point.

def GetBrowser(path):
  """Invoke the Firefox browser and return the process and window.

  Args:
    path: full path to browser

  Returns:
    A tuple of (process handle, main window, render pane)
  """
  if not path: path = DEFAULT_PATH

  # Invoke Firefox
  (proc, wnd) = windowing.InvokeAndWait(path)

  # Get the content pane
  render_pane = windowing.FindChildWindow(
    wnd,
    "MozillaWindowClass/MozillaWindowClass/MozillaWindowClass")

  return (proc, wnd, render_pane)


def InvokeBrowser(path):
  """Invoke the Firefox browser.

  Args:
    path: full path to browser

  Returns:
    A tuple of (main window, process handle, render pane)
  """
  # Reuse an existing instance of the browser if we can find one. This
  # may not work correctly, especially if the window is behind other windows.
  wnds = windowing.FindChildWindows(0, "MozillaUIWindowClass")
  if len(wnds):
    # proc stays None for a reused instance
    wnd = wnds[0]
    proc = None
  else:
    # Invoke Firefox
    (proc, wnd) = windowing.InvokeAndWait(path)

  # Get the content pane
  render_pane = windowing.FindChildWindow(
    wnd,
    "MozillaWindowClass/MozillaWindowClass/MozillaWindowClass")

  return (wnd, proc, render_pane)


def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Args:
    urls: list of URLs to scrape
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    None if success (implicitly -- the function simply falls off the
    end), else an error string
  """
  if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
  else: path = DEFAULT_PATH

  (wnd, proc, render_pane) = InvokeBrowser(path)

  # Resize and reposition the frame
  windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

  time.sleep(3)

  # Firefox is a bit of a pain: it doesn't use standard edit controls,
  # and it doesn't display a throbber when there's no tab. Let's make
  # sure there's at least one tab, then select the first one

  # "[t]"/"{d}"/"[w]" below are keyboard-driver escapes (presumably
  # Ctrl-T, Alt-D and Ctrl-W respectively -- see drivers.keyboard)
  mouse.ClickInWindow(wnd)
  keyboard.TypeString("[t]", True)
  # NOTE(review): hard-coded click coordinates; fragile if the default
  # profile moves the toolbars (see TODO at top of file)
  mouse.ClickInWindow(wnd, (30, 115))
  time.sleep(2)

  timedout = False

  # Visit each URL we're given
  if type(urls) in types.StringTypes: urls = [urls]

  for url in urls:

    # Use keyboard shortcuts
    keyboard.TypeString("{d}", True)
    keyboard.TypeString(url)
    keyboard.TypeString("\n")

    # Wait for the page to finish loading
    load_time = windowing.WaitForThrobber(wnd, (10, 96, 26, 112), timeout)
    timedout = load_time < 0

    if timedout:
      break

    # Scrape the page
    image = windowing.ScrapeWindow(render_pane)

    # Save to disk
    # "filename" may be a constant name or a callable mapping URL->name
    if "filename" in kwargs:
      if callable(kwargs["filename"]):
        filename = kwargs["filename"](url)
      else:
        filename = kwargs["filename"]
    else:
      filename = windowing.URLtoFilename(url, outdir, ".bmp")
    image.save(filename)

  # Close all the tabs, cheesily
  mouse.ClickInWindow(wnd)

  while len(windowing.FindChildWindows(0, "MozillaUIWindowClass")):
    keyboard.TypeString("[w]", True)
    time.sleep(1)

  if timedout:
    return "timeout"


def Time(urls, size, timeout, **kwargs):
  """Measure how long it takes to load each of a series of URLs

  Args:
    urls: list of URLs to time
    size: size of browser window to use
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    A list of tuples (url, time). "time" can be "crashed" or "timeout"
  """
  if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
  else: path = DEFAULT_PATH
  proc = None

  # Visit each URL we're given
  if type(urls) in types.StringTypes: urls = [urls]

  ret = []
  for url in urls:
    try:
      # Invoke the browser if necessary (the browser is closed after
      # every URL below, so each iteration normally relaunches it)
      if not proc:
        (wnd, proc, render_pane) = InvokeBrowser(path)

        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)

        time.sleep(3)

        # Firefox is a bit of a pain: it doesn't use standard edit controls,
        # and it doesn't display a throbber when there's no tab. Let's make
        # sure there's at least one tab, then select the first one

        mouse.ClickInWindow(wnd)
        keyboard.TypeString("[t]", True)
        mouse.ClickInWindow(wnd, (30, 115))
        time.sleep(2)

      # Use keyboard shortcuts
      keyboard.TypeString("{d}", True)
      keyboard.TypeString(url)
      keyboard.TypeString("\n")

      # Wait for the page to finish loading
      load_time = windowing.WaitForThrobber(wnd, (10, 96, 26, 112), timeout)
      timedout = load_time < 0

      if timedout:
        load_time = "timeout"

      # Try to close the browser; if this fails it's probably a crash
      mouse.ClickInWindow(wnd)

      # Give the close-tab shortcut up to five tries to shut every
      # window before declaring a crash
      count = 0
      while (len(windowing.FindChildWindows(0, "MozillaUIWindowClass"))
             and count < 5):
        keyboard.TypeString("[w]", True)
        time.sleep(1)
        count = count + 1

      if len(windowing.FindChildWindows(0, "MozillaUIWindowClass")):
        windowing.EndProcess(proc)
        load_time = "crashed"

      proc = None
    except pywintypes.error:
      # Any win32 error while driving the browser is treated as a crash
      proc = None
      load_time = "crashed"

    ret.append( (url, load_time) )

  # If the last iteration left a browser running, try to close it too
  if proc:
    count = 0
    while (len(windowing.FindChildWindows(0, "MozillaUIWindowClass"))
           and count < 5):
      keyboard.TypeString("[w]", True)
      time.sleep(1)
      count = count + 1
  return ret


if __name__ == "__main__":
  # We're being invoked rather than imported, so run some tests
  path = r"c:\sitecompare\scrapes\Firefox\2.0.0.6"
  windowing.PreparePath(path)

  # Scrape three sites and save the results
  Scrape(
    ["http://www.microsoft.com", "http://www.google.com",
     "http://www.sun.com"],
    path, (1024, 768), (0, 0))

# ======================================================================
# The remainder of this change is a separate file in the original diff:
# tools/site_compare/scrapers/ie/__init__.py
# ======================================================================

#!/usr/bin/python2.4
#
# Copyright 2007 Google Inc. All Rights Reserved.

"""Selects the appropriate scraper for Internet Explorer."""

__author__ = 'jhaas@google.com (Jonathan Haas)'


def GetScraper(version):
  """Returns the scraper module for the given version.

  Args:
    version: version string of IE, or None for most recent

  Returns:
    scrape module for given version
  """

  # Pychecker will warn that the parameter is unused; we only
  # support one version of IE at this time

  # We only have one version of the IE scraper for now
  return __import__("ie7", globals(), locals(), [''])

# if invoked rather than imported, test
if __name__ == "__main__":
  version = "7.0.5370.1"

  print GetScraper(version).version
\ No newline at end of file diff --git a/tools/site_compare/scrapers/ie/ie7.py b/tools/site_compare/scrapers/ie/ie7.py new file mode 100644 index 0000000..a0475e0 --- /dev/null +++ b/tools/site_compare/scrapers/ie/ie7.py @@ -0,0 +1,230 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Does scraping for all known versions of IE."""

import pywintypes
import time
import types

from drivers import keyboard
from drivers import mouse
from drivers import windowing

# Default version
# NOTE(review): the ie package __init__ self-test uses "7.0.5370.1" and
# the __main__ path below says "7.0.5380.11" -- these version strings
# are inconsistent; confirm which IE7 build is actually targeted.
version = "7.0.5730.1"

DEFAULT_PATH = r"c:\program files\internet explorer\iexplore.exe"

def GetBrowser(path):
  """Invoke the IE browser and return the process, frame, and content window.

  Args:
    path: full path to browser

  Returns:
    A tuple of (process handle, main window, render pane)
  """
  if not path: path = DEFAULT_PATH

  # address_bar and tab_window are discarded here; callers that need
  # them use InvokeBrowser directly
  (iewnd, ieproc, address_bar, render_pane, tab_window) = InvokeBrowser(path)
  return (ieproc, iewnd, render_pane)


def InvokeBrowser(path):
  """Invoke the IE browser.

  Args:
    path: full path to browser

  Returns:
    A tuple of (main window, process handle, address bar,
    render_pane, tab_window)
  """
  # Invoke IE
  (ieproc, iewnd) = windowing.InvokeAndWait(path)

  # Get windows we'll need
  # Retry for up to ~10 seconds: FindChildWindow apparently raises
  # IndexError until the child windows exist -- confirm in
  # drivers.windowing.
  # NOTE(review): if all 10 tries fail, the names below stay unbound
  # and the return statement raises NameError.
  for tries in xrange(10):
    try:
      address_bar = windowing.FindChildWindow(
        iewnd, "WorkerW|Navigation Bar/ReBarWindow32/"
        "Address Band Root/ComboBoxEx32/ComboBox/Edit")
      render_pane = windowing.FindChildWindow(
        iewnd, "TabWindowClass/Shell DocObject View")
      tab_window = windowing.FindChildWindow(
        iewnd, "CommandBarClass/ReBarWindow32/TabBandClass/DirectUIHWND")
    except IndexError:
      time.sleep(1)
      continue
    break

  return (iewnd, ieproc, address_bar, render_pane, tab_window)


def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Args:
    urls: list of URLs to scrape
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    None if success, else an error string
  """
  # NOTE(review): duplicates DEFAULT_PATH verbatim; should probably be
  # "path = DEFAULT_PATH" for consistency with Time() below.
  path = r"c:\program files\internet explorer\iexplore.exe"

  if "path" in kwargs and kwargs["path"]: path = kwargs["path"]

  (iewnd, ieproc, address_bar, render_pane, tab_window) = (
    InvokeBrowser(path) )

  # Resize and reposition the frame
  windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane)

  # Visit each URL we're given
  if type(urls) in types.StringTypes: urls = [urls]

  timedout = False

  for url in urls:

    # Double-click in the address bar, type the name, and press Enter
    mouse.DoubleClickInWindow(address_bar)
    keyboard.TypeString(url)
    keyboard.TypeString("\n")

    # Wait for the page to finish loading
    # (6, 8, 22, 24) is the throbber rectangle within the tab band;
    # presumably window-relative pixels -- confirm in drivers.windowing.
    load_time = windowing.WaitForThrobber(
      tab_window, (6, 8, 22, 24), timeout)
    timedout = load_time < 0

    if timedout:
      break

    # Scrape the page
    image = windowing.ScrapeWindow(render_pane)

    # Save to disk
    # "filename" may be a constant name or a callable mapping URL->name
    if "filename" in kwargs:
      if callable(kwargs["filename"]):
        filename = kwargs["filename"](url)
      else:
        filename = kwargs["filename"]
    else:
      filename = windowing.URLtoFilename(url, outdir, ".bmp")
    image.save(filename)

  windowing.EndProcess(ieproc)

  if timedout:
    return "timeout"


def Time(urls, size, timeout, **kwargs):
  """Measure how long it takes to load each of a series of URLs

  Args:
    urls: list of URLs to time
    size: size of browser window to use
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args

  Returns:
    A list of tuples (url, time). "time" can be "crashed" or "timeout"
  """
  if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
  else: path = DEFAULT_PATH
  proc = None

  # Visit each URL we're given
  if type(urls) in types.StringTypes: urls = [urls]

  ret = []
  for url in urls:
    try:
      # Invoke the browser if necessary (the browser is closed after
      # every URL below, so each iteration normally relaunches it)
      if not proc:
        (wnd, proc, address_bar, render_pane, tab_window) = InvokeBrowser(path)

        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)

      # Double-click in the address bar, type the name, and press Enter
      mouse.DoubleClickInWindow(address_bar)
      keyboard.TypeString(url)
      keyboard.TypeString("\n")

      # Wait for the page to finish loading
      load_time = windowing.WaitForThrobber(
        tab_window, (6, 8, 22, 24), timeout)
      timedout = load_time < 0

      if timedout:
        load_time = "timeout"

      # Send an alt-F4 to make the browser close; if this times out,
      # we've probably got a crash
      # (r"{\4}" is presumably the keyboard driver's escape for Alt-F4)
      keyboard.TypeString(r"{\4}", use_modifiers=True)
      if not windowing.WaitForProcessExit(proc, timeout):
        windowing.EndProcess(proc)
        load_time = "crashed"
      proc = None
    except pywintypes.error:
      # Any win32 error while driving the browser is treated as a crash
      load_time = "crashed"
      proc = None

    ret.append( (url, load_time) )

  # Send an alt-F4 to make the browser close; if this times out,
  # we've probably got a crash
  if proc:
    keyboard.TypeString(r"{\4}", use_modifiers=True)
    if not windowing.WaitForProcessExit(proc, timeout):
      windowing.EndProcess(proc)

  return ret


if __name__ == "__main__":
  # We're being invoked rather than imported, so run some tests
  path = r"c:\sitecompare\scrapes\ie7\7.0.5380.11"
  windowing.PreparePath(path)

  # Scrape three sites and save the results
  Scrape(
    ["http://www.microsoft.com",
     "http://www.google.com",
     "http://www.sun.com"],
    path, (1024, 768), (0, 0))
b/tools/site_compare/site_compare.py @@ -0,0 +1,202 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare component to handle bulk scrapes. 
+ +Invokes a list of browsers and sends them to a list of URLs, +saving the rendered results to a specified directory, then +performs comparison operations on the resulting bitmaps and +saves the results +""" + + +# This line is necessary to work around a QEMU bug +import _imaging + +import os # Functions for walking the directory tree +import types # Runtime type-checking + +import command_line # command-line parsing +import drivers # Functions for driving keyboard/mouse/windows, OS-specific +import operators # Functions that, given two bitmaps as input, produce + # output depending on the performance of an operation +import scrapers # Functions that know how to capture a render from + # particular browsers + +import commands.compare2 # compare one page in two versions of same browser +import commands.maskmaker # generate a mask based on repeated scrapes +import commands.measure # measure length of time a page takes to load +import commands.scrape # scrape a URL or series of URLs to a bitmap + +# The timeload command is obsolete (too flaky); it may be reinstated +# later but for now it's been superceded by "measure" +# import commands.timeload # measure length of time a page takes to load + +def Scrape(browsers, urls, window_size=(1024, 768), + window_pos=(0, 0), timeout=20, save_path=None, **kwargs): + """Invoke one or more browsers over one or more URLs, scraping renders. + + Args: + browsers: browsers to invoke with optional version strings + urls: URLs to visit + window_size: size of the browser window to display + window_pos: location of browser window + timeout: time (in seconds) to wait for page to load + save_path: root of save path, automatically appended with browser and + version + kwargs: miscellaneous keyword args, passed to scraper + Returns: + None + + @TODO(jhaas): more parameters, or perhaps an indefinite dictionary + parameter, for things like length of time to wait for timeout, speed + of mouse clicks, etc. 
Possibly on a per-browser, per-URL, or + per-browser-per-URL basis + """ + + if type(browsers) in types.StringTypes: browsers = [browsers] + + if save_path is None: + # default save path is "scrapes" off the current root + save_path = os.path.join(os.path.split(__file__)[0], "Scrapes") + + for browser in browsers: + # Browsers should be tuples of (browser, version) + if type(browser) in types.StringTypes: browser = (browser, None) + scraper = scrapers.GetScraper(browser) + + full_path = os.path.join(save_path, browser[0], scraper.version) + drivers.windowing.PreparePath(full_path) + + scraper.Scrape(urls, full_path, window_size, window_pos, timeout, kwargs) + + +def Compare(base, compare, ops, root_path=None, out_path=None): + """Compares a series of scrapes using a series of operators. + + Args: + base: (browser, version) tuple of version to consider the baseline + compare: (browser, version) tuple of version to compare to + ops: list of operators plus operator arguments + root_path: root of the scrapes + out_path: place to put any output from the operators + + Returns: + None + + @TODO(jhaas): this method will likely change, to provide a robust and + well-defined way of chaining operators, applying operators conditionally, + and full-featured scripting of the operator chain. 
There also needs + to be better definition of the output; right now it's to stdout and + a log.txt file, with operator-dependent images saved for error output + """ + if root_path is None: + # default save path is "scrapes" off the current root + root_path = os.path.join(os.path.split(__file__)[0], "Scrapes") + + if out_path is None: + out_path = os.path.join(os.path.split(__file__)[0], "Compares") + + if type(base) in types.StringTypes: base = (base, None) + if type(compare) in types.StringTypes: compare = (compare, None) + if type(ops) in types.StringTypes: ops = [ops] + + base_dir = os.path.join(root_path, base[0]) + compare_dir = os.path.join(root_path, compare[0]) + + if base[1] is None: + # base defaults to earliest capture + base = (base[0], max(os.listdir(base_dir))) + + if compare[1] is None: + # compare defaults to latest capture + compare = (compare[0], min(os.listdir(compare_dir))) + + out_path = os.path.join(out_path, base[0], base[1], compare[0], compare[1]) + drivers.windowing.PreparePath(out_path) + + # TODO(jhaas): right now we're just dumping output to a log file + # (and the console), which works as far as it goes but isn't nearly + # robust enough. Change this after deciding exactly what we want to + # change it to. 
+ out_file = open(os.path.join(out_path, "log.txt"), "w") + description_string = ("Comparing %s %s to %s %s" % + (base[0], base[1], compare[0], compare[1])) + out_file.write(description_string) + print description_string + + base_dir = os.path.join(base_dir, base[1]) + compare_dir = os.path.join(compare_dir, compare[1]) + + for filename in os.listdir(base_dir): + out_file.write("%s: " % filename) + + if not os.path.isfile(os.path.join(compare_dir, filename)): + out_file.write("Does not exist in target directory\n") + print "File %s does not exist in target directory" % filename + continue + + base_filename = os.path.join(base_dir, filename) + compare_filename = os.path.join(compare_dir, filename) + + for op in ops: + if type(op) in types.StringTypes: op = (op, None) + + module = operators.GetOperator(op[0]) + + ret = module.Compare(base_filename, compare_filename) + if ret is None: + print "%s: OK" % (filename,) + out_file.write("OK\n") + else: + print "%s: %s" % (filename, ret[0]) + out_file.write("%s\n" % (ret[0])) + ret[1].save(os.path.join(out_path, filename)) + + out_file.close() + + +def main(): + """Main executable. Parse the command line and invoke the command.""" + cmdline = command_line.CommandLine() + + # The below two commands are currently unstable so have been disabled + # commands.compare2.CreateCommand(cmdline) + # commands.maskmaker.CreateCommand(cmdline) + commands.measure.CreateCommand(cmdline) + commands.scrape.CreateCommand(cmdline) + + cmdline.ParseCommandLine() + + + +if __name__ == "__main__": + main() +
\ No newline at end of file diff --git a/tools/site_compare/utils/__init__.py b/tools/site_compare/utils/__init__.py new file mode 100644 index 0000000..69f2237 --- /dev/null +++ b/tools/site_compare/utils/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/python2.4 +# +# Copyright 2007 Google Inc. All Rights Reserved. + +"""Utilities for site_compare.""" + +__author__ = 'jhaas@google.com (Jonathan Haas)' diff --git a/tools/site_compare/utils/browser_iterate.py b/tools/site_compare/utils/browser_iterate.py new file mode 100644 index 0000000..65ba24f --- /dev/null +++ b/tools/site_compare/utils/browser_iterate.py @@ -0,0 +1,225 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Utility to use a browser to visit multiple URLs. + +Prerequisites: + 1. The command_line package from tools/site_compare + 2. Either the IE BHO or Firefox extension (or both) + +Installation: + 1. Build the IE BHO, or call regsvr32 on a prebuilt binary + 2. Add a file called "measurepageloadtimeextension@google.com" to + the default Firefox profile directory under extensions, containing + the path to the Firefox extension root + +Invoke with the command line arguments as documented within +the command line. +""" + +import command_line +import scrapers +import socket +import time + +from drivers import windowing + +# Constants +MAX_URL = 1024 +PORT = 42492 + +def SetupIterationCommandLine(cmd): + """Adds the necessary flags for iteration to a command. 
+ + Args: + cmd: an object created by cmdline.AddCommand + """ + cmd.AddArgument( + ["-b", "--browser"], "Browser to use (ie, firefox, chrome)", + type="string", required=True) + cmd.AddArgument( + ["-b1v", "--browserver"], "Version of browser", metaname="VERSION") + cmd.AddArgument( + ["-p", "--browserpath"], "Path to browser.", + type="string", required=False) + cmd.AddArgument( + ["-u", "--url"], "URL to visit") + cmd.AddArgument( + ["-l", "--list"], "File containing list of URLs to visit", type="readfile") + cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=300) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + + +def Iterate(command, iteration_func): + """Iterates over a list of URLs, calling a function on each. + + Args: + command: the command line containing the iteration flags + iteration_func: called for each URL with (proc, wnd, url, result) + """ + + # Retrieve the browser scraper to use to invoke the browser + scraper = scrapers.GetScraper((command["--browser"], command["--browserver"])) + + def AttachToBrowser(path, timeout): + """Invoke the browser process and connect to the socket.""" + (proc, frame, wnd) = scraper.GetBrowser(path) + + if not wnd: raise ValueError("Could not invoke browser.") + + # Try to connect the socket. 
If it fails, wait and try + # again. Do this for ten seconds + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) + + for attempt in xrange(10): + try: + s.connect(("localhost", PORT)) + except socket.error: + time.sleep(1) + continue + break + + try: + s.getpeername() + except socket.error: + raise ValueError("Could not connect to browser") + + if command["--size"]: + # Resize and reposition the frame + windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd) + + s.settimeout(timeout) + + Iterate.proc = proc + Iterate.wnd = wnd + Iterate.s = s + + def DetachFromBrowser(): + """Close the socket and kill the process if necessary.""" + if Iterate.s: + Iterate.s.close() + Iterate.s = None + + if Iterate.proc: + if not windowing.WaitForProcessExit(Iterate.proc, 0): + try: + windowing.EndProcess(Iterate.proc) + windowing.WaitForProcessExit(Iterate.proc, 0) + except pywintypes.error: + # Exception here most likely means the process died on its own + pass + Iterate.proc = None + + if command["--browserpath"]: + browser = command["--browserpath"] + else: + browser = None + + # Read the URLs from the file + if command["--url"]: + url_list = [command["--url"]] + else: + startline = command["--startline"] + if command["--count"]: + endline = startline+command["--count"] + else: + endline = command["--endline"] + + url_list = [] + file = open(command["--list"], "r") + + for line in xrange(startline-1): + file.readline() + + for line in xrange(endline-startline): + url_list.append(file.readline().strip()) + + timeout = command["--timeout"] + + # Loop through the URLs and send them through the socket + Iterate.s = None + Iterate.proc = None + Iterate.wnd = None + + for url in url_list: + # Invoke the browser if necessary + if not Iterate.proc: + AttachToBrowser(browser, timeout) + # Send the URL and wait for a response + Iterate.s.send(url + "\n") + + response = "" + + while (response.find("\n") < 0): + + try: + recv = Iterate.s.recv(MAX_URL) + 
response = response + recv + + # Workaround for an oddity: when Firefox closes + # gracefully, somehow Python doesn't detect it. + # (Telnet does) + if not recv: + raise socket.error + + except socket.timeout: + response = url + ",hang\n" + DetachFromBrowser() + except socket.error: + # If there was a socket error, it's probably a crash + response = url + ",crash\n" + DetachFromBrowser() + + # If we received a timeout response, restart the browser + if response[-9:] == ",timeout\n": + DetachFromBrowser() + + # Invoke the iteration function + iteration_func(url, Iterate.proc, Iterate.wnd, response) + + # We're done + DetachFromBrowser() |