diff options
author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
---|---|---|
committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:12:16 +0000 |
commit | 920c091ac3ee15079194c82ae8a7a18215f3f23c (patch) | |
tree | d28515d1e7732e2b6d077df1b4855ace3f4ac84f /tools/site_compare/commands | |
parent | ae2c20f398933a9e86c387dcc465ec0f71065ffc (diff) | |
download | chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.zip chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.gz chromium_src-920c091ac3ee15079194c82ae8a7a18215f3f23c.tar.bz2 |
Add tools to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/site_compare/commands')
-rw-r--r-- | tools/site_compare/commands/__init__.py | 2 | ||||
-rw-r--r-- | tools/site_compare/commands/compare2.py | 196 | ||||
-rw-r--r-- | tools/site_compare/commands/maskmaker.py | 298 | ||||
-rw-r--r-- | tools/site_compare/commands/measure.py | 78 | ||||
-rw-r--r-- | tools/site_compare/commands/scrape.py | 85 | ||||
-rw-r--r-- | tools/site_compare/commands/timeload.py | 170 |
6 files changed, 829 insertions, 0 deletions
diff --git a/tools/site_compare/commands/__init__.py b/tools/site_compare/commands/__init__.py new file mode 100644 index 0000000..a699508 --- /dev/null +++ b/tools/site_compare/commands/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/Python2.4 + diff --git a/tools/site_compare/commands/compare2.py b/tools/site_compare/commands/compare2.py new file mode 100644 index 0000000..6dc00c7 --- /dev/null +++ b/tools/site_compare/commands/compare2.py @@ -0,0 +1,196 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare command to invoke the same page in two versions of a browser. + +Does the easiest compatibility test: equality comparison between two different +versions of the same browser. Invoked with a series of command line options +that specify which URLs to check, which browser to use, where to store results, +etc. +""" + +import os # Functions for walking the directory tree +import tempfile # Get a temporary directory to hold intermediates + +import command_line +import drivers # Functions for driving keyboard/mouse/windows, OS-specific +import operators # Functions that, given two bitmaps as input, produce + # output depending on the performance of an operation +import scrapers # Functions that know how to capture a render from + # particular browsers + + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["compare2"], + "Compares the output of two browsers on the same URL or list of URLs", + ValidateCompare2, + ExecuteCompare2) + + cmd.AddArgument( + ["-b1", "--browser1"], "Full path to first browser's executable", + type="readfile", metaname="PATH", required=True) + cmd.AddArgument( + ["-b2", "--browser2"], "Full path to second browser's executable", + type="readfile", metaname="PATH", required=True) + cmd.AddArgument( + ["-b", "--browser"], "Which browser to use", type="string", + default="chrome") + cmd.AddArgument( + ["-b1v", "--browser1ver"], 
"Version of first browser", metaname="VERSION") + cmd.AddArgument( + ["-b2v", "--browser2ver"], "Version of second browser", metaname="VERSION") + cmd.AddArgument( + ["-b1n", "--browser1name"], "Optional name for first browser (used in " + "directory to hold intermediate files)", metaname="NAME") + cmd.AddArgument( + ["-b2n", "--browser2name"], "Optional name for second browser (used in " + "directory to hold intermediate files)", metaname="NAME") + cmd.AddArgument( + ["-o", "--outdir"], "Directory to store scrape files", metaname="DIR") + cmd.AddArgument( + ["-u", "--url"], "URL to compare") + cmd.AddArgument( + ["-l", "--list"], "List of URLs to compare", type="readfile") + cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=60) + cmd.AddArgument( + ["-log", "--logfile"], "File to write output", type="string", required=True) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + cmd.AddArgument( + ["-m", "--maskdir"], "Path that holds masks to use for comparison") + cmd.AddArgument( + ["-d", "--diffdir"], "Path to hold the difference of comparisons that fail") + + +def ValidateCompare2(command): + """Validate the arguments to compare2. 
Raises ParseError if failed.""" + executables = [".exe", ".com", ".bat"] + if (os.path.splitext(command["--browser1"])[1].lower() not in executables or + os.path.splitext(command["--browser2"])[1].lower() not in executables): + raise command_line.ParseError("Browser filename must be an executable") + + +def ExecuteCompare2(command): + """Executes the Compare2 command.""" + if command["--url"]: + url_list = [command["--url"]] + else: + startline = command["--startline"] + if command["--count"]: + endline = startline+command["--count"] + else: + endline = command["--endline"] + url_list = [url.strip() for url in + open(command["--list"], "r").readlines()[startline:endline]] + + log_file = open(command["--logfile"], "w") + + outdir = command["--outdir"] + if not outdir: outdir = tempfile.gettempdir() + + scrape_info_list = [] + + class ScrapeInfo(object): + """Helper class to hold information about a scrape.""" + __slots__ = ["browser_path", "scraper", "outdir", "result"] + + for index in xrange(1, 3): + scrape_info = ScrapeInfo() + scrape_info.browser_path = command["--browser%d" % index] + scrape_info.scraper = scrapers.GetScraper( + (command["--browser"], command["--browser%dver" % index])) + + if command["--browser%dname" % index]: + scrape_info.outdir = os.path.join(outdir, + command["--browser%dname" % index]) + else: + scrape_info.outdir = os.path.join(outdir, str(index)) + + drivers.windowing.PreparePath(scrape_info.outdir) + scrape_info_list.append(scrape_info) + + compare = operators.GetOperator("equals_with_mask") + + for url in url_list: + success = True + + for scrape_info in scrape_info_list: + scrape_info.result = scrape_info.scraper.Scrape( + [url], scrape_info.outdir, command["--size"], (0, 0), + command["--timeout"], path=scrape_info.browser_path) + + if not scrape_info.result: + scrape_info.result = "success" + else: + success = False + + result = "unknown" + + if success: + result = "equal" + + file1 = drivers.windowing.URLtoFilename( + url, 
scrape_info_list[0].outdir, ".bmp") + file2 = drivers.windowing.URLtoFilename( + url, scrape_info_list[1].outdir, ".bmp") + + comparison_result = compare.Compare(file1, file2, + maskdir=command["--maskdir"]) + + if comparison_result is not None: + result = "not-equal" + + if command["--diffdir"]: + comparison_result[1].save( + drivers.windowing.URLtoFilename(url, command["--diffdir"], ".bmp")) + + # TODO(jhaas): maybe use the logging module rather than raw file writes + log_file.write("%s %s %s %s\n" % (url, + scrape_info_list[0].result, + scrape_info_list[1].result, + result)) diff --git a/tools/site_compare/commands/maskmaker.py b/tools/site_compare/commands/maskmaker.py new file mode 100644 index 0000000..95bdeb45 --- /dev/null +++ b/tools/site_compare/commands/maskmaker.py @@ -0,0 +1,298 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Component for automatically creating masks of changing areas of a website. + +Works by repeated invokation of a browser and scraping of the resulting page. +Areas that differ will be added to the auto-generated mask. The mask generator +considers the mask complete when further scrapes fail to produce any differences +in the mask. +""" + +import os # Functions for walking the directory tree +import tempfile # Get a temporary directory to hold intermediates +import time # Used for sleep() and naming masks by time + +import command_line +import drivers +from PIL import Image +from PIL import ImageChops +import scrapers + + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["maskmaker"], + "Automatically generates a mask from a list of URLs", + ValidateMaskmaker, + ExecuteMaskmaker) + + cmd.AddArgument( + ["-bp", "--browserpath"], "Full path to browser's executable", + type="readfile", metaname="PATH") + cmd.AddArgument( + ["-b", "--browser"], "Which browser to use", type="string", + default="chrome") + cmd.AddArgument( + ["-bv", "--browserver"], "Version of the browser", metaname="VERSION") + cmd.AddArgument( + ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR", + required=True) + cmd.AddArgument( + ["-u", "--url"], "URL to compare") + cmd.AddArgument( + ["-l", "--list"], "List of URLs to compare", type="readfile") + 
cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=60) + cmd.AddArgument( + ["-w", "--wait"], + "Amount of time (in seconds) to wait between successive scrapes", + type="int", default=60) + cmd.AddArgument( + ["-sc", "--scrapes"], + "Number of successive scrapes which must result in no change to a mask " + "before mask creation is considered complete", type="int", default=10) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes") + cmd.AddArgument( + ["-gu", "--giveup"], + "Number of times to scrape before giving up", type="int", default=50) + cmd.AddArgument( + ["-th", "--threshhold"], + "Percentage of different pixels (0-100) above which the scrape will be" + "discarded and the mask not updated.", type="int", default=100) + cmd.AddArgument( + ["--er", "--errors"], + "Number of times a scrape can fail before giving up on the URL.", + type="int", default=1) + + +def ValidateMaskmaker(command): + """Validate the arguments to maskmaker. 
Raises ParseError if failed.""" + executables = [".exe", ".com", ".bat"] + if command["--browserpath"]: + if os.path.splitext(command["--browserpath"])[1].lower() not in executables: + raise command_line.ParseError("Browser filename must be an executable") + + +def ExecuteMaskmaker(command): + """Performs automatic mask generation.""" + + # Get the list of URLs to generate masks for + class MaskmakerURL(object): + """Helper class for holding information about a URL passed to maskmaker.""" + __slots__ = ['url', 'consecutive_successes', 'errors'] + def __init__(self, url): + self.url = url + self.consecutive_successes = 0 + self.errors = 0 + + if command["--url"]: + url_list = [MaskmakerURL(command["--url"])] + else: + startline = command["--startline"] + if command["--count"]: + endline = startline+command["--count"] + else: + endline = command["--endline"] + url_list = [MaskmakerURL(url.strip()) for url in + open(command["--list"], "r").readlines()[startline:endline]] + + complete_list = [] + error_list = [] + + outdir = command["--outdir"] + scrapes = command["--scrapes"] + errors = command["--errors"] + size = command["--size"] + scrape_pass = 0 + + scrapedir = command["--scrapedir"] + if not scrapedir: scrapedir = tempfile.gettempdir() + + # Get the scraper + scraper = scrapers.GetScraper((command["--browser"], command["--browserver"])) + + # Repeatedly iterate through the list of URLs until either every URL has + # a successful mask or too many errors, or we've exceeded the giveup limit + while url_list and scrape_pass < command["--giveup"]: + # Scrape each URL + for url in url_list: + print "Processing %r..." % url.url + mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp") + + # Load the existing mask. This is in a loop so we can try to recover + # from error conditions + while True: + try: + mask = Image.open(mask_filename) + if mask.size != size: + print " %r already exists and is the wrong size! 
(%r vs %r)" % ( + mask_filename, mask.size, size) + mask_filename = "%s_%r%s" % ( + mask_filename[:-4], size, mask_filename[-4:]) + print " Trying again as %r..." % mask_filename + continue + break + except IOError: + print " %r does not exist, creating" % mask_filename + mask = Image.new("1", size, 1) + mask.save(mask_filename) + + # Find the stored scrape path + mask_scrape_dir = os.path.join( + scrapedir, os.path.splitext(os.path.basename(mask_filename))[0]) + drivers.windowing.PreparePath(mask_scrape_dir) + + # Find the baseline image + mask_scrapes = os.listdir(mask_scrape_dir) + mask_scrapes.sort() + + if not mask_scrapes: + print " No baseline image found, mask will not be updated" + baseline = None + else: + baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0])) + + mask_scrape_filename = os.path.join(mask_scrape_dir, + time.strftime("%y%m%d-%H%M%S.bmp")) + + # Do the scrape + result = scraper.Scrape( + [url.url], mask_scrape_dir, size, (0, 0), + command["--timeout"], path=command["--browserpath"], + filename=mask_scrape_filename) + + if result: + # Return value other than None means an error + print " Scrape failed with error '%r'" % result + url.errors += 1 + if url.errors >= errors: + print " ** Exceeded maximum error count for this URL, giving up" + continue + + # Load the new scrape + scrape = Image.open(mask_scrape_filename) + + # Calculate the difference between the new scrape and the baseline, + # subject to the current mask + if baseline: + diff = ImageChops.multiply(ImageChops.difference(scrape, baseline), + mask.convert(scrape.mode)) + + # If the difference is none, there's nothing to update + if max(diff.getextrema()) == (0, 0): + print " Scrape identical to baseline, no change in mask" + url.consecutive_successes += 1 + if url.consecutive_successes >= scrapes: + print " ** No change for %r scrapes, done!" 
% scrapes + else: + # convert the difference to black and white, then change all + # black pixels (where the scrape and the baseline were identical) + # to white, all others (where the scrape and the baseline differed) + # to black. + # + # Since the below command is a little unclear, here's how it works. + # 1. convert("L") converts the RGB image to grayscale + # 2. point() maps grayscale values (or the individual channels) + # of an RGB image) to different ones. Because it operates on + # individual channels, the grayscale conversion from step 1 + # is necessary. + # 3. The "1" second parameter to point() outputs the result as + # a monochrome bitmap. If the original RGB image were converted + # directly to monochrome, PIL would dither it. + diff = diff.convert("L").point([255]+[0]*255, "1") + + # count the number of different pixels + diff_pixels = diff.getcolors()[0][0] + + # is this too much? + diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1]) + if diff_pixel_percent > command["--threshhold"]: + print (" Scrape differed from baseline by %.2f percent, ignoring" + % diff_pixel_percent) + else: + print " Scrape differed in %d pixels, updating mask" % diff_pixels + mask = ImageChops.multiply(mask, diff) + mask.save(mask_filename) + + # reset the number of consecutive "good" scrapes + url.consecutive_successes = 0 + + # Remove URLs whose mask is deemed done + complete_list.extend( + [url for url in url_list if url.consecutive_successes >= scrapes]) + error_list.extend( + [url for url in url_list if url.errors >= errors]) + url_list = [ + url for url in url_list if + url.consecutive_successes < scrapes and + url.errors < errors] + + scrape_pass += 1 + print "**Done with scrape pass %d\n" % scrape_pass + + if scrape_pass >= command["--giveup"]: + print "**Exceeded giveup threshhold. Giving up." + else: + print "Waiting %d seconds..." 
% command["--wait"] + time.sleep(command["--wait"]) + + print + print "*** MASKMAKER COMPLETE ***" + print "Summary report:" + print " %d masks successfully generated" % len(complete_list) + for url in complete_list: + print " ", url.url + print " %d masks failed with too many errors" % len(error_list) + for url in error_list: + print " ", url.url + if scrape_pass >= command["--giveup"]: + print (" %d masks were not completed before " + "reaching the giveup threshhold" % len(url_list)) + for url in url_list: + print " ", url.url diff --git a/tools/site_compare/commands/measure.py b/tools/site_compare/commands/measure.py new file mode 100644 index 0000000..477db57 --- /dev/null +++ b/tools/site_compare/commands/measure.py @@ -0,0 +1,78 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Command for measuring how long pages take to load in a browser. + +Prerequisites: + 1. The command_line package from tools/site_compare + 2. Either the IE BHO or Firefox extension (or both) + +Installation: + 1. Build the IE BHO, or call regsvr32 on a prebuilt binary + 2. Add a file called "measurepageloadtimeextension@google.com" to + the default Firefox profile directory under extensions, containing + the path to the Firefox extension root + +Invoke with the command line arguments as documented within +the command line. +""" + +import command_line +import win32process + +from drivers import windowing +from utils import browser_iterate + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["measure"], + "Measures how long a series of URLs takes to load in one or more browsers.", + None, + ExecuteMeasure) + + browser_iterate.SetupIterationCommandLine(cmd) + cmd.AddArgument( + ["-log", "--logfile"], "File to write output", type="string", required=True) + + +def ExecuteMeasure(command): + """Executes the Measure command.""" + + def LogResult(url, proc, wnd, result): + """Write the result of the browse to the log file.""" + log_file.write(result) + + log_file = open(command["--logfile"], "w") + + browser_iterate.Iterate(command, LogResult) + + # Close the log file and return. We're done. 
+ log_file.close() diff --git a/tools/site_compare/commands/scrape.py b/tools/site_compare/commands/scrape.py new file mode 100644 index 0000000..a9b3398 --- /dev/null +++ b/tools/site_compare/commands/scrape.py @@ -0,0 +1,85 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Command for scraping images from a URL or list of URLs. + +Prerequisites: + 1. The command_line package from tools/site_compare + 2. 
Either the IE BHO or Firefox extension (or both) + +Installation: + 1. Build the IE BHO, or call regsvr32 on a prebuilt binary + 2. Add a file called "measurepageloadtimeextension@google.com" to + the default Firefox profile directory under extensions, containing + the path to the Firefox extension root + +Invoke with the command line arguments as documented within +the command line. +""" + +import command_line + +from drivers import windowing +from utils import browser_iterate + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["scrape"], + "Scrapes an image from a URL or series of URLs.", + None, + ExecuteScrape) + + browser_iterate.SetupIterationCommandLine(cmd) + cmd.AddArgument( + ["-log", "--logfile"], "File to write text output", type="string") + cmd.AddArgument( + ["-out", "--outdir"], "Directory to store scrapes", type="string", required=True) + + +def ExecuteScrape(command): + """Executes the Scrape command.""" + + def ScrapeResult(url, proc, wnd, result): + """Capture and save the scrape.""" + if log_file: log_file.write(result) + + # Scrape the page + image = windowing.ScrapeWindow(wnd) + filename = windowing.URLtoFilename(url, command["--outdir"], ".bmp") + image.save(filename) + + if command["--logfile"]: log_file = open(command["--logfile"], "w") + else: log_file = None + + browser_iterate.Iterate(command, ScrapeResult) + + # Close the log file and return. We're done. + if log_file: log_file.close() diff --git a/tools/site_compare/commands/timeload.py b/tools/site_compare/commands/timeload.py new file mode 100644 index 0000000..a983173 --- /dev/null +++ b/tools/site_compare/commands/timeload.py @@ -0,0 +1,170 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SiteCompare command to time page loads + +Loads a series of URLs in a series of browsers (and browser versions) +and measures how long the page takes to load in each. Outputs a +comma-delimited file. The first line is "URL,[browser names]", each +additional line is a URL followed by comma-delimited times (in seconds), +or the string "timeout" or "crashed". 
+ +""" + +import os # Functions for walking the directory tree +import tempfile # Get a temporary directory to hold intermediates + +import command_line +import drivers # Functions for driving keyboard/mouse/windows, OS-specific +import operators # Functions that, given two bitmaps as input, produce + # output depending on the performance of an operation +import scrapers # Functions that know how to capture a render from + # particular browsers + + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["timeload"], + "Measures how long a series of URLs takes to load in one or more browsers.", + None, + ExecuteTimeLoad) + + cmd.AddArgument( + ["-b", "--browsers"], "List of browsers to use. Comma-separated", + type="string", required=True) + cmd.AddArgument( + ["-bp", "--browserpaths"], "List of paths to browsers. Comma-separated", + type="string", required=False) + cmd.AddArgument( + ["-bv", "--browserversions"], "List of versions of browsers. 
Comma-separated", + type="string", required=False) + cmd.AddArgument( + ["-u", "--url"], "URL to time") + cmd.AddArgument( + ["-l", "--list"], "List of URLs to time", type="readfile") + cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=60) + cmd.AddArgument( + ["-log", "--logfile"], "File to write output", type="string", required=True) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + + +def ExecuteTimeLoad(command): + """Executes the TimeLoad command.""" + browsers = command["--browsers"].split(",") + num_browsers = len(browsers) + + if command["--browserversions"]: + browser_versions = command["--browserversions"].split(",") + else: + browser_versions = [None] * num_browsers + + if command["--browserpaths"]: + browser_paths = command["--browserpaths"].split(",") + else: + browser_paths = [None] * num_browsers + + if len(browser_versions) != num_browsers: + raise ValueError( + "--browserversions must be same length as --browser_paths") + if len(browser_paths) != num_browsers: + raise ValueError( + "--browserversions must be same length as --browser_paths") + + if [b for b in browsers if b not in ["chrome", "ie", "firefox"]]: + raise ValueError("unknown browsers: %r" % b) + + scraper_list = [] + + for b in xrange(num_browsers): + version = browser_versions[b] + if 
not version: version = None + + scraper = scrapers.GetScraper( (browsers[b], version) ) + if not scraper: + raise ValueError("could not find scraper for (%r, %r)" % + (browsers[b], version)) + scraper_list.append(scraper) + + if command["--url"]: + url_list = [command["--url"]] + else: + startline = command["--startline"] + if command["--count"]: + endline = startline+command["--count"] + else: + endline = command["--endline"] + url_list = [url.strip() for url in + open(command["--list"], "r").readlines()[startline:endline]] + + log_file = open(command["--logfile"], "w") + + log_file.write("URL") + for b in xrange(num_browsers): + log_file.write(",%s" % browsers[b]) + + if browser_versions[b]: log_file.write(" %s" % browser_versions[b]) + log_file.write("\n") + + results = {} + for url in url_list: + results[url] = [None] * num_browsers + + for b in xrange(num_browsers): + result = scraper_list[b].Time(url_list, command["--size"], + command["--timeout"], + path=browser_paths[b]) + + for (url, time) in result: + results[url][b] = time + + # output the results + for url in url_list: + log_file.write(url) + for b in xrange(num_browsers): + log_file.write(",%r" % results[url][b]) + |