Diffstat (limited to 'tools/site_compare/commands/maskmaker.py')
-rw-r--r--  tools/site_compare/commands/maskmaker.py  298
1 files changed, 298 insertions, 0 deletions
diff --git a/tools/site_compare/commands/maskmaker.py b/tools/site_compare/commands/maskmaker.py
new file mode 100644
index 0000000..95bdeb45
--- /dev/null
+++ b/tools/site_compare/commands/maskmaker.py
@@ -0,0 +1,298 @@

#!/usr/bin/python2.4
# Copyright 2008, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Component for automatically creating masks of changing areas of a website.

Works by repeated invocation of a browser and scraping of the resulting page.
Areas that differ will be added to the auto-generated mask. The mask generator
considers the mask complete when further scrapes fail to produce any
differences in the mask.
+""" + +import os # Functions for walking the directory tree +import tempfile # Get a temporary directory to hold intermediates +import time # Used for sleep() and naming masks by time + +import command_line +import drivers +from PIL import Image +from PIL import ImageChops +import scrapers + + +def CreateCommand(cmdline): + """Inserts the command and arguments into a command line for parsing.""" + cmd = cmdline.AddCommand( + ["maskmaker"], + "Automatically generates a mask from a list of URLs", + ValidateMaskmaker, + ExecuteMaskmaker) + + cmd.AddArgument( + ["-bp", "--browserpath"], "Full path to browser's executable", + type="readfile", metaname="PATH") + cmd.AddArgument( + ["-b", "--browser"], "Which browser to use", type="string", + default="chrome") + cmd.AddArgument( + ["-bv", "--browserver"], "Version of the browser", metaname="VERSION") + cmd.AddArgument( + ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR", + required=True) + cmd.AddArgument( + ["-u", "--url"], "URL to compare") + cmd.AddArgument( + ["-l", "--list"], "List of URLs to compare", type="readfile") + cmd.AddMutualExclusion(["--url", "--list"]) + cmd.AddArgument( + ["-s", "--startline"], "First line of URL list", type="int") + cmd.AddArgument( + ["-e", "--endline"], "Last line of URL list (exclusive)", type="int") + cmd.AddArgument( + ["-c", "--count"], "Number of lines of URL file to use", type="int") + cmd.AddDependency("--startline", "--list") + cmd.AddRequiredGroup(["--url", "--list"]) + cmd.AddDependency("--endline", "--list") + cmd.AddDependency("--count", "--list") + cmd.AddMutualExclusion(["--count", "--endline"]) + cmd.AddDependency("--count", "--startline") + cmd.AddArgument( + ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to " + "finish loading", + type="int", default=60) + cmd.AddArgument( + ["-w", "--wait"], + "Amount of time (in seconds) to wait between successive scrapes", + type="int", default=60) + cmd.AddArgument( + ["-sc", "--scrapes"], + "Number of successive scrapes which must result in no change to a mask " + "before mask creation is considered complete", type="int", default=10) + cmd.AddArgument( + ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords") + cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes") + cmd.AddArgument( + ["-gu", "--giveup"], + "Number of times to scrape before giving up", type="int", default=50) + cmd.AddArgument( + ["-th", "--threshhold"], + "Percentage of different pixels (0-100) above which the scrape will be" + "discarded and the mask not updated.", type="int", default=100) + cmd.AddArgument( + ["--er", "--errors"], + "Number of times a scrape can fail before giving up on the URL.", + type="int", default=1) + + +def ValidateMaskmaker(command): + """Validate the arguments to maskmaker. 
  Raises ParseError if failed."""
  executables = [".exe", ".com", ".bat"]
  if command["--browserpath"]:
    if os.path.splitext(command["--browserpath"])[1].lower() not in executables:
      raise command_line.ParseError("Browser filename must be an executable")


def ExecuteMaskmaker(command):
  """Performs automatic mask generation."""

  # Get the list of URLs to generate masks for
  class MaskmakerURL(object):
    """Helper class for holding information about a URL passed to maskmaker."""
    __slots__ = ['url', 'consecutive_successes', 'errors']
    def __init__(self, url):
      self.url = url
      self.consecutive_successes = 0
      self.errors = 0

  if command["--url"]:
    url_list = [MaskmakerURL(command["--url"])]
  else:
    startline = command["--startline"]
    if command["--count"]:
      endline = startline+command["--count"]
    else:
      endline = command["--endline"]
    url_list = [MaskmakerURL(url.strip()) for url in
                open(command["--list"], "r").readlines()[startline:endline]]

  complete_list = []
  error_list = []

  outdir = command["--outdir"]
  scrapes = command["--scrapes"]
  errors = command["--errors"]
  size = command["--size"]
  scrape_pass = 0

  scrapedir = command["--scrapedir"]
  if not scrapedir: scrapedir = tempfile.gettempdir()

  # Get the scraper
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  # Repeatedly iterate through the list of URLs until either every URL has
  # a successful mask or too many errors, or we've exceeded the giveup limit
  while url_list and scrape_pass < command["--giveup"]:
    # Scrape each URL
    for url in url_list:
      print "Processing %r..." % url.url
      mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp")

      # Load the existing mask. This is in a loop so we can try to recover
      # from error conditions
      while True:
        try:
          mask = Image.open(mask_filename)
          if mask.size != size:
            print "  %r already exists and is the wrong size! (%r vs %r)" % (
              mask_filename, mask.size, size)
            mask_filename = "%s_%r%s" % (
              mask_filename[:-4], size, mask_filename[-4:])
            print "  Trying again as %r..." % mask_filename
            continue
          break
        except IOError:
          print "  %r does not exist, creating" % mask_filename
          mask = Image.new("1", size, 1)
          mask.save(mask_filename)

      # Find the stored scrape path
      mask_scrape_dir = os.path.join(
        scrapedir, os.path.splitext(os.path.basename(mask_filename))[0])
      drivers.windowing.PreparePath(mask_scrape_dir)

      # Find the baseline image
      mask_scrapes = os.listdir(mask_scrape_dir)
      mask_scrapes.sort()

      if not mask_scrapes:
        print "  No baseline image found, mask will not be updated"
        baseline = None
      else:
        baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0]))

      mask_scrape_filename = os.path.join(mask_scrape_dir,
                                          time.strftime("%y%m%d-%H%M%S.bmp"))

      # Do the scrape
      result = scraper.Scrape(
        [url.url], mask_scrape_dir, size, (0, 0),
        command["--timeout"], path=command["--browserpath"],
        filename=mask_scrape_filename)

      if result:
        # Return value other than None means an error
        print "  Scrape failed with error '%r'" % result
        url.errors += 1
        if url.errors >= errors:
          print "  ** Exceeded maximum error count for this URL, giving up"
        continue

      # Load the new scrape
      scrape = Image.open(mask_scrape_filename)

      # Calculate the difference between the new scrape and the baseline,
      # subject to the current mask
      if baseline:
        diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                                   mask.convert(scrape.mode))

        # If the difference is none, there's nothing to update
        if max(diff.getextrema()) == (0, 0):
          print "  Scrape identical to baseline, no change in mask"
          url.consecutive_successes += 1
          if url.consecutive_successes >= scrapes:
            print "  ** No change for %r scrapes, done!" % scrapes
        else:
          # Convert the difference to black and white, then change all
          # black pixels (where the scrape and the baseline were identical)
          # to white, and all others (where the scrape and the baseline
          # differed) to black.
          #
          # Since the below command is a little unclear, here's how it works.
          #   1. convert("L") converts the RGB image to grayscale
          #   2. point() maps grayscale values (or the individual channels
          #      of an RGB image) to different ones. Because it operates on
          #      individual channels, the grayscale conversion from step 1
          #      is necessary.
          #   3. The "1" second parameter to point() outputs the result as
          #      a monochrome bitmap. If the original RGB image were converted
          #      directly to monochrome, PIL would dither it.
          diff = diff.convert("L").point([255]+[0]*255, "1")

          # Count the number of different pixels
          diff_pixels = diff.getcolors()[0][0]

          # Is this too much?
          diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1])
          if diff_pixel_percent > command["--threshhold"]:
            print ("  Scrape differed from baseline by %.2f percent, ignoring"
                   % diff_pixel_percent)
          else:
            print "  Scrape differed in %d pixels, updating mask" % diff_pixels
            mask = ImageChops.multiply(mask, diff)
            mask.save(mask_filename)

            # Reset the number of consecutive "good" scrapes
            url.consecutive_successes = 0

    # Remove URLs whose mask is deemed done
    complete_list.extend(
      [url for url in url_list if url.consecutive_successes >= scrapes])
    error_list.extend(
      [url for url in url_list if url.errors >= errors])
    url_list = [
      url for url in url_list if
      url.consecutive_successes < scrapes and
      url.errors < errors]

    scrape_pass += 1
    print "**Done with scrape pass %d\n" % scrape_pass

    if scrape_pass >= command["--giveup"]:
      print "**Exceeded giveup threshhold. Giving up."
    else:
      print "Waiting %d seconds..." % command["--wait"]
      time.sleep(command["--wait"])

  print
  print "*** MASKMAKER COMPLETE ***"
  print "Summary report:"
  print "  %d masks successfully generated" % len(complete_list)
  for url in complete_list:
    print "    ", url.url
  print "  %d masks failed with too many errors" % len(error_list)
  for url in error_list:
    print "    ", url.url
  if scrape_pass >= command["--giveup"]:
    print ("  %d masks were not completed before "
           "reaching the giveup threshhold" % len(url_list))
    for url in url_list:
      print "    ", url.url
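
The mask-update step at the heart of ExecuteMaskmaker (difference the two scrapes, gate the result through the current mask, threshold it to a 1-bit image, then multiply it into the mask) can be exercised in isolation. Below is a minimal sketch of that technique; it is written for modern Python 3 and Pillow rather than the Python 2.4-era PIL above, and the helper name and filenames are hypothetical placeholders, not part of the tool.

    # Minimal sketch of the maskmaker update step, assuming Python 3 + Pillow.
    from PIL import Image, ImageChops

    def update_mask(mask, baseline, scrape):
        """Fold the differences between two same-size scrapes into a 1-bit mask.

        In the mask, white (1) marks pixels that have never differed;
        black (0) marks pixels that differed on some earlier pass.
        """
        # Difference of the two scrapes, kept only where the mask is still white
        diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                                   mask.convert(scrape.mode))
        # Grayscale, then map 0 -> white and everything else -> black as a
        # 1-bit image (the same [255] + [0]*255 lookup-table trick as above)
        diff = diff.convert("L").point([255] + [0] * 255, "1")
        # Knock the newly-differing pixels out of the mask
        return ImageChops.multiply(mask, diff)

    baseline = Image.open("scrape1.bmp")      # hypothetical filenames
    scrape = Image.open("scrape2.bmp")
    mask = Image.new("1", baseline.size, 1)   # start all-white: nothing masked
    mask = update_mask(mask, baseline, scrape)
    mask.save("mask.bmp")

Since both the mask and the thresholded difference are mode "1", the final multiply acts as a logical AND: once a pixel has differed on any pass it stays black in every later pass, so the mask only ever grows. That is why the tool can declare a mask complete after --scrapes consecutive passes produce no change.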