diff options
Diffstat (limited to 'tools/site_compare/scrapers/ie/ie7.py')
-rw-r--r-- | tools/site_compare/scrapers/ie/ie7.py | 66 |
1 files changed, 33 insertions, 33 deletions
diff --git a/tools/site_compare/scrapers/ie/ie7.py b/tools/site_compare/scrapers/ie/ie7.py index f5d7583..da26d9b 100644 --- a/tools/site_compare/scrapers/ie/ie7.py +++ b/tools/site_compare/scrapers/ie/ie7.py @@ -23,29 +23,29 @@ def GetBrowser(path): Args: path: full path to browser - + Returns: A tuple of (process handle, render pane) """ if not path: path = DEFAULT_PATH - + (iewnd, ieproc, address_bar, render_pane, tab_window) = InvokeBrowser(path) return (ieproc, iewnd, render_pane) def InvokeBrowser(path): """Invoke the IE browser. - + Args: path: full path to browser - + Returns: A tuple of (main window, process handle, address bar, render_pane, tab_window) """ # Invoke IE (ieproc, iewnd) = windowing.InvokeAndWait(path) - + # Get windows we'll need for tries in xrange(10): try: @@ -60,13 +60,13 @@ def InvokeBrowser(path): time.sleep(1) continue break - + return (iewnd, ieproc, address_bar, render_pane, tab_window) def Scrape(urls, outdir, size, pos, timeout=20, **kwargs): """Invoke a browser, send it to a series of URLs, and save its output. - + Args: urls: list of URLs to scrape outdir: directory to place output @@ -74,32 +74,32 @@ def Scrape(urls, outdir, size, pos, timeout=20, **kwargs): pos: position of browser window timeout: amount of time to wait for page to load kwargs: miscellaneous keyword args - + Returns: None if success, else an error string """ path = r"c:\program files\internet explorer\iexplore.exe" - + if "path" in kwargs and kwargs["path"]: path = kwargs["path"] (iewnd, ieproc, address_bar, render_pane, tab_window) = ( InvokeBrowser(path) ) - + # Resize and reposition the frame windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane) - + # Visit each URL we're given if type(urls) in types.StringTypes: urls = [urls] - + timedout = False - + for url in urls: - + # Double-click in the address bar, type the name, and press Enter mouse.DoubleClickInWindow(address_bar) keyboard.TypeString(url) keyboard.TypeString("\n") - + # Wait for the page to finish loading load_time = windowing.WaitForThrobber( tab_window, (6, 8, 22, 24), timeout) @@ -107,10 +107,10 @@ def Scrape(urls, outdir, size, pos, timeout=20, **kwargs): if timedout: break - + # Scrape the page image = windowing.ScrapeWindow(render_pane) - + # Save to disk if "filename" in kwargs: if callable(kwargs["filename"]): @@ -120,55 +120,55 @@ def Scrape(urls, outdir, size, pos, timeout=20, **kwargs): else: filename = windowing.URLtoFilename(url, outdir, ".bmp") image.save(filename) - + windowing.EndProcess(ieproc) - + if timedout: return "timeout" - - + + def Time(urls, size, timeout, **kwargs): """Measure how long it takes to load each of a series of URLs - + Args: urls: list of URLs to time size: size of browser window to use timeout: amount of time to wait for page to load kwargs: miscellaneous keyword args - + Returns: A list of tuples (url, time). "time" can be "crashed" or "timeout" """ if "path" in kwargs and kwargs["path"]: path = kwargs["path"] else: path = DEFAULT_PATH proc = None - + # Visit each URL we're given if type(urls) in types.StringTypes: urls = [urls] - + ret = [] for url in urls: try: # Invoke the browser if necessary if not proc: (wnd, proc, address_bar, render_pane, tab_window) = InvokeBrowser(path) - + # Resize and reposition the frame windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane) - + # Double-click in the address bar, type the name, and press Enter mouse.DoubleClickInWindow(address_bar) keyboard.TypeString(url) keyboard.TypeString("\n") - + # Wait for the page to finish loading load_time = windowing.WaitForThrobber( tab_window, (6, 8, 22, 24), timeout) timedout = load_time < 0 - + if timedout: load_time = "timeout" - + # Send an alt-F4 to make the browser close; if this times out, # we've probably got a crash keyboard.TypeString(r"{\4}", use_modifiers=True) @@ -179,9 +179,9 @@ def Time(urls, size, timeout, **kwargs): except pywintypes.error: load_time = "crashed" proc = None - + ret.append( (url, load_time) ) - + # Send an alt-F4 to make the browser close; if this times out, # we've probably got a crash if proc: @@ -191,7 +191,7 @@ def Time(urls, size, timeout, **kwargs): return ret - + if __name__ == "__main__": # We're being invoked rather than imported, so run some tests path = r"c:\sitecompare\scrapes\ie7\7.0.5380.11" |