summaryrefslogtreecommitdiffstats
path: root/tools/site_compare/scrapers/ie/ie7.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/site_compare/scrapers/ie/ie7.py')
-rw-r--r--tools/site_compare/scrapers/ie/ie7.py66
1 files changed, 33 insertions, 33 deletions
diff --git a/tools/site_compare/scrapers/ie/ie7.py b/tools/site_compare/scrapers/ie/ie7.py
index f5d7583..da26d9b 100644
--- a/tools/site_compare/scrapers/ie/ie7.py
+++ b/tools/site_compare/scrapers/ie/ie7.py
@@ -23,29 +23,29 @@ def GetBrowser(path):
Args:
path: full path to browser
-
+
Returns:
A tuple of (process handle, render pane)
"""
if not path: path = DEFAULT_PATH
-
+
(iewnd, ieproc, address_bar, render_pane, tab_window) = InvokeBrowser(path)
return (ieproc, iewnd, render_pane)
def InvokeBrowser(path):
"""Invoke the IE browser.
-
+
Args:
path: full path to browser
-
+
Returns:
A tuple of (main window, process handle, address bar,
render_pane, tab_window)
"""
# Invoke IE
(ieproc, iewnd) = windowing.InvokeAndWait(path)
-
+
# Get windows we'll need
for tries in xrange(10):
try:
@@ -60,13 +60,13 @@ def InvokeBrowser(path):
time.sleep(1)
continue
break
-
+
return (iewnd, ieproc, address_bar, render_pane, tab_window)
def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
"""Invoke a browser, send it to a series of URLs, and save its output.
-
+
Args:
urls: list of URLs to scrape
outdir: directory to place output
@@ -74,32 +74,32 @@ def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
pos: position of browser window
timeout: amount of time to wait for page to load
kwargs: miscellaneous keyword args
-
+
Returns:
None if success, else an error string
"""
path = r"c:\program files\internet explorer\iexplore.exe"
-
+
if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
(iewnd, ieproc, address_bar, render_pane, tab_window) = (
InvokeBrowser(path) )
-
+
# Resize and reposition the frame
windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane)
-
+
# Visit each URL we're given
if type(urls) in types.StringTypes: urls = [urls]
-
+
timedout = False
-
+
for url in urls:
-
+
# Double-click in the address bar, type the name, and press Enter
mouse.DoubleClickInWindow(address_bar)
keyboard.TypeString(url)
keyboard.TypeString("\n")
-
+
# Wait for the page to finish loading
load_time = windowing.WaitForThrobber(
tab_window, (6, 8, 22, 24), timeout)
@@ -107,10 +107,10 @@ def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
if timedout:
break
-
+
# Scrape the page
image = windowing.ScrapeWindow(render_pane)
-
+
# Save to disk
if "filename" in kwargs:
if callable(kwargs["filename"]):
@@ -120,55 +120,55 @@ def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
else:
filename = windowing.URLtoFilename(url, outdir, ".bmp")
image.save(filename)
-
+
windowing.EndProcess(ieproc)
-
+
if timedout:
return "timeout"
-
-
+
+
def Time(urls, size, timeout, **kwargs):
"""Measure how long it takes to load each of a series of URLs
-
+
Args:
urls: list of URLs to time
size: size of browser window to use
timeout: amount of time to wait for page to load
kwargs: miscellaneous keyword args
-
+
Returns:
A list of tuples (url, time). "time" can be "crashed" or "timeout"
"""
if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
else: path = DEFAULT_PATH
proc = None
-
+
# Visit each URL we're given
if type(urls) in types.StringTypes: urls = [urls]
-
+
ret = []
for url in urls:
try:
# Invoke the browser if necessary
if not proc:
(wnd, proc, address_bar, render_pane, tab_window) = InvokeBrowser(path)
-
+
# Resize and reposition the frame
windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)
-
+
# Double-click in the address bar, type the name, and press Enter
mouse.DoubleClickInWindow(address_bar)
keyboard.TypeString(url)
keyboard.TypeString("\n")
-
+
# Wait for the page to finish loading
load_time = windowing.WaitForThrobber(
tab_window, (6, 8, 22, 24), timeout)
timedout = load_time < 0
-
+
if timedout:
load_time = "timeout"
-
+
# Send an alt-F4 to make the browser close; if this times out,
# we've probably got a crash
keyboard.TypeString(r"{\4}", use_modifiers=True)
@@ -179,9 +179,9 @@ def Time(urls, size, timeout, **kwargs):
except pywintypes.error:
load_time = "crashed"
proc = None
-
+
ret.append( (url, load_time) )
-
+
# Send an alt-F4 to make the browser close; if this times out,
# we've probably got a crash
if proc:
@@ -191,7 +191,7 @@ def Time(urls, size, timeout, **kwargs):
return ret
-
+
if __name__ == "__main__":
# We're being invoked rather than imported, so run some tests
path = r"c:\sitecompare\scrapes\ie7\7.0.5380.11"