1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
|
#!/usr/bin/python2.4
# Copyright 2008, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""SiteCompare module for invoking, locating, and manipulating windows.
This module is a catch-all wrapper for operating system UI functionality
that doesn't belong in other modules. It contains functions for finding
particular windows, scraping their contents, and invoking processes to
create them.
"""
import os
import string
import sys
import time

import PIL.ImageGrab
import pywintypes
import win32event
import win32gui
import win32process
def FindChildWindows(hwnd, path):
    """Find a set of windows through a path specification.

    Args:
      hwnd: Handle of the parent window
      path: Path to the window to find. Has the following form:
        "foo/bar/baz|foobar/|foobarbaz"
        The slashes specify the "path" to the child window.
        The text is the window class, a pipe (if present) is a title.
        * is a wildcard and will find all child windows at that level.
        Only the first two pipe-separated fields of a segment are used.

    Returns:
      A list of the windows that were found (empty if nothing matched)

    Raises:
      pywintypes.error: if FindWindowEx() fails with any error other
        than 2 ("not found").
    """
    windows_to_check = [hwnd]

    # The strategy will be to take windows_to_check and use it
    # to find a list of windows that match the next specification
    # in the path, then repeat with the list of found windows as the
    # new list of windows to check
    for segment in path.split("/"):
        windows_found = []
        check_values = segment.split("|")

        # The first field is the window class. If it is absent
        # (or wildcarded) pass None so that any class matches.
        window_class = check_values[0]
        if window_class == "*" or not window_class:
            window_class = None

        # The second field, if present, is the window caption.
        # Without a pipe there is no caption constraint at all.
        if len(check_values) == 1:
            window_caption = None
        else:
            window_caption = check_values[1]

        for parent in windows_to_check:
            # FindWindowEx() resumes the search after the given sibling;
            # 0 means "start with the first child".
            previous = 0
            while True:
                try:
                    found = win32gui.FindWindowEx(
                        parent, previous, window_class, window_caption)
                except pywintypes.error:
                    # sys.exc_info() rather than "except ..., e" so the
                    # code parses on every Python version.
                    error = sys.exc_info()[1]
                    # FindWindowEx() raises error 2 if not found
                    if error.args[0] != 2:
                        raise  # bare raise keeps the original traceback
                    found = 0

                if not found:
                    break

                # FindWindowEx struck gold: record it and continue the
                # search with the next sibling.
                windows_found.append(found)
                previous = found

        # The windows we found become the windows to check for the
        # next segment
        windows_to_check = windows_found

    return windows_found
def FindChildWindow(hwnd, path):
    """Find a single window through a path specification.

    Convenience wrapper around FindChildWindows() for the common case
    where exactly one matching window is expected.

    Args:
      hwnd: Handle of the parent window
      path: Path to the window to find. See FindChildWindows()

    Returns:
      The first window that FindChildWindows() reports

    Raises:
      IndexError: if no window matched the path
    """
    matches = FindChildWindows(hwnd, path)
    return matches[0]
def ScrapeWindow(hwnd, rect=None):
    """Grab the on-screen contents of a visible window as a bitmap.

    Args:
      hwnd: handle of the window to scrape
      rect: rectangle to scrape in client coords, as a 4-tuple of
        (left, top, right, bottom). Defaults to the whole client area.

    Returns:
      An Image containing the scraped data
    """
    # The grab is taken from the screen, so bring the window to the front
    SetForegroundWindow(hwnd)

    # If no rectangle was specified, use the full client rectangle
    if not rect:
        rect = win32gui.GetClientRect(hwnd)

    # Translate both corners from client to screen coordinates
    screen_top_left = win32gui.ClientToScreen(hwnd, (rect[0], rect[1]))
    screen_bottom_right = win32gui.ClientToScreen(hwnd, (rect[2], rect[3]))

    # Concatenating the two (x, y) pairs yields the 4-tuple box to grab
    return PIL.ImageGrab.grab(screen_top_left + screen_bottom_right)
def SetForegroundWindow(hwnd):
    """Bring a window to the foreground.

    Args:
      hwnd: handle of the window to activate

    Returns:
      None
    """
    win32gui.SetForegroundWindow(hwnd)
def InvokeAndWait(path, cmdline="", timeout=10, tick=1.):
    """Invoke an application and wait for it to bring up a window.

    Args:
      path: full path to the executable to invoke
      cmdline: command line to pass to executable
      timeout: how long (in seconds) to wait before giving up
      tick: length of time to wait between checks

    Returns:
      A tuple (process handle, window handle). The window handle is
      None if no visible window owned by the new process showed up
      before the timeout; the process handle is returned either way.

    Raises:
      pywintypes.error: if window enumeration fails with a nonzero
        error code. Errors from CreateProcess() are not caught here.
    """

    def EnumWindowProc(hwnd, ret):
        """Internal enumeration func, checks for visibility and proper PID."""
        # Don't bother even checking hidden windows
        if win32gui.IsWindowVisible(hwnd):
            pid = win32process.GetWindowThreadProcessId(hwnd)[1]
            if pid == ret[0]:
                ret[1] = hwnd
                return 0  # 0 means stop enumeration
        return 1  # 1 means continue enumeration

    # We don't need to change anything about the startupinfo structure
    # (the default is quite sufficient) but we need to create it just the
    # same.
    sinfo = win32process.STARTUPINFO()

    proc = win32process.CreateProcess(
        path,     # path to new process's executable
        cmdline,  # application's command line
        None,     # process security attributes (default)
        None,     # thread security attributes (default)
        False,    # inherit parent's handles
        0,        # creation flags
        None,     # environment variables
        None,     # directory
        sinfo)    # default startup info

    # pywin32's CreateProcess returns
    # (process handle, thread handle, process id, thread id). At some
    # point we may care about the other members, but for now, all
    # we're after is the pid
    pid = proc[2]

    # Enumeration APIs can take an arbitrary object to be passed to the
    # enumeration function. We'll pass a list containing the PID we're
    # looking for, and an empty out slot to hold the found window
    ret = [pid, None]

    tries_until_timeout = timeout/tick
    num_tries = 0

    # Enumerate top-level windows, look for one with our PID. Note that
    # one tick is slept after every pass, including the successful one.
    while num_tries < tries_until_timeout and ret[1] is None:
        try:
            win32gui.EnumWindows(EnumWindowProc, ret)
        except pywintypes.error:
            # sys.exc_info() rather than "except ..., e" so the code
            # parses on every Python version.
            error = sys.exc_info()[1]
            # error 0 isn't an error, it just meant the enumeration was
            # terminated early
            if error.args[0]:
                raise  # bare raise keeps the original traceback

        time.sleep(tick)
        num_tries += 1

    # TODO(jhaas): Should we throw an exception if we timeout? Or is returning
    # a window ID of None sufficient?
    return (proc[0], ret[1])
def WaitForProcessExit(proc, timeout=None):
    """Waits for a given process to terminate.

    Args:
      proc: handle to process
      timeout: timeout (in seconds, may be fractional).
        None = wait indefinitely

    Returns:
      True if process ended, False if timed out
    """
    if timeout is None:
        wait_msec = win32event.INFINITE
    else:
        # Convert sec to msec. The wait API takes a whole number of
        # milliseconds, so truncate fractional values such as 0.5 * 1000.
        wait_msec = int(timeout * 1000)

    wait_result = win32event.WaitForSingleObject(proc, wait_msec)
    return wait_result == win32event.WAIT_OBJECT_0
def WaitForThrobber(hwnd, rect=None, timeout=20, tick=0.1, done=10):
    """Wait for a browser's "throbber" (loading animation) to complete.

    The throbber area is scraped every tick and compared with the most
    recent differing scrape; once it has stayed identical for more than
    `done` seconds it is considered finished.

    Args:
      hwnd: window containing the throbber
      rect: rectangle of the throbber, in client coords. If None, whole window
      timeout: if the throbber is still throbbing after this long, give up
      tick: how often to check the throbber
      done: how long the throbber must be unmoving to be considered done

    Returns:
      Seconds between the start of the wait and the last observed change
      of the throbber, or -1 if timed out
    """
    if not rect:
        rect = win32gui.GetClientRect(hwnd)

    # reference_scrape is the latest scrape that differed from its
    # predecessor; every new scrape is compared against it
    reference_scrape = ScrapeWindow(hwnd, rect)

    # NOTE: time.clock() was removed in Python 3.8; this code targets
    # the Python 2 runtime the rest of the module is written for.
    started = time.clock()
    deadline = started + timeout
    last_change = started

    while time.clock() < deadline:
        time.sleep(tick)

        latest_scrape = ScrapeWindow(hwnd, rect)
        if latest_scrape.tostring() != reference_scrape.tostring():
            # Still animating: remember this frame and when we saw it
            reference_scrape = latest_scrape
            last_change = time.clock()
        elif time.clock() - last_change > done:
            # Unchanged for long enough: report when it stopped moving
            return last_change - started

    return -1
def MoveAndSizeWindow(wnd, position=None, size=None, child=None):
    """Moves and/or resizes a window.

    Repositions and resizes a frame window. When both a size and a child
    window are given, the frame is sized so that the child window ends up
    with the requested size.

    Args:
      wnd: handle of the frame window
      position: new (x, y) location for the frame window; None keeps
        the current location
      size: new (width, height) for the frame window (or the child
        window); None keeps the current frame size
      child: handle of the child window

    Returns:
      None
    """
    frame = win32gui.GetWindowRect(wnd)
    frame_width = frame[2] - frame[0]
    frame_height = frame[3] - frame[1]

    if position is None:
        position = (frame[0], frame[1])

    if size is None:
        size = (frame_width, frame_height)
    elif child is not None:
        # The requested size applies to the child, so add the current
        # difference between the frame's and the child's dimensions
        inner = win32gui.GetWindowRect(child)
        extra_width = frame_width - (inner[2] - inner[0])
        extra_height = frame_height - (inner[3] - inner[1])
        size = (size[0] + extra_width, size[1] + extra_height)

    win32gui.MoveWindow(
        wnd,          # window to move
        position[0],  # new x coord
        position[1],  # new y coord
        size[0],      # new width
        size[1],      # new height
        True)         # repaint?
def EndProcess(proc, code=0):
    """Ends a process.

    Wraps the OS TerminateProcess call for platform-independence

    Args:
      proc: handle of the process to terminate (the first element of
        the tuple returned by InvokeAndWait())
      code: exit code the terminated process should report

    Returns:
      None
    """
    win32process.TerminateProcess(proc, code)
def URLtoFilename(url, path=None, extension=None):
    """Converts a URL to a filename, given a path.

    Every character that is not allowed in a Windows filename
    (one of \\ / : * ? " < > |) is replaced by an underscore.

    This in theory could cause collisions if two URLs differ only
    in such characters (eg. http://www.foo.com/?bar and
    http://www.foo.com/:bar). In practice this shouldn't be a problem.

    Args:
      url: The URL to convert
      path: path to the directory to store the file; a trailing
        backslash is appended if it is missing. None means no directory.
      extension: string to append to filename. None means no extension.

    Returns:
      filename
    """
    if path is None:
        path = ""
    if extension is None:
        extension = ""
    if len(path) > 0 and path[-1] != '\\':
        path += '\\'

    # Plain replace() instead of string.maketrans()/translate(): it
    # behaves the same for byte and unicode strings and does not depend
    # on a helper that only exists in Python 2's string module.
    for invalid_char in r'\/:*?"<>|':
        url = url.replace(invalid_char, '_')

    return "%s%s%s" % (path, url, extension)
def PreparePath(path):
    """Ensures that a given path exists, making subdirectories if necessary.

    Args:
      path: fully-qualified path of directory to ensure exists

    Returns:
      None

    Raises:
      OSError: if the directory could not be created and does not
        already exist (including when `path` names an existing file)
    """
    try:
        os.makedirs(path)
    except OSError:
        # An already-existing directory is the one failure we tolerate.
        # Checking the filesystem instead of a numeric error code avoids
        # depending on how the platform reports "already exists", and
        # does not mistake an existing regular file for success.
        if not os.path.isdir(path):
            raise
if __name__ == "__main__":
    # We're being invoked rather than imported. Let's do some tests
    PreparePath(r"c:\sitecompare\scrapes\ie7")

    # Hardcode IE's location for the purpose of this test
    (proc, wnd) = InvokeAndWait(
        r"c:\program files\internet explorer\iexplore.exe")

    # Whatever happens below, don't leave the browser we launched running
    try:
        # InvokeAndWait() reports a window of None when it timed out
        if wnd is None:
            raise RuntimeError("Timed out waiting for the IE window")

        # Find the browser pane in the IE window
        browser = FindChildWindow(
            wnd, "TabWindowClass/Shell DocObject View/Internet Explorer_Server")

        # Move and size the window
        MoveAndSizeWindow(wnd, (0, 0), (1024, 768), browser)

        # Take a screenshot
        i = ScrapeWindow(browser)
        i.show()
    finally:
        EndProcess(proc, 0)
|