summaryrefslogtreecommitdiffstats
path: root/chrome/tools/webforms_aggregator_tests.py
diff options
context:
space:
mode:
authordyu@chromium.org <dyu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-29 19:15:53 +0000
committerdyu@chromium.org <dyu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-29 19:15:53 +0000
commit5883af9856b04d06dc8572ac381d2207b737c78e (patch)
treece48d5f8b47a634d19ac1aa5ef2820335b90a548 /chrome/tools/webforms_aggregator_tests.py
parent70e64f8b4bb55b01555feb4c340943630f19f3ef (diff)
downloadchromium_src-5883af9856b04d06dc8572ac381d2207b737c78e.zip
chromium_src-5883af9856b04d06dc8572ac381d2207b737c78e.tar.gz
chromium_src-5883af9856b04d06dc8572ac381d2207b737c78e.tar.bz2
Aggregator script used for collecting web pages with fillable forms
such as registration forms. The script parses through a set of links from a text file and crawls the domain looking for web pages with forms, then downloads the entire page to an HTML file. webforms_aggregator.py TEST=none BUG=none Review URL: http://codereview.chromium.org/6577026 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@83567 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/tools/webforms_aggregator_tests.py')
-rw-r--r--chrome/tools/webforms_aggregator_tests.py56
1 files changed, 56 insertions, 0 deletions
diff --git a/chrome/tools/webforms_aggregator_tests.py b/chrome/tools/webforms_aggregator_tests.py
new file mode 100644
index 0000000..fc12dc3
--- /dev/null
+++ b/chrome/tools/webforms_aggregator_tests.py
@@ -0,0 +1,56 @@
+#!/usr/bin/python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import tempfile
+import unittest
+
+import webforms_aggregator
+
+
+class WebformsAggregatorTest(unittest.TestCase):
+  """Tests for the Retriever, Crawler and ThreadedCrawler classes.
+
+  NOTE(review): the URLs used here point at public sites, so these tests
+  presumably need network access -- confirm before running them offline.
+  """
+
+  def setUp(self):
+    """Set the URLs, domain and cookie file name; write a temp file of URLs."""
+    self.url1 = 'http://www.google.com'
+    self.url2 = 'http://www.macys.com'
+    self.domain = 'google.com'
+    self.cookie_file = 'test.cookie'
+    urls_text = 'URLs to crawl:\n%s\n%s\n' % (self.url1, self.url2)
+    # delete=False keeps the file on disk after close() so its name can be
+    # handed to ThreadedCrawler later; tearDown removes it.
+    self.url_file = tempfile.NamedTemporaryFile(suffix='.txt', delete=False)
+    self.url_file.file.write(urls_text)
+    self.url_file.close()
+
+  def tearDown(self):
+    """Remove the cookie file and the temporary URLs file if they exist."""
+    cookie_path = self.cookie_file
+    if os.path.isfile(cookie_path):
+      os.unlink(cookie_path)
+    urls_path = self.url_file.name
+    if os.path.isfile(urls_path):
+      self.url_file.close()
+      os.unlink(urls_path)
+
+  def testRetrieverDownloadsPage(self):
+    """Check that Retriever.Download() reports success for the first URL."""
+    retriever = webforms_aggregator.Retriever(
+        self.url1, self.domain, self.cookie_file)
+    downloaded = retriever.Download()
+    self.assertTrue(
+        downloaded, msg='Retriever could not download "%s"' % self.url1)
+
+  def testCrawlerFindsRegPageFromUrl(self):
+    """Check that Crawler.Run() returns a true value for the second URL."""
+    crawler = webforms_aggregator.Crawler(self.url2)
+    found = crawler.Run()
+    self.assertTrue(
+        found,
+        msg='Crawler could not find the reg page of "%s"' % self.url2)
+
+  def testThreadedCrawlerFindsRegPageFromUrlsFile(self):
+    """Check that ThreadedCrawler.Run() on the URLs file does not return -1."""
+    crawler = webforms_aggregator.ThreadedCrawler(self.url_file.name)
+    status = crawler.Run()
+    self.assertNotEqual(
+        status, -1,
+        msg='Threaded crawler could not find the reg page from the URLs file')
+
+
+if __name__ == '__main__':
+  # Load only this module's test case and run it with verbose output.
+  suite = unittest.TestLoader().loadTestsFromTestCase(
+      WebformsAggregatorTest)
+  result = unittest.TextTestRunner(verbosity=2).run(suite)
+  # TextTestRunner.run() only returns the result object; it does not set the
+  # process exit status. Exit non-zero on failure so callers (for example a
+  # build script) can detect that the tests did not pass.
+  raise SystemExit(0 if result.wasSuccessful() else 1)