diff options
Diffstat (limited to 'net/tools')
-rw-r--r-- | net/tools/crash_cache/crash_cache.cc | 337 | ||||
-rw-r--r-- | net/tools/testserver/dist/_socket.pyd | bin | 0 -> 49152 bytes | |||
-rw-r--r-- | net/tools/testserver/dist/_ssl.pyd | bin | 0 -> 499712 bytes | |||
-rw-r--r-- | net/tools/testserver/testserver.py | 943 | ||||
-rw-r--r-- | net/tools/tld_cleanup/SConscript | 121 | ||||
-rw-r--r-- | net/tools/tld_cleanup/tld_cleanup.cc | 266 |
6 files changed, 1667 insertions, 0 deletions
diff --git a/net/tools/crash_cache/crash_cache.cc b/net/tools/crash_cache/crash_cache.cc new file mode 100644 index 0000000..859cf94 --- /dev/null +++ b/net/tools/crash_cache/crash_cache.cc @@ -0,0 +1,337 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This command-line program generates the set of files needed for the crash- +// cache unit tests (DiskCacheTest,CacheBackend_Recover*). 
This program only +// works properly on debug mode, because the crash functionality is not compiled +// on release builds of the cache. + +#include <windows.h> +#include <string> + +#include "base/file_util.h" +#include "base/logging.h" +#include "base/message_loop.h" +#include "base/path_service.h" +#include "base/string_util.h" + +#include "net/disk_cache/backend_impl.h" +#include "net/disk_cache/disk_cache.h" +#include "net/disk_cache/disk_cache_test_util.h" +#include "net/disk_cache/rankings.h" + +enum Errors { + GENERIC = -1, + ALL_GOOD = 0, + INVALID_ARGUMENT = 1, + CRASH_OVERWRITE, + NOT_REACHED +}; + +using disk_cache::RankCrashes; + +// Starts a new process, to generate the files. +int RunSlave(RankCrashes action) { + std::wstring exe; + PathService::Get(base::FILE_EXE, &exe); + + std::wstring command = StringPrintf(L"%s %d", exe.c_str(), action); + + STARTUPINFO startup_info = {0}; + startup_info.cb = sizeof(startup_info); + PROCESS_INFORMATION process_info; + + // I really don't care about this call modifying the string. + if (!::CreateProcess(exe.c_str(), const_cast<wchar_t*>(command.c_str()), NULL, + NULL, FALSE, 0, NULL, NULL, &startup_info, + &process_info)) { + printf("Unable to run test %d\n", action); + return GENERIC; + } + + DWORD reason = ::WaitForSingleObject(process_info.hProcess, INFINITE); + + int code; + bool ok = ::GetExitCodeProcess(process_info.hProcess, + reinterpret_cast<PDWORD>(&code)) ? true : + false; + + ::CloseHandle(process_info.hProcess); + ::CloseHandle(process_info.hThread); + + if (!ok) { + printf("Unable to get return code, test %d\n", action); + return GENERIC; + } + + if (ALL_GOOD != code) + printf("Test %d failed, code %d\n", action, code); + + return code; +} + +// Main loop for the master process. 
+int MasterCode() { + for (int i = disk_cache::NO_CRASH + 1; i < disk_cache::MAX_CRASH; i++) { + int ret = RunSlave(static_cast<RankCrashes>(i)); + if (ALL_GOOD != ret) + return ret; + } + + return ALL_GOOD; +} + +// ----------------------------------------------------------------------- + +extern RankCrashes g_rankings_crash; +const char* kCrashEntryName = "the first key"; + +// Creates the destinaton folder for this run, and returns it on full_path. +bool CreateTargetFolder(const std::wstring& path, RankCrashes action, + std::wstring* full_path) { + const wchar_t* folders[] = { + L"", + L"insert_empty1", + L"insert_empty2", + L"insert_empty3", + L"insert_one1", + L"insert_one2", + L"insert_one3", + L"insert_load1", + L"insert_load2", + L"remove_one1", + L"remove_one2", + L"remove_one3", + L"remove_one4", + L"remove_head1", + L"remove_head2", + L"remove_head3", + L"remove_head4", + L"remove_tail1", + L"remove_tail2", + L"remove_tail3", + L"remove_load1", + L"remove_load2", + L"remove_load3" + }; + COMPILE_ASSERT(arraysize(folders) == disk_cache::MAX_CRASH, sync_folders); + DCHECK(action > disk_cache::NO_CRASH && action < disk_cache::MAX_CRASH); + + *full_path = path; + file_util::AppendToPath(full_path, folders[action]); + + return file_util::CreateDirectory(*full_path); +} + +// Generates the files for an empty and one item cache. 
+int SimpleInsert(const std::wstring& path, RankCrashes action) { + disk_cache::Backend* cache = disk_cache::CreateCacheBackend(path, false, 0); + if (!cache || cache->GetEntryCount()) + return GENERIC; + + const char* test_name = "some other key"; + + if (action <= disk_cache::INSERT_EMPTY_3) { + test_name = kCrashEntryName; + g_rankings_crash = action; + } + + disk_cache::Entry* entry; + if (!cache->CreateEntry(test_name, &entry)) + return GENERIC; + + entry->Close(); + + DCHECK(action <= disk_cache::INSERT_ONE_3); + g_rankings_crash = action; + test_name = kCrashEntryName; + + if (!cache->CreateEntry(test_name, &entry)) + return GENERIC; + + return NOT_REACHED; +} + +// Generates the files for a one item cache, and removing the head. +int SimpleRemove(const std::wstring& path, RankCrashes action) { + DCHECK(action >= disk_cache::REMOVE_ONE_1); + DCHECK(action <= disk_cache::REMOVE_TAIL_3); + + disk_cache::Backend* cache = disk_cache::CreateCacheBackend(path, false, 0); + if (!cache || cache->GetEntryCount()) + return GENERIC; + + disk_cache::Entry* entry; + if (!cache->CreateEntry(kCrashEntryName, &entry)) + return GENERIC; + + entry->Close(); + + if (action >= disk_cache::REMOVE_TAIL_1) { + if (!cache->CreateEntry("some other key", &entry)) + return GENERIC; + + entry->Close(); + } + + if (!cache->OpenEntry(kCrashEntryName, &entry)) + return GENERIC; + + g_rankings_crash = action; + entry->Doom(); + entry->Close(); + + return NOT_REACHED; +} + +int HeadRemove(const std::wstring& path, RankCrashes action) { + DCHECK(action >= disk_cache::REMOVE_HEAD_1); + DCHECK(action <= disk_cache::REMOVE_HEAD_4); + + disk_cache::Backend* cache = disk_cache::CreateCacheBackend(path, false, 0); + if (!cache || cache->GetEntryCount()) + return GENERIC; + + disk_cache::Entry* entry; + if (!cache->CreateEntry("some other key", &entry)) + return GENERIC; + + entry->Close(); + if (!cache->CreateEntry(kCrashEntryName, &entry)) + return GENERIC; + + entry->Close(); + + if 
(!cache->OpenEntry(kCrashEntryName, &entry)) + return GENERIC; + + g_rankings_crash = action; + entry->Doom(); + entry->Close(); + + return NOT_REACHED; +} + +// Generates the files for insertion and removals on heavy loaded caches. +int LoadOperations(const std::wstring& path, RankCrashes action) { + DCHECK(action >= disk_cache::INSERT_LOAD_1); + + // Work with a tiny index table (16 entries) + disk_cache::BackendImpl* cache = new disk_cache::BackendImpl(path, 0xf); + if (!cache || !cache->SetMaxSize(0x100000) || !cache->Init() || + cache->GetEntryCount()) + return GENERIC; + + int seed = static_cast<int>(Time::Now().ToInternalValue()); + srand(seed); + + disk_cache::Entry* entry; + for (int i = 0; i < 100; i++) { + std::string key = GenerateKey(true); + if (!cache->CreateEntry(key, &entry)) + return GENERIC; + entry->Close(); + if (50 == i && action >= disk_cache::REMOVE_LOAD_1) { + if (!cache->CreateEntry(kCrashEntryName, &entry)) + return GENERIC; + entry->Close(); + } + } + + if (action <= disk_cache::INSERT_LOAD_2) { + g_rankings_crash = action; + + if (!cache->CreateEntry(kCrashEntryName, &entry)) + return GENERIC; + } + + if (!cache->OpenEntry(kCrashEntryName, &entry)) + return GENERIC; + + g_rankings_crash = action; + + entry->Doom(); + entry->Close(); + + return NOT_REACHED; +} + +// Main function on the child process. 
+int SlaveCode(const std::wstring& path, RankCrashes action) { + MessageLoop message_loop; + + std::wstring full_path; + if (!CreateTargetFolder(path, action, &full_path)) { + printf("Destination folder found, please remove it.\n"); + return CRASH_OVERWRITE; + } + + if (action <= disk_cache::INSERT_ONE_3) + return SimpleInsert(full_path, action); + + if (action <= disk_cache::INSERT_LOAD_2) + return LoadOperations(full_path, action); + + if (action <= disk_cache::REMOVE_ONE_4) + return SimpleRemove(full_path, action); + + if (action <= disk_cache::REMOVE_HEAD_4) + return HeadRemove(full_path, action); + + if (action <= disk_cache::REMOVE_TAIL_3) + return SimpleRemove(full_path, action); + + if (action <= disk_cache::REMOVE_LOAD_3) + return LoadOperations(full_path, action); + + return NOT_REACHED; +} + +// ----------------------------------------------------------------------- + +int main(int argc, const char* argv[]) { + if (argc < 2) + return MasterCode(); + + char* end; + RankCrashes action = static_cast<RankCrashes>(strtol(argv[1], &end, 0)); + if (action <= disk_cache::NO_CRASH || action >= disk_cache::MAX_CRASH) { + printf("Invalid action\n"); + return INVALID_ARGUMENT; + } + + std::wstring path; + PathService::Get(base::DIR_SOURCE_ROOT, &path); + file_util::AppendToPath(&path, L"net"); + file_util::AppendToPath(&path, L"data"); + file_util::AppendToPath(&path, L"cache_tests"); + file_util::AppendToPath(&path, L"new_crashes"); + + return SlaveCode(path, action); +} diff --git a/net/tools/testserver/dist/_socket.pyd b/net/tools/testserver/dist/_socket.pyd Binary files differnew file mode 100644 index 0000000..5ae91b7 --- /dev/null +++ b/net/tools/testserver/dist/_socket.pyd diff --git a/net/tools/testserver/dist/_ssl.pyd b/net/tools/testserver/dist/_ssl.pyd Binary files differnew file mode 100644 index 0000000..6a9b73c --- /dev/null +++ b/net/tools/testserver/dist/_ssl.pyd diff --git a/net/tools/testserver/testserver.py b/net/tools/testserver/testserver.py new 
file mode 100644 index 0000000..381b6a8 --- /dev/null +++ b/net/tools/testserver/testserver.py @@ -0,0 +1,943 @@ +#!/usr/bin/python2.4 +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""This is a simple HTTP server used for testing Chrome. + +It supports several test URLs, as specified by the handlers in TestPageHandler. +It defaults to living on localhost:8888. 
+It can use https if you specify the flag --https=CERT where CERT is the path +to a pem file containing the certificate and private key that should be used. +To shut it down properly, visit localhost:8888/kill. +""" + +import base64 +import BaseHTTPServer +import cgi +import md5 +import optparse +import os +import re +import SocketServer +import sys +import time +import tlslite +import tlslite.api + +debug_output = sys.stderr +def debug(str): + debug_output.write(str + "\n") + debug_output.flush() + +class StoppableHTTPServer(BaseHTTPServer.HTTPServer): + """This is a specialization of of BaseHTTPServer to allow it + to be exited cleanly (by setting its "stop" member to True).""" + + def serve_forever(self): + self.stop = False + self.nonce = None + while not self.stop: + self.handle_request() + self.socket.close() + +class HTTPSServer(tlslite.api.TLSSocketServerMixIn, StoppableHTTPServer): + """This is a specialization of StoppableHTTPerver that add https support.""" + + def __init__(self, server_address, request_hander_class, cert_path): + s = open(cert_path).read() + x509 = tlslite.api.X509() + x509.parse(s) + self.cert_chain = tlslite.api.X509CertChain([x509]) + s = open(cert_path).read() + self.private_key = tlslite.api.parsePEMKey(s, private=True) + + self.session_cache = tlslite.api.SessionCache() + StoppableHTTPServer.__init__(self, server_address, request_hander_class) + + def handshake(self, tlsConnection): + """Creates the SSL connection.""" + try: + tlsConnection.handshakeServer(certChain=self.cert_chain, + privateKey=self.private_key, + sessionCache=self.session_cache) + tlsConnection.ignoreAbruptClose = True + return True + except tlslite.api.TLSError, error: + print "Handshake failure:", str(error) + return False + +class TestPageHandler(BaseHTTPServer.BaseHTTPRequestHandler): + + def __init__(self, request, client_address, socket_server): + self._get_handlers = [ + self.KillHandler, + self.NoCacheMaxAgeTimeHandler, + self.NoCacheTimeHandler, + 
self.CacheTimeHandler, + self.CacheExpiresHandler, + self.CacheProxyRevalidateHandler, + self.CachePrivateHandler, + self.CachePublicHandler, + self.CacheSMaxAgeHandler, + self.CacheMustRevalidateHandler, + self.CacheMustRevalidateMaxAgeHandler, + self.CacheNoStoreHandler, + self.CacheNoStoreMaxAgeHandler, + self.CacheNoTransformHandler, + self.DownloadHandler, + self.DownloadFinishHandler, + self.EchoHeader, + self.FileHandler, + self.RealFileWithCommonHeaderHandler, + self.RealBZ2FileWithCommonHeaderHandler, + self.AuthBasicHandler, + self.AuthDigestHandler, + self.SlowServerHandler, + self.ContentTypeHandler, + self.ServerRedirectHandler, + self.ClientRedirectHandler, + self.DefaultResponseHandler] + self._post_handlers = [ + self.EchoTitleHandler, + self.EchoAllHandler, + self.EchoHandler] + self._get_handlers + + self._mime_types = { 'gif': 'image/gif', 'jpeg' : 'image/jpeg', 'jpg' : 'image/jpeg' } + self._default_mime_type = 'text/html' + + BaseHTTPServer.BaseHTTPRequestHandler.__init__(self, request, client_address, socket_server) + + def GetMIMETypeFromName(self, file_name): + """Returns the mime type for the specified file_name. So far it only looks + at the file extension.""" + + (shortname, extension) = os.path.splitext(file_name) + if len(extension) == 0: + # no extension. 
+ return self._default_mime_type + + return self._mime_types.get(extension, self._default_mime_type) + + def KillHandler(self): + """This request handler kills the server, for use when we're done" + with the a particular test.""" + + if (self.path.find("kill") < 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'max-age=0') + self.end_headers() + self.wfile.write("Time to die") + self.server.stop = True + + return True + + def NoCacheMaxAgeTimeHandler(self): + """This request handler yields a page with the title set to the current + system time, and no caching requested.""" + + if (self.path.find("/nocachetime/maxage") != 0): + return False + + self.send_response(200) + self.send_header('Cache-Control', 'max-age=0') + self.send_header('Content-type', 'text/html') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def NoCacheTimeHandler(self): + """This request handler yields a page with the title set to the current + system time, and no caching requested.""" + + if (self.path.find("/nocachetime") != 0): + return False + + self.send_response(200) + self.send_header('Cache-Control', 'no-cache') + self.send_header('Content-type', 'text/html') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CacheTimeHandler(self): + """This request handler yields a page with the title set to the current + system time, and allows caching for one minute.""" + + if self.path.find("/cachetime") != 0: + return False + + self.send_response(200) + self.send_header('Cache-Control', 'max-age=60') + self.send_header('Content-type', 'text/html') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CacheExpiresHandler(self): + """This request handler yields a page with the title set to the current 
+ system time, and set the page to expire on 1 Jan 2099.""" + + if (self.path.find("/cache/expires") != 0): + return False + + self.send_response(200) + self.send_header('Expires', 'Thu, 1 Jan 2099 00:00:00 GMT') + self.send_header('Content-type', 'text/html') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CacheProxyRevalidateHandler(self): + """This request handler yields a page with the title set to the current + system time, and allows caching for 60 seconds""" + + if (self.path.find("/cache/proxy-revalidate") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'max-age=60, proxy-revalidate') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CachePrivateHandler(self): + """This request handler yields a page with the title set to the current + system time, and allows caching for 5 seconds.""" + + if (self.path.find("/cache/private") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'max-age=5, private') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CachePublicHandler(self): + """This request handler yields a page with the title set to the current + system time, and allows caching for 5 seconds.""" + + if (self.path.find("/cache/public") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'max-age=5, public') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CacheSMaxAgeHandler(self): + """This request handler yields a page with the title set to the current + system time, and does not allow for caching.""" + + 
if (self.path.find("/cache/s-maxage") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'public, s-maxage = 60, max-age = 0') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CacheMustRevalidateHandler(self): + """This request handler yields a page with the title set to the current + system time, and does not allow caching.""" + + if (self.path.find("/cache/must-revalidate") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'must-revalidate') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CacheMustRevalidateMaxAgeHandler(self): + """This request handler yields a page with the title set to the current + system time, and does not allow caching event though max-age of 60 + seconds is specified.""" + + if (self.path.find("/cache/must-revalidate/max-age") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'max-age=60, must-revalidate') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + + def CacheNoStoreHandler(self): + """This request handler yields a page with the title set to the current + system time, and does not allow the page to be stored.""" + + if (self.path.find("/cache/no-store") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'no-store') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def CacheNoStoreMaxAgeHandler(self): + """This request handler yields a page with the title set to the current + system time, and does not allow 
the page to be stored even though max-age + of 60 seconds is specified.""" + + if (self.path.find("/cache/no-store/max-age") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'max-age=60, no-store') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + + def CacheNoTransformHandler(self): + """This request handler yields a page with the title set to the current + system time, and does not allow the content to transformed during + user-agent caching""" + + if (self.path.find("/cache/no-transform") != 0): + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'no-transform') + self.end_headers() + + self.wfile.write('<html><head><title>%s</title></head></html>' % time.time()) + + return True + + def EchoHeader(self): + """This handler echoes back the value of a specific request header.""" + + if self.path.find("/echoheader") != 0: + return False + + query_char = self.path.find('?') + if query_char != -1: + header_name = self.path[query_char+1:] + + self.send_response(200) + self.send_header('Content-type', 'text/plain') + self.send_header('Cache-control', 'max-age=60000') + # insert a vary header to properly indicate that the cachability of this + # request is subject to value of the request header being echoed. 
+ if len(header_name) > 0: + self.send_header('Vary', header_name) + self.end_headers() + + if len(header_name) > 0: + self.wfile.write(self.headers.getheader(header_name)) + + return True + + def EchoHandler(self): + """This handler just echoes back the payload of the request, for testing + form submission.""" + + if self.path.find("/echo") != 0: + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + length = int(self.headers.getheader('content-length')) + request = self.rfile.read(length) + self.wfile.write(request) + return True + + def EchoTitleHandler(self): + """This handler is like Echo, but sets the page title to the request.""" + + if self.path.find("/echotitle") != 0: + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + length = int(self.headers.getheader('content-length')) + request = self.rfile.read(length) + self.wfile.write('<html><head><title>') + self.wfile.write(request) + self.wfile.write('</title></head></html>') + return True + + def EchoAllHandler(self): + """This handler yields a (more) human-readable page listing information + about the request header & contents.""" + + if self.path.find("/echoall") != 0: + return False + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write('<html><head><style>' + 'pre { border: 1px solid black; margin: 5px; padding: 5px }' + '</style></head><body>' + '<div style="float: right">' + '<a href="http://localhost:8888/echo">back to referring page</a></div>' + '<h1>Request Body:</h1><pre>') + length = int(self.headers.getheader('content-length')) + qs = self.rfile.read(length) + params = cgi.parse_qs(qs, keep_blank_values=1) + + for param in params: + self.wfile.write('%s=%s\n' % (param, params[param][0])) + + self.wfile.write('</pre>') + + self.wfile.write('<h1>Request Headers:</h1><pre>%s</pre>' % self.headers) + + 
self.wfile.write('</body></html>') + return True + + def DownloadHandler(self): + """This handler sends a downloadable file with or without reporting + the size (6K).""" + + if self.path.startswith("/download-unknown-size"): + send_length = False + elif self.path.startswith("/download-known-size"): + send_length = True + else: + return False + + # + # The test which uses this functionality is attempting to send + # small chunks of data to the client. Use a fairly large buffer + # so that we'll fill chrome's IO buffer enough to force it to + # actually write the data. + # See also the comments in the client-side of this test in + # download_uitest.cc + # + size_chunk1 = 35*1024 + size_chunk2 = 10*1024 + + self.send_response(200) + self.send_header('Content-type', 'application/octet-stream') + self.send_header('Cache-Control', 'max-age=0') + if send_length: + self.send_header('Content-Length', size_chunk1 + size_chunk2) + self.end_headers() + + # First chunk of data: + self.wfile.write("*" * size_chunk1) + self.wfile.flush() + + # handle requests until one of them clears this flag. + self.server.waitForDownload = True + while self.server.waitForDownload: + self.server.handle_request() + + # Second chunk of data: + self.wfile.write("*" * size_chunk2) + return True + + def DownloadFinishHandler(self): + """This handler just tells the server to finish the current download.""" + + if not self.path.startswith("/download-finish"): + return False + + self.server.waitForDownload = False + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-Control', 'max-age=0') + self.end_headers() + return True + + def FileHandler(self): + """This handler sends the contents of the requested file. 
Wow, it's like + a real webserver!""" + + prefix='/files/' + if not self.path.startswith(prefix): + return False + + file = self.path[len(prefix):] + entries = file.split('/'); + path = os.path.join(self.server.data_dir, *entries) + + if not os.path.isfile(path): + print "File not found " + file + " full path:" + path + self.send_error(404) + return True + + f = open(path, "rb") + data = f.read() + f.close() + + # If file.mock-http-headers exists, it contains the headers we + # should send. Read them in and parse them. + headers_path = path + '.mock-http-headers' + if os.path.isfile(headers_path): + f = open(headers_path, "r") + + # "HTTP/1.1 200 OK" + response = f.readline() + status_code = re.findall('HTTP/\d+.\d+ (\d+)', response)[0] + self.send_response(int(status_code)) + + for line in f: + # "name: value" + name, value = re.findall('(\S+):\s*(.*)', line)[0] + self.send_header(name, value) + f.close() + else: + # Could be more generic once we support mime-type sniffing, but for + # now we need to set it explicitly. 
+ self.send_response(200) + self.send_header('Content-type', self.GetMIMETypeFromName(file)) + self.send_header('Content-Length', len(data)) + self.end_headers() + + self.wfile.write(data) + + return True + + def RealFileWithCommonHeaderHandler(self): + """This handler sends the contents of the requested file without the pseudo + http head!""" + + prefix='/realfiles/' + if not self.path.startswith(prefix): + return False + + file = self.path[len(prefix):] + path = os.path.join(self.server.data_dir, file) + + try: + f = open(path, "rb") + data = f.read() + f.close() + + # just simply set the MIME as octal stream + self.send_response(200) + self.send_header('Content-type', 'application/octet-stream') + self.end_headers() + + self.wfile.write(data) + except: + self.send_error(404) + + return True + + def RealBZ2FileWithCommonHeaderHandler(self): + """This handler sends the bzip2 contents of the requested file with + corresponding Content-Encoding field in http head!""" + + prefix='/realbz2files/' + if not self.path.startswith(prefix): + return False + + parts = self.path.split('?') + file = parts[0][len(prefix):] + path = os.path.join(self.server.data_dir, file) + '.bz2' + + if len(parts) > 1: + options = parts[1] + else: + options = '' + + try: + self.send_response(200) + accept_encoding = self.headers.get("Accept-Encoding") + if accept_encoding.find("bzip2") != -1: + f = open(path, "rb") + data = f.read() + f.close() + self.send_header('Content-Encoding', 'bzip2') + self.send_header('Content-type', 'application/x-bzip2') + self.end_headers() + if options == 'incremental-header': + self.wfile.write(data[:1]) + self.wfile.flush() + time.sleep(1.0) + self.wfile.write(data[1:]) + else: + self.wfile.write(data) + else: + """client do not support bzip2 format, send pseudo content + """ + self.send_header('Content-type', 'text/html; charset=ISO-8859-1') + self.end_headers() + self.wfile.write("you do not support bzip2 encoding") + except: + self.send_error(404) + + return 
True + + def AuthBasicHandler(self): + """This handler tests 'Basic' authentication. It just sends a page with + title 'user/pass' if you succeed.""" + + if not self.path.startswith("/auth-basic"): + return False + + username = userpass = password = b64str = "" + + auth = self.headers.getheader('authorization') + try: + if not auth: + raise Exception('no auth') + b64str = re.findall(r'Basic (\S+)', auth)[0] + userpass = base64.b64decode(b64str) + username, password = re.findall(r'([^:]+):(\S+)', userpass)[0] + if password != 'secret': + raise Exception('wrong password') + except Exception, e: + # Authentication failed. + self.send_response(401) + self.send_header('WWW-Authenticate', 'Basic realm="testrealm"') + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write('<html><head>') + self.wfile.write('<title>Denied: %s</title>' % e) + self.wfile.write('</head><body>') + self.wfile.write('auth=%s<p>' % auth) + self.wfile.write('b64str=%s<p>' % b64str) + self.wfile.write('username: %s<p>' % username) + self.wfile.write('userpass: %s<p>' % userpass) + self.wfile.write('password: %s<p>' % password) + self.wfile.write('You sent:<br>%s<p>' % self.headers) + self.wfile.write('</body></html>') + return True + + # Authentication successful. (Return a cachable response to allow for + # testing cached pages that require authentication.) 
+ if_none_match = self.headers.getheader('if-none-match') + if if_none_match == "abc": + self.send_response(304) + self.end_headers() + else: + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header('Cache-control', 'max-age=60000') + self.send_header('Etag', 'abc') + self.end_headers() + self.wfile.write('<html><head>') + self.wfile.write('<title>%s/%s</title>' % (username, password)) + self.wfile.write('</head><body>') + self.wfile.write('auth=%s<p>' % auth) + self.wfile.write('</body></html>') + + return True + + def AuthDigestHandler(self): + """This handler tests 'Digest' authentication. It just sends a page with + title 'user/pass' if you succeed.""" + + if not self.path.startswith("/auth-digest"): + return False + + # Periodically generate a new nonce. Technically we should incorporate + # the request URL into this, but we don't care for testing. + nonce_life = 10 + stale = False + if not self.server.nonce or (time.time() - self.server.nonce_time > nonce_life): + if self.server.nonce: + stale = True + self.server.nonce_time = time.time() + self.server.nonce = \ + md5.new(time.ctime(self.server.nonce_time) + 'privatekey').hexdigest() + + nonce = self.server.nonce + opaque = md5.new('opaque').hexdigest() + password = 'secret' + realm = 'testrealm' + + auth = self.headers.getheader('authorization') + pairs = {} + try: + if not auth: + raise Exception('no auth') + if not auth.startswith('Digest'): + raise Exception('not digest') + # Pull out all the name="value" pairs as a dictionary. + pairs = dict(re.findall(r'(\b[^ ,=]+)="?([^",]+)"?', auth)) + + # Make sure it's all valid. + if pairs['nonce'] != nonce: + raise Exception('wrong nonce') + if pairs['opaque'] != opaque: + raise Exception('wrong opaque') + + # Check the 'response' value and make sure it matches our magic hash. 
+ # See http://www.ietf.org/rfc/rfc2617.txt + hash_a1 = md5.new(':'.join([pairs['username'], realm, password])).hexdigest() + hash_a2 = md5.new(':'.join([self.command, pairs['uri']])).hexdigest() + if 'qop' in pairs and 'nc' in pairs and 'cnonce' in pairs: + response = md5.new(':'.join([hash_a1, nonce, pairs['nc'], + pairs['cnonce'], pairs['qop'], hash_a2])).hexdigest() + else: + response = md5.new(':'.join([hash_a1, nonce, hash_a2])).hexdigest() + + if pairs['response'] != response: + raise Exception('wrong password') + except Exception, e: + # Authentication failed. + self.send_response(401) + hdr = ('Digest ' + 'realm="%s", ' + 'domain="/", ' + 'qop="auth", ' + 'algorithm=MD5, ' + 'nonce="%s", ' + 'opaque="%s"') % (realm, nonce, opaque) + if stale: + hdr += ', stale="TRUE"' + self.send_header('WWW-Authenticate', hdr) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write('<html><head>') + self.wfile.write('<title>Denied: %s</title>' % e) + self.wfile.write('</head><body>') + self.wfile.write('auth=%s<p>' % auth) + self.wfile.write('pairs=%s<p>' % pairs) + self.wfile.write('You sent:<br>%s<p>' % self.headers) + self.wfile.write('We are replying:<br>%s<p>' % hdr) + self.wfile.write('</body></html>') + return True + + # Authentication successful. + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write('<html><head>') + self.wfile.write('<title>%s/%s</title>' % (pairs['username'], password)) + self.wfile.write('</head><body>') + self.wfile.write('auth=%s<p>' % auth) + self.wfile.write('pairs=%s<p>' % pairs) + self.wfile.write('</body></html>') + + return True + + def SlowServerHandler(self): + """Wait for the user suggested time before responding. 
The syntax is + /slow?0.5 to wait for half a second.""" + if not self.path.startswith("/slow"): + return False + query_char = self.path.find('?') + wait_sec = 1.0 + if query_char >= 0: + try: + wait_sec = int(self.path[query_char + 1:]) + except ValueError: + pass + time.sleep(wait_sec) + self.send_response(200) + self.send_header('Content-type', 'text/plain') + self.end_headers() + self.wfile.write("waited %d seconds" % wait_sec) + return True + + def ContentTypeHandler(self): + """Returns a string of html with the given content type. E.g., + /contenttype?text/css returns an html file with the Content-Type + header set to text/css.""" + if not self.path.startswith('/contenttype'): + return False + query_char = self.path.find('?') + content_type = self.path[query_char + 1:].strip() + if not content_type: + content_type = 'text/html' + self.send_response(200) + self.send_header('Content-Type', content_type) + self.end_headers() + self.wfile.write("<html>\n<body>\n<p>HTML text</p>\n</body>\n</html>\n"); + return True + + def ServerRedirectHandler(self): + """Sends a server redirect to the given URL. The syntax is + '/server-redirect?http://foo.bar/asdf' to redirect to 'http://foo.bar/asdf'""" + + test_name = "/server-redirect" + if not self.path.startswith(test_name): + return False + + query_char = self.path.find('?') + if query_char < 0 or len(self.path) <= query_char + 1: + self.sendRedirectHelp(test_name) + return True + dest = self.path[query_char + 1:] + + self.send_response(301) # moved permanently + self.send_header('Location', dest) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write('<html><head>') + self.wfile.write('</head><body>Redirecting to %s</body></html>' % dest) + + return True; + + def ClientRedirectHandler(self): + """Sends a client redirect to the given URL. 
The syntax is + '/client-redirect?http://foo.bar/asdf' to redirect to 'http://foo.bar/asdf'""" + + test_name = "/client-redirect" + if not self.path.startswith(test_name): + return False + + query_char = self.path.find('?'); + if query_char < 0 or len(self.path) <= query_char + 1: + self.sendRedirectHelp(test_name) + return True + dest = self.path[query_char + 1:] + + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write('<html><head>') + self.wfile.write('<meta http-equiv="refresh" content="0;url=%s">' % dest) + self.wfile.write('</head><body>Redirecting to %s</body></html>' % dest) + + return True + + def DefaultResponseHandler(self): + """This is the catch-all response handler for requests that aren't handled + by one of the special handlers above. + Note that we specify the content-length as without it the https connection + is not closed properly (and the browser keeps expecting data).""" + + contents = "Default response given for path: " + self.path + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.send_header("Content-Length", len(contents)) + self.end_headers() + self.wfile.write(contents) + return True + + def do_GET(self): + for handler in self._get_handlers: + if (handler()): + return + + def do_POST(self): + for handler in self._post_handlers: + if(handler()): + return + + # called by the redirect handling function when there is no parameter + def sendRedirectHelp(self, redirect_name): + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write('<html><body><h1>Error: no redirect destination</h1>') + self.wfile.write('Use <pre>%s?http://dest...</pre>' % redirect_name) + self.wfile.write('</body></html>') + +def main(options, args): + # redirect output to a log file so it doesn't spam the unit test output + logfile = open('testserver.log', 'w') + sys.stderr = sys.stdout = logfile + + port = options.port + + if 
options.cert: + # let's make sure the cert file exists. + if not os.path.isfile(options.cert): + print 'specified cert file not found: ' + options.cert + ' exiting...' + return + server = HTTPSServer(('127.0.0.1', port), TestPageHandler, options.cert) + print 'HTTPS server started on port %d...' % port + else: + server = StoppableHTTPServer(('127.0.0.1', port), TestPageHandler) + print 'HTTP server started on port %d...' % port + + if options.data_dir: + if not os.path.isdir(options.data_dir): + print 'specified data dir not found: ' + options.data_dir + ' exiting...' + return + server.data_dir = options.data_dir + else: + # Create the default path to our data dir, relative to the exe dir. + server.data_dir = os.path.dirname(sys.argv[0]) + server.data_dir = os.path.join(server.data_dir, "..", "..", "..", "..", + "test", "data") + + try: + server.serve_forever() + except KeyboardInterrupt: + print 'shutting down server' + server.stop = True + +if __name__ == '__main__': + option_parser = optparse.OptionParser() + option_parser.add_option('', '--port', default='8888', type='int', + help='Port used by the server') + option_parser.add_option('', '--data-dir', dest='data_dir', + help='Directory from which to read the files') + option_parser.add_option('', '--https', dest='cert', + help='Specify that https should be used, specify ' + 'the path to the cert containing the private key ' + 'the server should use') + options, args = option_parser.parse_args() + + sys.exit(main(options, args)) diff --git a/net/tools/tld_cleanup/SConscript b/net/tools/tld_cleanup/SConscript new file mode 100644 index 0000000..f4ef035 --- /dev/null +++ b/net/tools/tld_cleanup/SConscript @@ -0,0 +1,121 @@ +# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Import('env')
+
+env = env.Clone()
+
+env.Prepend(
+ CPPPATH = [
+ '../../..',
+ ],
+)
+
+env.Append(
+ CCFLAGS = [
+ '/TP',
+
+ '/wd4503',
+ '/wd4819',
+ ],
+
+ LINKFLAGS = [
+ '/INCREMENTAL',
+ '/MANIFEST',
+ '/DELAYLOAD:"dwmapi.dll"',
+ '/DELAYLOAD:"uxtheme.dll"',
+ '/DEBUG',
+ '/SUBSYSTEM:CONSOLE',
+ '/MACHINE:X86',
+ '/FIXED:No',
+ '/safeseh',
+ '/dynamicbase',
+ '/ignore:4199',
+ '/nxcompat',
+ ],
+
+ LIBS = [
+ 'wininet.lib',
+ 'version.lib',
+ 'msimg32.lib',
+ 'ws2_32.lib',
+ 'usp10.lib',
+ 'psapi.lib',
+ 'kernel32.lib',
+ 'user32.lib',
+ 'gdi32.lib',
+ 'winspool.lib',
+ 'comdlg32.lib',
+ 'advapi32.lib',
+ 'shell32.lib',
+ 'ole32.lib',
+ 'oleaut32.lib',
+ 'uuid.lib',
+ 'odbc32.lib',
+ 'odbccp32.lib',
+ 'DelayImp.lib',
+ ],
+)
+
+input_files = [
+ 'tld_cleanup.cc',
+]
+
+libs = [
+ '$GOOGLEURL_DIR/googleurl.lib',
+ '$ICU38_DIR/icuuc.lib',
+ '$BASE_DIR/base.lib',
+ # We only need to link with net.lib because we use precompiled_net.pch.
+ '$NET_DIR/net.lib',
+]
+
+exe_targets = env.Program(['tld_cleanup',
+ 'tld_cleanup.ilk',
+ 'tld_cleanup.pdb'],
+ input_files + libs)
+i = env.Install('$TARGET_ROOT', exe_targets)
+env.Alias('net', i)
+
+#env.Program('tld_cleanup', input_files + libs,
+# #WINDOWS_INSERT_MANIFEST=1,
+# PDB='tld_cleanup.pdb')
+
+#env.Command('tld_cleanup.exe.embed.manifest',
+# 'tld_cleanup.exe.intermediate.manifest',
+# '-mt.exe /out:${TARGET} /notify_update /manifest $SOURCE')
+
+#env.RES('tld_cleanup.exe.embed.manifest.res',
+# 'tld_cleanup.exe.embed.manifest',
+# CFLAGS=None,
+# CCFLAGS=None,
+# CXXFLAGS=None,
+# CPPDEFINES=[],
+# CPPPATH=[])
+
+env.Install('$TARGET_ROOT', exe_targets)
diff --git a/net/tools/tld_cleanup/tld_cleanup.cc b/net/tools/tld_cleanup/tld_cleanup.cc new file mode 100644 index 0000000..2efac1b7 --- /dev/null +++ b/net/tools/tld_cleanup/tld_cleanup.cc @@ -0,0 +1,266 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This command-line program converts an effective-TLD data file in UTF-8 from +// the format provided by Mozilla to the format expected by Chrome. Any errors +// or warnings are recorded in tld_cleanup.log. 
+// +// In particular, it +// * Strips blank lines and comments, as well as notes for individual rules. +// * Changes all line endings to LF. +// * Strips a single leading and/or trailing dot from each rule, if present. +// * Logs a warning if a rule contains '!' or '*.' other than at the beginning +// of the rule. (This also catches multiple ! or *. at the start of a rule.) +// * Logs a warning if GURL reports a rule as invalid, but keeps the rule. +// * Canonicalizes each rule's domain by converting it to a GURL and back. +// * Adds explicit rules for true TLDs found in any rule. + +#include <windows.h> +#include <set> +#include <string> + +#include "base/file_util.h" +#include "base/icu_util.h" +#include "base/logging.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "googleurl/src/gurl.h" +#include "googleurl/src/url_parse.h" + +static const wchar_t* const kLogFileName = L"tld_cleanup.log"; +typedef std::set<std::string> StringSet; + +// Writes the list of domain rules contained in the 'rules' set to the +// 'outfile', with each rule terminated by a LF. The file must already have +// been created with write access. +bool WriteRules(const StringSet& rules, HANDLE outfile) { + std::string data; + for (StringSet::const_iterator iter = rules.begin(); + iter != rules.end(); + ++iter) { + data.append(*iter); + data.append(1, '\n'); + } + + unsigned long written = 0; + BOOL success = WriteFile(outfile, + data.data(), + static_cast<long>(data.size()), + &written, + NULL); + return (success && written == static_cast<long>(data.size())); +} + +// These result codes should be in increasing order of severity. +typedef enum { + kSuccess, + kWarning, + kError, +} NormalizeResult; + +// Adjusts the rule to a standard form: removes single extraneous dots and +// canonicalizes it using GURL. 
Returns kSuccess if the rule is interpreted as +// valid; logs a warning and returns kWarning if it is probably invalid; and +// logs an error and returns kError if the rule is (almost) certainly invalid. +NormalizeResult NormalizeRule(std::string* rule) { + NormalizeResult result = kSuccess; + + // Strip single leading and trailing dots. + if (rule->at(0) == '.') + rule->erase(0, 1); + if (rule->size() == 0) { + LOG(WARNING) << "Ignoring empty rule"; + return kWarning; + } + if (rule->at(rule->size() - 1) == '.') + rule->erase(rule->size() - 1, 1); + if (rule->size() == 0) { + LOG(WARNING) << "Ignoring empty rule"; + return kWarning; + } + + // Allow single leading '*.' or '!', saved here so it's not canonicalized. + bool wildcard = false; + bool exception = false; + size_t start_offset = 0; + if (rule->at(0) == '!') { + rule->erase(0, 1); + exception = true; + } else if (rule->find("*.") == 0) { + rule->erase(0, 2); + wildcard = true; + } + if (rule->size() == 0) { + LOG(WARNING) << "Ignoring empty rule"; + return kWarning; + } + + // Warn about additional '*.' or '!'. + if (rule->find("*.", start_offset) != std::string::npos || + rule->find('!', start_offset) != std::string::npos) { + LOG(WARNING) << "Keeping probably invalid rule: " << *rule; + result = kWarning; + } + + // Make a GURL and normalize it, then get the host back out. + std::string url = "http://"; + url.append(*rule); + GURL gurl(url); + const std::string& spec = gurl.possibly_invalid_spec(); + url_parse::Component host = gurl.parsed_for_possibly_invalid_spec().host; + if (host.len < 0) { + LOG(ERROR) << "Ignoring rule that couldn't be normalized: " << *rule; + return kError; + } + if (!gurl.is_valid()) { + LOG(WARNING) << "Keeping rule that GURL says is invalid: " << *rule; + result = kWarning; + } + rule->assign(spec.substr(host.begin, host.len)); + + // Restore wildcard or exception marker. 
+ if (exception) + rule->insert(0, 1, '!'); + else if (wildcard) + rule->insert(0, "*."); + + return result; +} + +// Loads the file described by 'in_filename', converts it to the desired format +// (see the file comments above), and saves it into 'out_filename'. Returns +// the most severe of the result codes encountered when normalizing the rules. +NormalizeResult NormalizeFile(const std::wstring& in_filename, + const std::wstring& out_filename) { + std::string data; + if (!file_util::ReadFileToString(in_filename, &data)) { + fwprintf(stderr, L"Unable to read file %s\n", in_filename.c_str()); + // We return success since we've already reported the error. + return kSuccess; + } + + HANDLE outfile(CreateFile(out_filename.c_str(), + GENERIC_WRITE, + 0, + NULL, + CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, + NULL)); + if (outfile == INVALID_HANDLE_VALUE) { + fwprintf(stderr, L"Unable to write file %s\n", out_filename.c_str()); + // We return success since we've already reported the error. + return kSuccess; + } + + // We do a lot of string assignment during parsing, but simplicity is more + // important than performance here. + std::string rule; + NormalizeResult result = kSuccess; + size_t line_start = 0; + size_t line_end = 0; + StringSet rules; + while (line_start < data.size()) { + // Skip comments. + if (line_start + 1 < data.size() && + data[line_start] == '/' && + data[line_start + 1] == '/') { + line_end = data.find_first_of("\r\n", line_start); + if (line_end == std::string::npos) + line_end = data.size(); + } else { + // Truncate at first whitespace. + line_end = data.find_first_of("\r\n \t", line_start); + if (line_end == std::string::npos) + line_end = data.size(); + rule.assign(data.data(), line_start, line_end - line_start); + + NormalizeResult new_result = NormalizeRule(&rule); + if (new_result != kError) { + rules.insert(rule); + // Add true TLD for multi-level rules. 
+ size_t tld_start = rule.find_last_of('.'); + if (tld_start != std::string::npos && tld_start + 1 < rule.size()) + rules.insert(rule.substr(tld_start + 1)); + } + result = std::max(result, new_result); + } + + // Find beginning of next non-empty line. + line_start = data.find_first_of("\r\n", line_end); + if (line_start == std::string::npos) + line_start = data.size(); + line_start = data.find_first_not_of("\r\n", line_start); + if (line_start == std::string::npos) + line_start = data.size(); + } + + if (!WriteRules(rules, outfile)) { + LOG(ERROR) << "Error(s) writing " << out_filename; + result = kError; + } + + return result; +} + +int main(int argc, const char* argv[]) { + if (argc != 3) { + fprintf(stderr, "Normalizes and verifies UTF-8 TLD data files\n"); + fprintf(stderr, "Usage: %s <input> <output>\n", argv[0]); + return 1; + } + + // Only use OutputDebugString in debug mode. +#ifdef NDEBUG + logging::LoggingDestination destination = logging::LOG_ONLY_TO_FILE; +#else + logging::LoggingDestination destination = + logging::LOG_TO_BOTH_FILE_AND_SYSTEM_DEBUG_LOG; +#endif + + std::wstring log_filename; + PathService::Get(base::DIR_EXE, &log_filename); + file_util::AppendToPath(&log_filename, kLogFileName); + logging::InitLogging(log_filename.c_str(), + destination, + logging::LOCK_LOG_FILE, + logging::DELETE_OLD_LOG_FILE); + + icu_util::Initialize(); + + NormalizeResult result = NormalizeFile(UTF8ToWide(argv[1]), + UTF8ToWide(argv[2])); + if (result != kSuccess) { + fwprintf(stderr, L"Errors or warnings processing file. See log in %s.", + kLogFileName); + } + + if (result == kError) + return 1; + return 0; +} |