diff options
author | nyquist@chromium.org <nyquist@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-05-13 18:12:48 +0000 |
---|---|---|
committer | nyquist@chromium.org <nyquist@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-05-13 18:12:48 +0000 |
commit | 1e690d65afc4f616671ddbabbd2f47dce45c514d (patch) | |
tree | f4312c7ca39b90b6647b1c6089c37aab74efb7ab /net/tools | |
parent | c5cba4cb2c4eceda230f8e6a07121285725bff3a (diff) | |
download | chromium_src-1e690d65afc4f616671ddbabbd2f47dce45c514d.zip chromium_src-1e690d65afc4f616671ddbabbd2f47dce45c514d.tar.gz chromium_src-1e690d65afc4f616671ddbabbd2f47dce45c514d.tar.bz2 |
Add support for split Public Suffix List distinctions.
This adds support for the private additions to the Public Suffix List.
* Since net::RegistryControlledDomainService only contained static methods, this
CL changes these methods to be contained within the namespace
net::registry_controlled_domains and removes the class entirely.
* All methods defined as part of net::registry_controlled_domains now
have a mandatory argument to specify whether the private registries
should be included.
* Since the old implementation did not take into account the private
registries, this sets all old callers to use EXCLUDE_PRIVATE as the
net::registry_controlled_domains::PrivateRegistryFilter argument.
* Changes the parameter that controls whether unknown registries are included
from a boolean to an enum, using a naming scheme similar to the one used for the
private registries: net::registry_controlled_domains::UnknownRegistryFilter.
* This also updates the effective-TLD data file to:
45cfff9c781f 2013-04-23 11:51 +0100
It includes changes from a number of Mozilla bugs, listed on
https://hg.mozilla.org/mozilla-central/log/45cfff9c781f/netwerk/dns/effective_tld_names.dat
between 290afd57d2a8 (2012-07-04 16:08 +0100) and
45cfff9c781f (2013-04-23 11:51 +0100).
BUG=37436,96086
R=brettw@chromium.org, erikwright@chromium.org, pam@chromium.org, rsleevi@chromium.org, sky@chromium.org
Review URL: https://codereview.chromium.org/13979002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@199771 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/tools')
-rw-r--r-- | net/tools/tld_cleanup/README | 15 | ||||
-rw-r--r-- | net/tools/tld_cleanup/tld_cleanup.cc | 238 | ||||
-rw-r--r-- | net/tools/tld_cleanup/tld_cleanup.gyp | 7 | ||||
-rw-r--r-- | net/tools/tld_cleanup/tld_cleanup_util.cc | 251 | ||||
-rw-r--r-- | net/tools/tld_cleanup/tld_cleanup_util.h | 48 | ||||
-rw-r--r-- | net/tools/tld_cleanup/tld_cleanup_util_unittest.cc | 168 |
6 files changed, 490 insertions, 237 deletions
diff --git a/net/tools/tld_cleanup/README b/net/tools/tld_cleanup/README index adaac7e..7b468b5 100644 --- a/net/tools/tld_cleanup/README +++ b/net/tools/tld_cleanup/README @@ -4,18 +4,27 @@ When updating src/net/base/registry_controlled_domains/effective_tld_names.dat: http://goo.gl/Ji2bB 2. Remove whitespace from the ends of the lines. + You could possibly use something like: + sed -i -e "s/\s*$//g" \ + src/net/base/registry_controlled_domains/effective_tld_names.dat -3. Add the Chromium note back in. +3. Add the Chromium note back in just after the license at the top, and just + before '===BEGIN ICANN DOMAINS==='. Ensure there is an empty line above and + two empty lines below the note. The note should say: +// Chromium note: this is based on Mozilla's file: +// http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1 -4. Build tld_cleanup.exe (the "(net)" > "tld_cleanup" project) +4. Build tld_cleanup (the "(net)" > "tld_cleanup" project) 5. Run it (no arguments needed), typically from src/build/Release or src/build/Debug. It will re-generate src/net/base/registry_controlled_domains/effective_tld_names.gperf. 6. Run gperf on the new effective_tld_names.gperf: + pushd src/net/base/registry_controlled_domains; gperf -a -L "C++" -C -c -o -t -k '*' -NFindDomain -D -m 5 \ - effective_tld_names.gperf > effective_tld_names.cc + effective_tld_names.gperf > effective_tld_names.cc; + popd; It will produce a new effective_tld_names.cc. 7. Check in the updated effective_tld_names.dat, effective_tld_names.gperf, diff --git a/net/tools/tld_cleanup/tld_cleanup.cc b/net/tools/tld_cleanup/tld_cleanup.cc index 1162d98..485bece 100644 --- a/net/tools/tld_cleanup/tld_cleanup.cc +++ b/net/tools/tld_cleanup/tld_cleanup.cc @@ -21,243 +21,18 @@ // * Logs a warning if GURL reports a rule as invalid, but keeps the rule. // * Canonicalizes each rule's domain by converting it to a GURL and back. // * Adds explicit rules for true TLDs found in any rule. 
-// TODO(pamg): Remove this comment when http://crbug.com/96086 is fixed. -// * Ignores any entries in the file between "// ===BEGIN PRIVATE DOMAINS===" -// and "// ===END PRIVATE DOMAINS===". - -#include <map> -#include <set> -#include <string> +// * Marks entries in the file between "// ===BEGIN PRIVATE DOMAINS===" +// and "// ===END PRIVATE DOMAINS===" as private. #include "base/at_exit.h" #include "base/command_line.h" #include "base/file_util.h" -#include "base/file_util.h" #include "base/files/file_path.h" #include "base/i18n/icu_util.h" #include "base/logging.h" #include "base/path_service.h" #include "base/process_util.h" -#include "base/string_util.h" -#include "googleurl/src/gurl.h" -#include "googleurl/src/url_parse.h" - -namespace { -struct Rule { - bool exception; - bool wildcard; -}; - -typedef std::map<std::string, Rule> RuleMap; -typedef std::set<std::string> RuleSet; - -const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS==="; -const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS==="; -} - -// Writes the list of domain rules contained in the 'rules' set to the -// 'outfile', with each rule terminated by a LF. The file must already have -// been created with write access. -bool WriteRules(const RuleMap& rules, const base::FilePath& outfile) { - std::string data; - data.append( -"%{\n" -"// Copyright (c) 2012 The Chromium Authors. 
All rights reserved.\n" -"// Use of this source code is governed by a BSD-style license that can be\n" -"// found in the LICENSE file.\n\n" -"// This file is generated by net/tools/tld_cleanup/.\n" -"// DO NOT MANUALLY EDIT!\n" -"%}\n" -"struct DomainRule {\n" -" const char *name;\n" -" int type; // 1: exception, 2: wildcard\n" -"};\n" -"%%\n" - ); - - for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) { - data.append(i->first); - data.append(", "); - if (i->second.exception) { - data.append("1"); - } else if (i->second.wildcard) { - data.append("2"); - } else { - data.append("0"); - } - data.append("\n"); - } - - data.append("%%\n"); - - int written = file_util::WriteFile(outfile, data.data(), data.size()); - - return written == static_cast<int>(data.size()); -} - -// These result codes should be in increasing order of severity. -typedef enum { - kSuccess, - kWarning, - kError, -} NormalizeResult; - -// Adjusts the rule to a standard form: removes single extraneous dots and -// canonicalizes it using GURL. Returns kSuccess if the rule is interpreted as -// valid; logs a warning and returns kWarning if it is probably invalid; and -// logs an error and returns kError if the rule is (almost) certainly invalid. -NormalizeResult NormalizeRule(std::string* domain, Rule* rule) { - NormalizeResult result = kSuccess; - - // Strip single leading and trailing dots. - if (domain->at(0) == '.') - domain->erase(0, 1); - if (domain->empty()) { - LOG(WARNING) << "Ignoring empty rule"; - return kWarning; - } - if (domain->at(domain->size() - 1) == '.') - domain->erase(domain->size() - 1, 1); - if (domain->empty()) { - LOG(WARNING) << "Ignoring empty rule"; - return kWarning; - } - - // Allow single leading '*.' or '!', saved here so it's not canonicalized. 
- size_t start_offset = 0; - if (domain->at(0) == '!') { - domain->erase(0, 1); - rule->exception = true; - } else if (domain->find("*.") == 0) { - domain->erase(0, 2); - rule->wildcard = true; - } - if (domain->empty()) { - LOG(WARNING) << "Ignoring empty rule"; - return kWarning; - } - - // Warn about additional '*.' or '!'. - if (domain->find("*.", start_offset) != std::string::npos || - domain->find('!', start_offset) != std::string::npos) { - LOG(WARNING) << "Keeping probably invalid rule: " << *domain; - result = kWarning; - } - - // Make a GURL and normalize it, then get the host back out. - std::string url = "http://"; - url.append(*domain); - GURL gurl(url); - const std::string& spec = gurl.possibly_invalid_spec(); - url_parse::Component host = gurl.parsed_for_possibly_invalid_spec().host; - if (host.len < 0) { - LOG(ERROR) << "Ignoring rule that couldn't be normalized: " << *domain; - return kError; - } - if (!gurl.is_valid()) { - LOG(WARNING) << "Keeping rule that GURL says is invalid: " << *domain; - result = kWarning; - } - domain->assign(spec.substr(host.begin, host.len)); - - return result; -} - -// Loads the file described by 'in_filename', converts it to the desired format -// (see the file comments above), and saves it into 'out_filename'. Returns -// the most severe of the result codes encountered when normalizing the rules. -NormalizeResult NormalizeFile(const base::FilePath& in_filename, - const base::FilePath& out_filename) { - std::string data; - if (!file_util::ReadFileToString(in_filename, &data)) { - LOG(ERROR) << "Unable to read file"; - // We return success since we've already reported the error. - return kSuccess; - } - - // We do a lot of string assignment during parsing, but simplicity is more - // important than performance here. 
- std::string domain; - NormalizeResult result = kSuccess; - size_t line_start = 0; - size_t line_end = 0; - RuleMap rules; - RuleSet extra_rules; - int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1; - while (line_start < data.size()) { - // Skip the entire section of private domains. - // TODO(pamg): remove this when http://crbug.com/96086 is fixed. - if (line_start + begin_private_length < data.size() && - !data.compare(line_start, begin_private_length, - kBeginPrivateDomainsComment)) { - line_end = data.find(kEndPrivateDomainsComment, line_start); - if (line_end == std::string::npos) { - LOG(WARNING) << "Private-domain section had no end marker."; - line_end = data.size(); - } - } else if (line_start + 1 < data.size() && - data[line_start] == '/' && - data[line_start + 1] == '/') { - // Skip comments. - line_end = data.find_first_of("\r\n", line_start); - if (line_end == std::string::npos) - line_end = data.size(); - } else { - // Truncate at first whitespace. - line_end = data.find_first_of("\r\n \t", line_start); - if (line_end == std::string::npos) - line_end = data.size(); - domain.assign(data.data(), line_start, line_end - line_start); - - Rule rule; - rule.wildcard = false; - rule.exception = false; - NormalizeResult new_result = NormalizeRule(&domain, &rule); - if (new_result != kError) { - // Check the existing rules to make sure we don't have an exception and - // wildcard for the same rule. If we did, we'd have to update our - // parsing code to handle this case. - CHECK(rules.find(domain) == rules.end()); - - rules[domain] = rule; - // Add true TLD for multi-level rules. We don't add them right now, in - // case there's an exception or wild card that either exists or might be - // added in a later iteration. In those cases, there's no need to add - // it and it would just slow down parsing the data. 
- size_t tld_start = domain.find_last_of('.'); - if (tld_start != std::string::npos && tld_start + 1 < domain.size()) - extra_rules.insert(domain.substr(tld_start + 1)); - } - result = std::max(result, new_result); - } - - // Find beginning of next non-empty line. - line_start = data.find_first_of("\r\n", line_end); - if (line_start == std::string::npos) - line_start = data.size(); - line_start = data.find_first_not_of("\r\n", line_start); - if (line_start == std::string::npos) - line_start = data.size(); - } - - for (RuleSet::const_iterator iter = extra_rules.begin(); - iter != extra_rules.end(); - ++iter) { - if (rules.find(*iter) == rules.end()) { - Rule rule; - rule.exception = false; - rule.wildcard = false; - rules[*iter] = rule; - } - } - - if (!WriteRules(rules, out_filename)) { - LOG(ERROR) << "Error(s) writing output file"; - result = kError; - } - - return result; -} +#include "net/tools/tld_cleanup/tld_cleanup_util.h" int main(int argc, const char* argv[]) { base::EnableTerminationOnHeapCorruption(); @@ -307,13 +82,14 @@ int main(int argc, const char* argv[]) { "registry_controlled_domains")) .Append(FILE_PATH_LITERAL( "effective_tld_names.gperf")); - NormalizeResult result = NormalizeFile(input_file, output_file); - if (result != kSuccess) { + net::tld_cleanup::NormalizeResult result = + net::tld_cleanup::NormalizeFile(input_file, output_file); + if (result != net::tld_cleanup::kSuccess) { fprintf(stderr, "Errors or warnings processing file. 
See log in tld_cleanup.log."); } - if (result == kError) + if (result == net::tld_cleanup::kError) return 1; return 0; } diff --git a/net/tools/tld_cleanup/tld_cleanup.gyp b/net/tools/tld_cleanup/tld_cleanup.gyp index 245df98..227022c 100644 --- a/net/tools/tld_cleanup/tld_cleanup.gyp +++ b/net/tools/tld_cleanup/tld_cleanup.gyp @@ -8,14 +8,15 @@ }, 'targets': [ { - 'target_name': 'tld_cleanup', - 'type': 'executable', + 'target_name': 'tld_cleanup_util', + 'type': 'static_library', 'dependencies': [ '../../../base/base.gyp:base', '../../../build/temp_gyp/googleurl.gyp:googleurl', ], 'sources': [ - 'tld_cleanup.cc', + 'tld_cleanup_util.h', + 'tld_cleanup_util.cc', ], }, ], diff --git a/net/tools/tld_cleanup/tld_cleanup_util.cc b/net/tools/tld_cleanup/tld_cleanup_util.cc new file mode 100644 index 0000000..2f5496e --- /dev/null +++ b/net/tools/tld_cleanup/tld_cleanup_util.cc @@ -0,0 +1,251 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/tld_cleanup/tld_cleanup_util.h" + +#include "base/file_util.h" +#include "base/logging.h" +#include "base/string_util.h" +#include "googleurl/src/gurl.h" +#include "googleurl/src/url_parse.h" + +namespace { + +const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS==="; +const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS==="; +} + +namespace net { +namespace tld_cleanup { + +// Writes the list of domain rules contained in the 'rules' set to the +// 'outfile', with each rule terminated by a LF. The file must already have +// been created with write access. +bool WriteRules(const RuleMap& rules, const base::FilePath& outfile) { + std::string data; + data.append( +"%{\n" +"// Copyright 2012 The Chromium Authors. 
All rights reserved.\n" +"// Use of this source code is governed by a BSD-style license that can be\n" +"// found in the LICENSE file.\n\n" +"// This file is generated by net/tools/tld_cleanup/.\n" +"// DO NOT MANUALLY EDIT!\n" +"%}\n" +"struct DomainRule {\n" +" const char *name;\n" +" int type; // 1: exception, 2: wildcard\n" +" bool is_private;\n" +"};\n" +"%%\n" + ); + + for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) { + data.append(i->first); + data.append(", "); + if (i->second.exception) { + data.append("1"); + } else if (i->second.wildcard) { + data.append("2"); + } else { + data.append("0"); + } + if (i->second.is_private) { + data.append(", true"); + } else { + data.append(", false"); + } + data.append("\n"); + } + + data.append("%%\n"); + + int written = file_util::WriteFile(outfile, data.data(), data.size()); + + return written == static_cast<int>(data.size()); +} + +// Adjusts the rule to a standard form: removes single extraneous dots and +// canonicalizes it using GURL. Returns kSuccess if the rule is interpreted as +// valid; logs a warning and returns kWarning if it is probably invalid; and +// logs an error and returns kError if the rule is (almost) certainly invalid. +NormalizeResult NormalizeRule(std::string* domain, Rule* rule) { + NormalizeResult result = kSuccess; + + // Strip single leading and trailing dots. + if (domain->at(0) == '.') + domain->erase(0, 1); + if (domain->empty()) { + LOG(WARNING) << "Ignoring empty rule"; + return kWarning; + } + if (domain->at(domain->size() - 1) == '.') + domain->erase(domain->size() - 1, 1); + if (domain->empty()) { + LOG(WARNING) << "Ignoring empty rule"; + return kWarning; + } + + // Allow single leading '*.' or '!', saved here so it's not canonicalized. 
+ size_t start_offset = 0; + if (domain->at(0) == '!') { + domain->erase(0, 1); + rule->exception = true; + } else if (domain->find("*.") == 0) { + domain->erase(0, 2); + rule->wildcard = true; + } + if (domain->empty()) { + LOG(WARNING) << "Ignoring empty rule"; + return kWarning; + } + + // Warn about additional '*.' or '!'. + if (domain->find("*.", start_offset) != std::string::npos || + domain->find('!', start_offset) != std::string::npos) { + LOG(WARNING) << "Keeping probably invalid rule: " << *domain; + result = kWarning; + } + + // Make a GURL and normalize it, then get the host back out. + std::string url = "http://"; + url.append(*domain); + GURL gurl(url); + const std::string& spec = gurl.possibly_invalid_spec(); + url_parse::Component host = gurl.parsed_for_possibly_invalid_spec().host; + if (host.len < 0) { + LOG(ERROR) << "Ignoring rule that couldn't be normalized: " << *domain; + return kError; + } + if (!gurl.is_valid()) { + LOG(WARNING) << "Keeping rule that GURL says is invalid: " << *domain; + result = kWarning; + } + domain->assign(spec.substr(host.begin, host.len)); + + return result; +} + +NormalizeResult NormalizeDataToRuleMap(const std::string data, + RuleMap* rules) { + CHECK(rules); + // We do a lot of string assignment during parsing, but simplicity is more + // important than performance here. 
+ std::string domain; + NormalizeResult result = kSuccess; + size_t line_start = 0; + size_t line_end = 0; + bool is_private = false; + RuleMap extra_rules; + int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1; + int end_private_length = arraysize(kEndPrivateDomainsComment) - 1; + while (line_start < data.size()) { + if (line_start + begin_private_length < data.size() && + !data.compare(line_start, begin_private_length, + kBeginPrivateDomainsComment)) { + is_private = true; + line_end = line_start + begin_private_length; + } else if (line_start + end_private_length < data.size() && + !data.compare(line_start, end_private_length, + kEndPrivateDomainsComment)) { + is_private = false; + line_end = line_start + end_private_length; + } else if (line_start + 1 < data.size() && + data[line_start] == '/' && + data[line_start + 1] == '/') { + // Skip comments. + line_end = data.find_first_of("\r\n", line_start); + if (line_end == std::string::npos) + line_end = data.size(); + } else { + // Truncate at first whitespace. + line_end = data.find_first_of("\r\n \t", line_start); + if (line_end == std::string::npos) + line_end = data.size(); + domain.assign(data.data(), line_start, line_end - line_start); + + Rule rule; + rule.wildcard = false; + rule.exception = false; + rule.is_private = is_private; + NormalizeResult new_result = NormalizeRule(&domain, &rule); + if (new_result != kError) { + // Check the existing rules to make sure we don't have an exception and + // wildcard for the same rule, or that the same domain is listed as both + // private and not private. If we did, we'd have to update our + // parsing code to handle this case. + CHECK(rules->find(domain) == rules->end()); + + (*rules)[domain] = rule; + // Add true TLD for multi-level rules. We don't add them right now, in + // case there's an exception or wild card that either exists or might be + // added in a later iteration. 
In those cases, there's no need to add + // it and it would just slow down parsing the data. + size_t tld_start = domain.find_last_of('.'); + if (tld_start != std::string::npos && tld_start + 1 < domain.size()) { + std::string extra_rule_domain = domain.substr(tld_start + 1); + RuleMap::const_iterator iter = extra_rules.find(extra_rule_domain); + Rule extra_rule; + extra_rule.exception = false; + extra_rule.wildcard = false; + if (iter == extra_rules.end()) { + extra_rule.is_private = is_private; + } else { + // A rule already exists, so we ensure that if any of the entries is + // not private the result should be that the entry is not private. + // An example is .au which is not listed as a real TLD, but only + // lists second-level domains such as com.au. Subdomains of .au + // (eg. blogspot.com.au) are also listed in the private section, + // which is processed later, so this ensures that the real TLD + // (eg. .au) is listed as public. + extra_rule.is_private = is_private && iter->second.is_private; + } + extra_rules[extra_rule_domain] = extra_rule; + } + } + result = std::max(result, new_result); + } + + // Find beginning of next non-empty line. + line_start = data.find_first_of("\r\n", line_end); + if (line_start == std::string::npos) + line_start = data.size(); + line_start = data.find_first_not_of("\r\n", line_start); + if (line_start == std::string::npos) + line_start = data.size(); + } + + for (RuleMap::const_iterator iter = extra_rules.begin(); + iter != extra_rules.end(); + ++iter) { + if (rules->find(iter->first) == rules->end()) { + (*rules)[iter->first] = iter->second; + } + } + + return result; +} + +NormalizeResult NormalizeFile(const base::FilePath& in_filename, + const base::FilePath& out_filename) { + RuleMap rules; + std::string data; + if (!file_util::ReadFileToString(in_filename, &data)) { + LOG(ERROR) << "Unable to read file"; + // We return success since we've already reported the error. 
+ return kSuccess; + } + + NormalizeResult result = NormalizeDataToRuleMap(data, &rules); + + if (!WriteRules(rules, out_filename)) { + LOG(ERROR) << "Error(s) writing output file"; + result = kError; + } + + return result; +} + + +} // namespace tld_cleanup +} // namespace net diff --git a/net/tools/tld_cleanup/tld_cleanup_util.h b/net/tools/tld_cleanup/tld_cleanup_util.h new file mode 100644 index 0000000..5900206 --- /dev/null +++ b/net/tools/tld_cleanup/tld_cleanup_util.h @@ -0,0 +1,48 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_TLD_CLEANUP_TLD_CLEANUP_UTIL_H_ +#define NET_TOOLS_TLD_CLEANUP_TLD_CLEANUP_UTIL_H_ + +#include <map> +#include <string> + +namespace base { +class FilePath; +} // namespace base + +namespace net { +namespace tld_cleanup { + +struct Rule { + bool exception; + bool wildcard; + bool is_private; +}; + +typedef std::map<std::string, Rule> RuleMap; + +// These result codes should be in increasing order of severity. +typedef enum { + kSuccess, + kWarning, + kError, +} NormalizeResult; + +// Loads the file described by |in_filename|, converts it to the desired format +// (see the file comments in tld_cleanup.cc), and saves it into |out_filename|. +// Returns the most severe of the result codes encountered when normalizing the +// rules. +NormalizeResult NormalizeFile(const base::FilePath& in_filename, + const base::FilePath& out_filename); + +// Parses |data|, and converts it to the internal data format RuleMap. Returns +// the most severe of the result codes encountered when normalizing the rules. 
+NormalizeResult NormalizeDataToRuleMap(const std::string data, + RuleMap* rules); + +} // namespace tld_cleanup +} // namespace net + +#endif // NET_TOOLS_TLD_CLEANUP_TLD_CLEANUP_UTIL_H_ diff --git a/net/tools/tld_cleanup/tld_cleanup_util_unittest.cc b/net/tools/tld_cleanup/tld_cleanup_util_unittest.cc new file mode 100644 index 0000000..6b1d02a --- /dev/null +++ b/net/tools/tld_cleanup/tld_cleanup_util_unittest.cc @@ -0,0 +1,168 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/tld_cleanup/tld_cleanup_util.h" + +#include "base/files/file_path.h" +#include "base/path_service.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace net { +namespace tld_cleanup { + +std::string SetupData(std::string icann_domains, std::string private_domains) { + return "// ===BEGIN ICANN DOMAINS===\n" + + icann_domains + + "// ===END ICANN DOMAINS===\n" + + "// ===BEGIN PRIVATE DOMAINS===\n" + + private_domains + + "// ===END PRIVATE DOMAINS===\n"; +} + +TEST(TldCleanupUtilTest, TwoRealTldsSuccessfullyRead) { + std::string icann_domains = "foo\n" + "bar\n"; + std::string private_domains = ""; + std::string data = SetupData(icann_domains, private_domains); + RuleMap rules; + NormalizeResult result = NormalizeDataToRuleMap(data, &rules); + ASSERT_EQ(kSuccess, result); + ASSERT_EQ(2U, rules.size()); + RuleMap::const_iterator foo_iter = rules.find("foo"); + ASSERT_FALSE(rules.end() == foo_iter); + EXPECT_FALSE(foo_iter->second.wildcard); + EXPECT_FALSE(foo_iter->second.exception); + EXPECT_FALSE(foo_iter->second.is_private); + RuleMap::const_iterator bar_iter = rules.find("bar"); + ASSERT_FALSE(rules.end() == bar_iter); + EXPECT_FALSE(bar_iter->second.wildcard); + EXPECT_FALSE(bar_iter->second.exception); + EXPECT_FALSE(bar_iter->second.is_private); +} + +TEST(TldCleanupUtilTest, RealTldAutomaticallyAddedForSubdomain) { + std::string 
icann_domains = "foo.bar\n"; + std::string private_domains = ""; + std::string data = SetupData(icann_domains, private_domains); + RuleMap rules; + NormalizeResult result = NormalizeDataToRuleMap(data, &rules); + ASSERT_EQ(kSuccess, result); + ASSERT_EQ(2U, rules.size()); + RuleMap::const_iterator foo_bar_iter = rules.find("foo.bar"); + ASSERT_FALSE(rules.end() == foo_bar_iter); + EXPECT_FALSE(foo_bar_iter->second.wildcard); + EXPECT_FALSE(foo_bar_iter->second.exception); + EXPECT_FALSE(foo_bar_iter->second.is_private); + RuleMap::const_iterator bar_iter = rules.find("bar"); + ASSERT_FALSE(rules.end() == bar_iter); + EXPECT_FALSE(bar_iter->second.wildcard); + EXPECT_FALSE(bar_iter->second.exception); + EXPECT_FALSE(bar_iter->second.is_private); +} + +TEST(TldCleanupUtilTest, PrivateTldMarkedAsPrivate) { + std::string icann_domains = "foo\n" + "bar\n"; + std::string private_domains = "baz\n"; + std::string data = SetupData(icann_domains, private_domains); + RuleMap rules; + NormalizeResult result = NormalizeDataToRuleMap(data, &rules); + ASSERT_EQ(kSuccess, result); + ASSERT_EQ(3U, rules.size()); + RuleMap::const_iterator foo_iter = rules.find("foo"); + ASSERT_FALSE(rules.end() == foo_iter); + EXPECT_FALSE(foo_iter->second.wildcard); + EXPECT_FALSE(foo_iter->second.exception); + EXPECT_FALSE(foo_iter->second.is_private); + RuleMap::const_iterator bar_iter = rules.find("bar"); + ASSERT_FALSE(rules.end() == bar_iter); + EXPECT_FALSE(bar_iter->second.wildcard); + EXPECT_FALSE(bar_iter->second.exception); + EXPECT_FALSE(bar_iter->second.is_private); + RuleMap::const_iterator baz_iter = rules.find("baz"); + ASSERT_FALSE(rules.end() == baz_iter); + EXPECT_FALSE(baz_iter->second.wildcard); + EXPECT_FALSE(baz_iter->second.exception); + EXPECT_TRUE(baz_iter->second.is_private); +} + +TEST(TldCleanupUtilTest, PrivateDomainMarkedAsPrivate) { + std::string icann_domains = "bar\n"; + std::string private_domains = "foo.bar\n"; + std::string data = SetupData(icann_domains, 
private_domains); + RuleMap rules; + NormalizeResult result = NormalizeDataToRuleMap(data, &rules); + ASSERT_EQ(kSuccess, result); + ASSERT_EQ(2U, rules.size()); + RuleMap::const_iterator bar_iter = rules.find("bar"); + ASSERT_FALSE(rules.end() == bar_iter); + EXPECT_FALSE(bar_iter->second.wildcard); + EXPECT_FALSE(bar_iter->second.exception); + EXPECT_FALSE(bar_iter->second.is_private); + RuleMap::const_iterator foo_bar_iter = rules.find("foo.bar"); + ASSERT_FALSE(rules.end() == foo_bar_iter); + EXPECT_FALSE(foo_bar_iter->second.wildcard); + EXPECT_FALSE(foo_bar_iter->second.exception); + EXPECT_TRUE(foo_bar_iter->second.is_private); +} + +TEST(TldCleanupUtilTest, ExtraTldRuleIsNotMarkedPrivate) { + std::string icann_domains = "foo.bar\n" + "baz.bar\n"; + std::string private_domains = "qux.bar\n"; + std::string data = SetupData(icann_domains, private_domains); + RuleMap rules; + NormalizeResult result = NormalizeDataToRuleMap(data, &rules); + ASSERT_EQ(kSuccess, result); + ASSERT_EQ(4U, rules.size()); + RuleMap::const_iterator foo_bar_iter = rules.find("foo.bar"); + ASSERT_FALSE(rules.end() == foo_bar_iter); + EXPECT_FALSE(foo_bar_iter->second.wildcard); + EXPECT_FALSE(foo_bar_iter->second.exception); + EXPECT_FALSE(foo_bar_iter->second.is_private); + RuleMap::const_iterator baz_bar_iter = rules.find("baz.bar"); + ASSERT_FALSE(rules.end() == baz_bar_iter); + EXPECT_FALSE(baz_bar_iter->second.wildcard); + EXPECT_FALSE(baz_bar_iter->second.exception); + EXPECT_FALSE(baz_bar_iter->second.is_private); + RuleMap::const_iterator bar_iter = rules.find("bar"); + ASSERT_FALSE(rules.end() == bar_iter); + EXPECT_FALSE(bar_iter->second.wildcard); + EXPECT_FALSE(bar_iter->second.exception); + EXPECT_FALSE(bar_iter->second.is_private); + RuleMap::const_iterator qux_bar_iter = rules.find("qux.bar"); + ASSERT_FALSE(rules.end() == qux_bar_iter); + EXPECT_FALSE(qux_bar_iter->second.wildcard); + EXPECT_FALSE(qux_bar_iter->second.exception); + 
EXPECT_TRUE(qux_bar_iter->second.is_private); +} + +TEST(TldCleanupUtilTest, WildcardAndExceptionParsedCorrectly) { + std::string icann_domains = "*.bar\n" + "!foo.bar\n"; + std::string private_domains = "!baz.bar\n"; + std::string data = SetupData(icann_domains, private_domains); + RuleMap rules; + NormalizeResult result = NormalizeDataToRuleMap(data, &rules); + ASSERT_EQ(kSuccess, result); + ASSERT_EQ(3U, rules.size()); + RuleMap::const_iterator foo_bar_iter = rules.find("bar"); + ASSERT_FALSE(rules.end() == foo_bar_iter); + EXPECT_TRUE(foo_bar_iter->second.wildcard); + EXPECT_FALSE(foo_bar_iter->second.exception); + EXPECT_FALSE(foo_bar_iter->second.is_private); + RuleMap::const_iterator bar_iter = rules.find("foo.bar"); + ASSERT_FALSE(rules.end() == bar_iter); + EXPECT_FALSE(bar_iter->second.wildcard); + EXPECT_TRUE(bar_iter->second.exception); + EXPECT_FALSE(bar_iter->second.is_private); + RuleMap::const_iterator baz_bar_iter = rules.find("baz.bar"); + ASSERT_FALSE(rules.end() == baz_bar_iter); + EXPECT_FALSE(baz_bar_iter->second.wildcard); + EXPECT_TRUE(baz_bar_iter->second.exception); + EXPECT_TRUE(baz_bar_iter->second.is_private); +} + +} // namespace tld_cleanup +} // namespace net |