diff options
author | pam@chromium.org <pam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-07-18 14:24:28 +0000 |
---|---|---|
committer | pam@chromium.org <pam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-07-18 14:24:28 +0000 |
commit | b427d6526c0d81c921592e8beb0ac18b2e75e8db (patch) | |
tree | 3aef065f5912a2ecf2ff39dd851cb6cfc0d60af3 /net/tools | |
parent | fa61980a4da46415b69292508cdd4ab5e10b15be (diff) | |
download | chromium_src-b427d6526c0d81c921592e8beb0ac18b2e75e8db.zip chromium_src-b427d6526c0d81c921592e8beb0ac18b2e75e8db.tar.gz chromium_src-b427d6526c0d81c921592e8beb0ac18b2e75e8db.tar.bz2 |
Update effective-TLD data files using the latest data from Mozilla, ignoring all private domains.
Private domains are now ignored by the tld_cleanup.cc preprocessor because of bug 96086 (also see https://bugzilla.mozilla.org/show_bug.cgi?id=687165), so that users can navigate directly to those private-domain "TLDs".
This data file is Mozilla's changeset 290afd57d2a8, from 2012-07-04 16:08 +0100.
It includes changes (some ignored, as they involved private domains) from a number of Mozilla bugs, listed on http://hg.mozilla.org/mozilla-central/filelog/ba8463beab13/netwerk/dns/effective_tld_names.dat between 9411dffc948b (2011-09-02 14:08 -0400) and 290afd57d2a8 (2012-07-04 16:08 +0100).
BUG=37436, 96086
TEST=none
Review URL: https://chromiumcodereview.appspot.com/10789035
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@147236 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/tools')
-rw-r--r-- | net/tools/tld_cleanup/tld_cleanup.cc | 49 |
1 files changed, 33 insertions, 16 deletions
diff --git a/net/tools/tld_cleanup/tld_cleanup.cc b/net/tools/tld_cleanup/tld_cleanup.cc index d6e42e4..2094a37 100644 --- a/net/tools/tld_cleanup/tld_cleanup.cc +++ b/net/tools/tld_cleanup/tld_cleanup.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,7 +8,7 @@ // generate a perfect hash map. The benefit of this approach is that no time is // spent on program initialization to generate the map of this data. // -// Running this program finds "effective_tld_names.cc" in the expected location +// Running this program finds "effective_tld_names.dat" in the expected location // in the source checkout and generates "effective_tld_names.gperf" next to it. // // Any errors or warnings from this program are recorded in tld_cleanup.log. @@ -21,6 +21,9 @@ // * Logs a warning if GURL reports a rule as invalid, but keeps the rule. // * Canonicalizes each rule's domain by converting it to a GURL and back. // * Adds explicit rules for true TLDs found in any rule. +// TODO(pamg): Remove this comment when http://crbug.com/96086 is fixed. +// * Ignores any entries in the file between "// ===BEGIN PRIVATE DOMAINS===" +// and "// ===END PRIVATE DOMAINS===". #include <map> #include <set> @@ -47,6 +50,9 @@ struct Rule { typedef std::map<std::string, Rule> RuleMap; typedef std::set<std::string> RuleSet; + +const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS==="; +const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS==="; } // Writes the list of domain rules contained in the 'rules' set to the @@ -55,18 +61,18 @@ typedef std::set<std::string> RuleSet; bool WriteRules(const RuleMap& rules, const FilePath& outfile) { std::string data; data.append( - "%{\n" - "// Copyright (c) 2009 The Chromium Authors. 
All rights reserved.\n" - "// Use of this source code is governed by a BSD-style license that\n" - "// can be found in the LICENSE file.\n\n" - "// This file is generated by net/tools/tld_cleanup/.\n" - "// DO NOT MANUALLY EDIT!\n" - "%}\n" - "struct DomainRule {\n" - " const char *name;\n" - " int type; // 1: exception, 2: wildcard\n" - "};\n" - "%%\n" +"%{\n" +"// Copyright (c) 2012 The Chromium Authors. All rights reserved.\n" +"// Use of this source code is governed by a BSD-style license that can be\n" +"// found in the LICENSE file.\n\n" +"// This file is generated by net/tools/tld_cleanup/.\n" +"// DO NOT MANUALLY EDIT!\n" +"%}\n" +"struct DomainRule {\n" +" const char *name;\n" +" int type; // 1: exception, 2: wildcard\n" +"};\n" +"%%\n" ); for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) { @@ -177,11 +183,22 @@ NormalizeResult NormalizeFile(const FilePath& in_filename, size_t line_end = 0; RuleMap rules; RuleSet extra_rules; + int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1; while (line_start < data.size()) { - // Skip comments. - if (line_start + 1 < data.size() && + // Skip the entire section of private domains. + // TODO(pamg): remove this when http://crbug.com/96086 is fixed. + if (line_start + begin_private_length < data.size() && + !data.compare(line_start, begin_private_length, + kBeginPrivateDomainsComment)) { + line_end = data.find(kEndPrivateDomainsComment, line_start); + if (line_end == std::string::npos) { + LOG(WARNING) << "Private-domain section had no end marker."; + line_end = data.size(); + } + } else if (line_start + 1 < data.size() && data[line_start] == '/' && data[line_start + 1] == '/') { + // Skip comments. line_end = data.find_first_of("\r\n", line_start); if (line_end == std::string::npos) line_end = data.size(); |