summary | refs | log | tree | commit | diff | stats
path: root/net/tools
diff options
context:
space:
mode:
author: pam@chromium.org <pam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-07-18 14:24:28 +0000
committer: pam@chromium.org <pam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-07-18 14:24:28 +0000
commit: b427d6526c0d81c921592e8beb0ac18b2e75e8db (patch)
tree: 3aef065f5912a2ecf2ff39dd851cb6cfc0d60af3 /net/tools
parent: fa61980a4da46415b69292508cdd4ab5e10b15be (diff)
download: chromium_src-b427d6526c0d81c921592e8beb0ac18b2e75e8db.zip
          chromium_src-b427d6526c0d81c921592e8beb0ac18b2e75e8db.tar.gz
          chromium_src-b427d6526c0d81c921592e8beb0ac18b2e75e8db.tar.bz2
Update effective-TLD data files using the latest data from Mozilla, ignoring all private domains.
Private domains are now ignored by the tld_cleanup.cc preprocessor due to bug 96086 (also see https://bugzilla.mozilla.org/show_bug.cgi?id=687165), in order to allow users to navigate to the "TLDs" directly.

This data file is Mozilla's changeset 290afd57d2a8, from 2012-07-04 16:08 +0100. It includes changes (some ignored, as they involved private domains) from a number of Mozilla bugs, listed on http://hg.mozilla.org/mozilla-central/filelog/ba8463beab13/netwerk/dns/effective_tld_names.dat between 9411dffc948b (2011-09-02 14:08 -0400) and 290afd57d2a8 (2012-07-04 16:08 +0100).

BUG=37436, 96086
TEST=none
Review URL: https://chromiumcodereview.appspot.com/10789035

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@147236 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/tools')
-rw-r--r--  net/tools/tld_cleanup/tld_cleanup.cc  49
1 file changed, 33 insertions, 16 deletions
diff --git a/net/tools/tld_cleanup/tld_cleanup.cc b/net/tools/tld_cleanup/tld_cleanup.cc
index d6e42e4..2094a37 100644
--- a/net/tools/tld_cleanup/tld_cleanup.cc
+++ b/net/tools/tld_cleanup/tld_cleanup.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -8,7 +8,7 @@
// generate a perfect hash map. The benefit of this approach is that no time is
// spent on program initialization to generate the map of this data.
//
-// Running this program finds "effective_tld_names.cc" in the expected location
+// Running this program finds "effective_tld_names.dat" in the expected location
// in the source checkout and generates "effective_tld_names.gperf" next to it.
//
// Any errors or warnings from this program are recorded in tld_cleanup.log.
@@ -21,6 +21,9 @@
// * Logs a warning if GURL reports a rule as invalid, but keeps the rule.
// * Canonicalizes each rule's domain by converting it to a GURL and back.
// * Adds explicit rules for true TLDs found in any rule.
+// TODO(pamg): Remove this comment when http://crbug.com/96086 is fixed.
+// * Ignores any entries in the file between "// ===BEGIN PRIVATE DOMAINS==="
+// and "// ===END PRIVATE DOMAINS===".
#include <map>
#include <set>
@@ -47,6 +50,9 @@ struct Rule {
typedef std::map<std::string, Rule> RuleMap;
typedef std::set<std::string> RuleSet;
+
+const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS===";
+const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS===";
}
// Writes the list of domain rules contained in the 'rules' set to the
@@ -55,18 +61,18 @@ typedef std::set<std::string> RuleSet;
bool WriteRules(const RuleMap& rules, const FilePath& outfile) {
std::string data;
data.append(
- "%{\n"
- "// Copyright (c) 2009 The Chromium Authors. All rights reserved.\n"
- "// Use of this source code is governed by a BSD-style license that\n"
- "// can be found in the LICENSE file.\n\n"
- "// This file is generated by net/tools/tld_cleanup/.\n"
- "// DO NOT MANUALLY EDIT!\n"
- "%}\n"
- "struct DomainRule {\n"
- " const char *name;\n"
- " int type; // 1: exception, 2: wildcard\n"
- "};\n"
- "%%\n"
+"%{\n"
+"// Copyright (c) 2012 The Chromium Authors. All rights reserved.\n"
+"// Use of this source code is governed by a BSD-style license that can be\n"
+"// found in the LICENSE file.\n\n"
+"// This file is generated by net/tools/tld_cleanup/.\n"
+"// DO NOT MANUALLY EDIT!\n"
+"%}\n"
+"struct DomainRule {\n"
+" const char *name;\n"
+" int type; // 1: exception, 2: wildcard\n"
+"};\n"
+"%%\n"
);
for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) {
@@ -177,11 +183,22 @@ NormalizeResult NormalizeFile(const FilePath& in_filename,
size_t line_end = 0;
RuleMap rules;
RuleSet extra_rules;
+ int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1;
while (line_start < data.size()) {
- // Skip comments.
- if (line_start + 1 < data.size() &&
+ // Skip the entire section of private domains.
+ // TODO(pamg): remove this when http://crbug.com/96086 is fixed.
+ if (line_start + begin_private_length < data.size() &&
+ !data.compare(line_start, begin_private_length,
+ kBeginPrivateDomainsComment)) {
+ line_end = data.find(kEndPrivateDomainsComment, line_start);
+ if (line_end == std::string::npos) {
+ LOG(WARNING) << "Private-domain section had no end marker.";
+ line_end = data.size();
+ }
+ } else if (line_start + 1 < data.size() &&
data[line_start] == '/' &&
data[line_start + 1] == '/') {
+ // Skip comments.
line_end = data.find_first_of("\r\n", line_start);
if (line_end == std::string::npos)
line_end = data.size();