From b427d6526c0d81c921592e8beb0ac18b2e75e8db Mon Sep 17 00:00:00 2001 From: "pam@chromium.org" Date: Wed, 18 Jul 2012 14:24:28 +0000 Subject: Update effective-TLD data files using the latest data from Mozilla, ignoring all private domains. Private domains are now ignored by the tld_cleanup.cc preprocessor due to bug 96086 (also see https://bugzilla.mozilla.org/show_bug.cgi?id=687165), in order to allow users to navigate to the "TLDs" directly. This data file is Mozilla's changeset 290afd57d2a8, from 2012-07-04 16:08 +0100. It includes changes (some ignored, as they involved private domains) from a number of Mozilla bugs, listed on http://hg.mozilla.org/mozilla-central/filelog/ba8463beab13/netwerk/dns/effective_tld_names.dat between 9411dffc948b (2011-09-02 14:08 -0400) and 290afd57d2a8 (2012-07-04 16:08 +0100). BUG=37436, 96086 TEST=none Review URL: https://chromiumcodereview.appspot.com/10789035 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@147236 0039d316-1c4b-4281-b951-d872f2087c98 --- net/tools/tld_cleanup/tld_cleanup.cc | 49 ++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'net/tools') diff --git a/net/tools/tld_cleanup/tld_cleanup.cc b/net/tools/tld_cleanup/tld_cleanup.cc index d6e42e4..2094a37 100644 --- a/net/tools/tld_cleanup/tld_cleanup.cc +++ b/net/tools/tld_cleanup/tld_cleanup.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,7 +8,7 @@ // generate a perfect hash map. The benefit of this approach is that no time is // spent on program initialization to generate the map of this data. 
// -// Running this program finds "effective_tld_names.cc" in the expected location +// Running this program finds "effective_tld_names.dat" in the expected location // in the source checkout and generates "effective_tld_names.gperf" next to it. // // Any errors or warnings from this program are recorded in tld_cleanup.log. @@ -21,6 +21,9 @@ // * Logs a warning if GURL reports a rule as invalid, but keeps the rule. // * Canonicalizes each rule's domain by converting it to a GURL and back. // * Adds explicit rules for true TLDs found in any rule. +// TODO(pamg): Remove this comment when http://crbug.com/96086 is fixed. +// * Ignores any entries in the file between "// ===BEGIN PRIVATE DOMAINS===" +// and "// ===END PRIVATE DOMAINS===". #include <map> #include <set> @@ -47,6 +50,9 @@ struct Rule { typedef std::map<std::string, Rule> RuleMap; typedef std::set<std::string> RuleSet; + +const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS==="; +const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS==="; } // Writes the list of domain rules contained in the 'rules' set to the @@ -55,18 +61,18 @@ typedef std::set<std::string> RuleSet; bool WriteRules(const RuleMap& rules, const FilePath& outfile) { std::string data; data.append( - "%{\n" - "// Copyright (c) 2009 The Chromium Authors. All rights reserved.\n" - "// Use of this source code is governed by a BSD-style license that\n" - "// can be found in the LICENSE file.\n\n" - "// This file is generated by net/tools/tld_cleanup/.\n" - "// DO NOT MANUALLY EDIT!\n" - "%}\n" - "struct DomainRule {\n" - " const char *name;\n" - " int type; // 1: exception, 2: wildcard\n" - "};\n" - "%%\n" +"%{\n" +"// Copyright (c) 2012 The Chromium Authors. 
All rights reserved.\n" +"// Use of this source code is governed by a BSD-style license that can be\n" +"// found in the LICENSE file.\n\n" +"// This file is generated by net/tools/tld_cleanup/.\n" +"// DO NOT MANUALLY EDIT!\n" +"%}\n" +"struct DomainRule {\n" +" const char *name;\n" +" int type; // 1: exception, 2: wildcard\n" +"};\n" +"%%\n" ); for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) { @@ -177,11 +183,22 @@ NormalizeResult NormalizeFile(const FilePath& in_filename, size_t line_end = 0; RuleMap rules; RuleSet extra_rules; + int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1; while (line_start < data.size()) { - // Skip comments. - if (line_start + 1 < data.size() && + // Skip the entire section of private domains. + // TODO(pamg): remove this when http://crbug.com/96086 is fixed. + if (line_start + begin_private_length < data.size() && + !data.compare(line_start, begin_private_length, + kBeginPrivateDomainsComment)) { + line_end = data.find(kEndPrivateDomainsComment, line_start); + if (line_end == std::string::npos) { + LOG(WARNING) << "Private-domain section had no end marker."; + line_end = data.size(); + } + } else if (line_start + 1 < data.size() && data[line_start] == '/' && data[line_start + 1] == '/') { + // Skip comments. line_end = data.find_first_of("\r\n", line_start); if (line_end == std::string::npos) line_end = data.size(); -- cgit v1.1