summaryrefslogtreecommitdiffstats
path: root/net/base
diff options
context:
space:
mode:
authordeanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2008-11-25 02:17:17 +0000
committerdeanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2008-11-25 02:17:17 +0000
commit8c9526605a46e6b4375b43a824d03ec4876573e0 (patch)
tree9c807faafb005332655611c6633655776e6dc3e4 /net/base
parent329554ffb0aee15099fb09b40be4868633d9ca37 (diff)
downloadchromium_src-8c9526605a46e6b4375b43a824d03ec4876573e0.zip
chromium_src-8c9526605a46e6b4375b43a824d03ec4876573e0.tar.gz
chromium_src-8c9526605a46e6b4375b43a824d03ec4876573e0.tar.bz2
Use StringPiece in the TLD service. This means we don't have to copy the TLD data from the resource into a std::string, we can just always refer into the resource data. This saves ~40k or something meaningless.
Also use StringPiece to replace StringSegment, and other minor cleanup. Review URL: http://codereview.chromium.org/11310 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@5960 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base')
-rw-r--r--net/base/registry_controlled_domain.cc106
-rw-r--r--net/base/registry_controlled_domain.h135
2 files changed, 103 insertions, 138 deletions
diff --git a/net/base/registry_controlled_domain.cc b/net/base/registry_controlled_domain.cc
index f98a3f3..be485ac7 100644
--- a/net/base/registry_controlled_domain.cc
+++ b/net/base/registry_controlled_domain.cc
@@ -168,6 +168,7 @@ std::string RegistryControlledDomainService::GetDomainAndRegistryImpl(
return std::string(); // No registry.
// The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding
// subcomponent length.
+ DCHECK(host.length() >= 2);
if (registry_length > (host.length() - 2)) {
NOTREACHED() <<
"Host does not have at least one subcomponent before registry!";
@@ -206,27 +207,25 @@ size_t RegistryControlledDomainService::GetRegistryLengthImpl(
// Walk up the domain tree, most specific to least specific,
// looking for matches at each level.
- StringSegment match;
size_t prev_start = std::string::npos;
size_t curr_start = host_check_begin;
size_t next_dot = host.find('.', curr_start);
if (next_dot >= host_check_len) // Catches std::string::npos as well.
return 0; // This can't have a registry + domain.
while (1) {
- match.Set(host.data(), curr_start, host_check_len - curr_start);
- DomainMap::iterator iter = domain_map_.find(match);
- if (iter != domain_map_.end()) {
- DomainEntry entry = iter->second;
+ DomainSet::iterator iter = domain_set_.find(
+ DomainEntry(host.data() + curr_start, host_check_len - curr_start));
+ if (iter != domain_set_.end()) {
// Exception rules override wildcard rules when the domain is an exact
// match, but wildcards take precedence when there's a subdomain.
- if (entry.wildcard && (prev_start != std::string::npos)) {
+ if (iter->attributes.wildcard && (prev_start != std::string::npos)) {
// If prev_start == host_check_begin, then the host is the registry
// itself, so return 0.
return (prev_start == host_check_begin) ?
0 : (host.length() - prev_start);
}
- if (entry.exception) {
+ if (iter->attributes.exception) {
if (next_dot == std::string::npos) {
// If we get here, we had an exception rule with no dots (e.g.
// "!foo"). This would only be valid if we had a corresponding
@@ -268,95 +267,66 @@ RegistryControlledDomainService* RegistryControlledDomainService::SetInstance(
return old_instance;
}
-struct RegistryControlledDomainServiceSingletonTraits :
- public DefaultSingletonTraits<RegistryControlledDomainService> {
- static RegistryControlledDomainService* New() {
- RegistryControlledDomainService* instance =
- new RegistryControlledDomainService();
- instance->Init();
- return instance;
- }
-};
-
// static
RegistryControlledDomainService* RegistryControlledDomainService::GetInstance()
{
if (test_instance_)
return test_instance_;
- return Singleton<RegistryControlledDomainService,
- RegistryControlledDomainServiceSingletonTraits>::get();
+ return Singleton<RegistryControlledDomainService>::get();
}
// static
void RegistryControlledDomainService::UseDomainData(const std::string& data) {
RegistryControlledDomainService* instance = GetInstance();
- instance->domain_data_ = data;
- instance->ParseDomainData();
+ instance->copied_domain_data_ = data;
+ instance->ParseDomainData(instance->copied_domain_data_);
}
void RegistryControlledDomainService::Init() {
- domain_data_ = NetModule::GetResource(IDR_EFFECTIVE_TLD_NAMES).as_string();
- if (domain_data_.empty()) {
- // The resource file isn't present for some unit tests, for example. Fall
- // back to a tiny, basic list of rules in that case.
- domain_data_ = kDefaultDomainData;
- }
- ParseDomainData();
+ // The resource file isn't present for some unit tests, for example. Fall
+ // back to a tiny, basic list of rules in that case.
+ StringPiece res_data = NetModule::GetResource(IDR_EFFECTIVE_TLD_NAMES);
+ ParseDomainData(!res_data.empty() ? res_data : kDefaultDomainData);
}
-void RegistryControlledDomainService::ParseDomainData() {
- domain_map_.clear();
+void RegistryControlledDomainService::ParseDomainData(const StringPiece& data) {
+ domain_set_.clear();
- StringSegment rule;
size_t line_end = 0;
size_t line_start = 0;
- while (line_start < domain_data_.size()) {
- line_end = domain_data_.find('\n', line_start);
- if (line_end == std::string::npos)
- line_end = domain_data_.size();
- rule.Set(domain_data_.data(), line_start, line_end - line_start);
- AddRule(&rule);
+ while (line_start < data.size()) {
+ line_end = data.find('\n', line_start);
+ if (line_end == StringPiece::npos)
+ line_end = data.size();
+ AddRule(StringPiece(data.data() + line_start, line_end - line_start));
line_start = line_end + 1;
}
}
-void RegistryControlledDomainService::AddRule(StringSegment* rule) {
- // Determine rule properties.
- size_t property_offset = 0;
- bool exception = false;
- bool wild = false;
+void RegistryControlledDomainService::AddRule(const StringPiece& rule_str) {
+ DomainEntry rule(rule_str.data(), rule_str.size());
// Valid rules may be either wild or exceptions, but not both.
- if (rule->CharAt(0) == '!') {
- exception = true;
- property_offset = 1;
- } else if (rule->CharAt(0) == '*' && rule->CharAt(1) == '.') {
- wild = true;
- property_offset = 2;
+ if (rule.starts_with("!")) {
+ rule.remove_prefix(1);
+ rule.attributes.exception = true;
+ } else if (rule.starts_with("*.")) {
+ rule.remove_prefix(2);
+ rule.attributes.wildcard = true;
}
- // Find or create an entry for this host.
- rule->TrimFromStart(property_offset);
- DomainEntry entry;
- DomainMap::iterator iter = domain_map_.find(*rule);
- if (iter != domain_map_.end())
- entry = iter->second;
-
- entry.exception |= exception;
- entry.wildcard |= wild;
- domain_map_[*rule] = entry;
-}
-
-bool RegistryControlledDomainService::StringSegment::operator<(
- const StringSegment &other) const {
- // If the segments are of equal length, compare their contents; otherwise,
- // the shorter segment is "less than" the longer one.
- if (len_ == other.len_) {
- int comparison = strncmp(data_ + begin_, other.data_ + other.begin_, len_);
- return (comparison < 0);
+ DomainSet::iterator prev_rule = domain_set_.find(rule);
+ if (prev_rule != domain_set_.end()) {
+ // We found a rule with the same domain, combine the attributes.
+ // This could happen for example when a domain is both a wildcard
+ // and an exception (ex *.google.com and !google.com). Sets are immutable,
+ // we'll erase the old one, and insert a new one with the new attributes.
+ rule.attributes.Combine(prev_rule->attributes);
+ domain_set_.erase(prev_rule);
}
- return (len_ < other.len_);
+
+ domain_set_.insert(rule);
}
} // namespace net
diff --git a/net/base/registry_controlled_domain.h b/net/base/registry_controlled_domain.h
index b5ba97f6..60c9f6a 100644
--- a/net/base/registry_controlled_domain.h
+++ b/net/base/registry_controlled_domain.h
@@ -110,13 +110,17 @@
#ifndef NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H_
#define NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H_
-#include <map>
+#include <set>
#include <string>
#include "base/basictypes.h"
+#include "base/string_piece.h"
class GURL;
+template <typename T>
+struct DefaultSingletonTraits;
+
namespace net {
struct RegistryControlledDomainServiceSingletonTraits;
@@ -197,7 +201,7 @@ class RegistryControlledDomainService {
protected:
// The entire protected API is only for unit testing. I mean it. Don't make
// me come over there!
- RegistryControlledDomainService() { }
+ RegistryControlledDomainService() { Init(); }
// Set the RegistryControledDomainService instance to be used internally.
// |instance| will supersede the singleton instance normally used. If
@@ -207,97 +211,88 @@ class RegistryControlledDomainService {
static RegistryControlledDomainService* SetInstance(
RegistryControlledDomainService* instance);
- // Sets the domain_data_ of the current instance (creating one, if necessary),
- // then parses it.
+ // Sets the copied_domain_data_ of the current instance (creating one,
+ // if necessary), then parses it.
static void UseDomainData(const std::string& data);
private:
// To allow construction of the internal singleton instance.
- friend struct RegistryControlledDomainServiceSingletonTraits;
-
- // Using the StringSegment class, we can compare portions of strings without
- // needing to allocate or copy them.
- class StringSegment {
- public:
- StringSegment() : data_(0), begin_(0), len_(0) { }
- ~StringSegment() { }
-
- void Set(const char* data, size_t begin, size_t len) {
- data_ = data;
- begin_ = begin;
- len_ = len;
- }
+ friend struct DefaultSingletonTraits<RegistryControlledDomainService>;
- // Returns the character at the given offset from the start of the segment,
- // or '\0' if the offset lies outside the segment.
- char CharAt(size_t offset) const {
- return (offset < len_) ? data_[begin_ + offset] : '\0';
- }
+ void Init();
- // Removes a maximum of |trimmed| number of characters, up to the length of
- // the segment, from the start of the StringSegment.
- void TrimFromStart(size_t trimmed) {
- if (trimmed > len_)
- trimmed = len_;
- begin_ += trimmed;
- len_ -= trimmed;
+ // A DomainEntry is a combination of the domain name (as a StringPiece, so
+ // that we can reference external memory without copying), and two bits of
+ // information, if it's an exception and/or wildcard entry. Note: we don't
+ // consider the attributes when doing comparisons, so as far as any data
+ // structures are concerned (ex our set), two DomainEntry's are equal as long
+ // as their StringPiece (the domain) is equal. This is the behavior we want.
+ class DomainEntry : public StringPiece {
+ public:
+ struct DomainEntryAttributes {
+ DomainEntryAttributes() : exception(false), wildcard(false) { }
+ ~DomainEntryAttributes() { }
+
+ void Combine(const DomainEntryAttributes& other) {
+ if (other.exception) exception = true;
+ if (other.wildcard) wildcard = true;
+ }
+
+ bool exception;
+ bool wildcard;
+ };
+
+ DomainEntry() : StringPiece() { }
+ DomainEntry(const char* ptr, size_type size) : StringPiece(ptr, size) { }
+ ~DomainEntry() { }
+
+ // We override StringPiece's operator < to make it more efficient, since we
+ // don't care that it's sorted lexicographically and we want to ignore the
+ // attributes when we are doing the comparisons.
+ bool operator<(const DomainEntry& other) const {
+ // If we are the same size, call up to StringPiece's real less than.
+ if (size() == other.size())
+ return *static_cast<const StringPiece*>(this) < other;
+ // Consider ourselves less if we are smaller
+ return size() < other.size();
}
- const char* data() const { return data_; }
-
- // This comparator is needed by std::map. Note that since we don't care
- // about the exact sorting, we use a somewhat less intuitive, but efficient,
- // comparison.
- bool operator<(const StringSegment& other) const;
-
- private:
- const char* data_;
- size_t begin_;
- size_t len_;
+ DomainEntryAttributes attributes;
};
- // The full domain rule data, loaded from a resource or set by a unit test.
- std::string domain_data_;
-
- // An entry in the map of domain specifications, describing the properties
+ // An entry in the set of domain specifications, describing the properties
// that apply to that domain rule.
- struct DomainEntry {
- DomainEntry() : exception(false), wildcard(false) { }
- bool exception;
- bool wildcard;
- };
- typedef std::map<StringSegment, DomainEntry> DomainMap;
-
- // A map from a StringSegment holding a domain name (rule) to its DomainEntry.
- // The StringSegments in the domain_map_ hold pointers to the domain_data_
- // data; that's cheaper than copying the string data itself.
- // TODO(pamg): Since all the domain_map_ entries have the same data_, it's
- // redundant. Is it worth subclassing StringSegment to avoid that?
- DomainMap domain_map_;
+ typedef std::set<DomainEntry> DomainSet;
- // Parses a list of effective-TLD rules, building the domain_map_. Rules are
- // assumed to be syntactically valid.
- void ParseDomainData();
+ // Parses a list of effective-TLD rules, building the domain_set_. Rules are
+ // assumed to be syntactically valid. We operate on a StringPiece. If we
+ // were populated from an embedded resource, we will reference the embedded
+ // resource directly. If we were populated through UseDomainData, then our
+ // StringPiece will reference our local copy in copied_domain_data_.
+ void ParseDomainData(const StringPiece& data);
// Returns the singleton instance, after attempting to initialize it.
// NOTE that if the effective-TLD data resource can't be found, the instance
- // will be initialized and continue operation with an empty domain_map_.
+ // will be initialized and continue operation with simple default TLD data.
static RegistryControlledDomainService* GetInstance();
- // Loads and parses the effective-TLD data resource.
- void Init();
-
- // Adds one rule, assumed to be valid, to the domain_map_.
- // WARNING: As implied by the non-const status of the incoming rule, this
- // method may MODIFY that rule (in particular, change its start and length).
- // This is a performance optimization.
- void AddRule(StringSegment* rule);
+ // Adds one rule, assumed to be valid, to the domain_set_.
+ void AddRule(const StringPiece& rule_str);
// Internal workings of the static public methods. See above.
static std::string GetDomainAndRegistryImpl(const std::string& host);
size_t GetRegistryLengthImpl(const std::string& host,
bool allow_unknown_registries);
+ // A set of our DomainEntry's.
+ DomainSet domain_set_;
+
+ // An optional copy of the full domain rule data. If we're loaded from a
+ // resource, then we just reference the resource directly without copying,
+ // and copied_domain_data_ is not used. If we are populated through
+ // UseDomainData() then we copy that data here and reference it.
+ std::string copied_domain_data_;
+
DISALLOW_COPY_AND_ASSIGN(RegistryControlledDomainService);
};