diff options
author | deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-11-25 02:17:17 +0000 |
---|---|---|
committer | deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-11-25 02:17:17 +0000 |
commit | 8c9526605a46e6b4375b43a824d03ec4876573e0 (patch) | |
tree | 9c807faafb005332655611c6633655776e6dc3e4 | |
parent | 329554ffb0aee15099fb09b40be4868633d9ca37 (diff) | |
download | chromium_src-8c9526605a46e6b4375b43a824d03ec4876573e0.zip chromium_src-8c9526605a46e6b4375b43a824d03ec4876573e0.tar.gz chromium_src-8c9526605a46e6b4375b43a824d03ec4876573e0.tar.bz2 |
Use StringPiece in the TLD service. This means we don't have to copy the TLD data from the resource into a std::string; we can always refer directly into the resource data. This saves ~40k or something meaningless.
Also use StringPiece to replace StringSegment, and do other minor cleanup.
Review URL: http://codereview.chromium.org/11310
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@5960 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | net/base/registry_controlled_domain.cc | 106 | ||||
-rw-r--r-- | net/base/registry_controlled_domain.h | 135 |
2 files changed, 103 insertions, 138 deletions
diff --git a/net/base/registry_controlled_domain.cc b/net/base/registry_controlled_domain.cc index f98a3f3..be485ac7 100644 --- a/net/base/registry_controlled_domain.cc +++ b/net/base/registry_controlled_domain.cc @@ -168,6 +168,7 @@ std::string RegistryControlledDomainService::GetDomainAndRegistryImpl( return std::string(); // No registry. // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding // subcomponent length. + DCHECK(host.length() >= 2); if (registry_length > (host.length() - 2)) { NOTREACHED() << "Host does not have at least one subcomponent before registry!"; @@ -206,27 +207,25 @@ size_t RegistryControlledDomainService::GetRegistryLengthImpl( // Walk up the domain tree, most specific to least specific, // looking for matches at each level. - StringSegment match; size_t prev_start = std::string::npos; size_t curr_start = host_check_begin; size_t next_dot = host.find('.', curr_start); if (next_dot >= host_check_len) // Catches std::string::npos as well. return 0; // This can't have a registry + domain. while (1) { - match.Set(host.data(), curr_start, host_check_len - curr_start); - DomainMap::iterator iter = domain_map_.find(match); - if (iter != domain_map_.end()) { - DomainEntry entry = iter->second; + DomainSet::iterator iter = domain_set_.find( + DomainEntry(host.data() + curr_start, host_check_len - curr_start)); + if (iter != domain_set_.end()) { // Exception rules override wildcard rules when the domain is an exact // match, but wildcards take precedence when there's a subdomain. - if (entry.wildcard && (prev_start != std::string::npos)) { + if (iter->attributes.wildcard && (prev_start != std::string::npos)) { // If prev_start == host_check_begin, then the host is the registry // itself, so return 0. return (prev_start == host_check_begin) ? 
0 : (host.length() - prev_start); } - if (entry.exception) { + if (iter->attributes.exception) { if (next_dot == std::string::npos) { // If we get here, we had an exception rule with no dots (e.g. // "!foo"). This would only be valid if we had a corresponding @@ -268,95 +267,66 @@ RegistryControlledDomainService* RegistryControlledDomainService::SetInstance( return old_instance; } -struct RegistryControlledDomainServiceSingletonTraits : - public DefaultSingletonTraits<RegistryControlledDomainService> { - static RegistryControlledDomainService* New() { - RegistryControlledDomainService* instance = - new RegistryControlledDomainService(); - instance->Init(); - return instance; - } -}; - // static RegistryControlledDomainService* RegistryControlledDomainService::GetInstance() { if (test_instance_) return test_instance_; - return Singleton<RegistryControlledDomainService, - RegistryControlledDomainServiceSingletonTraits>::get(); + return Singleton<RegistryControlledDomainService>::get(); } // static void RegistryControlledDomainService::UseDomainData(const std::string& data) { RegistryControlledDomainService* instance = GetInstance(); - instance->domain_data_ = data; - instance->ParseDomainData(); + instance->copied_domain_data_ = data; + instance->ParseDomainData(instance->copied_domain_data_); } void RegistryControlledDomainService::Init() { - domain_data_ = NetModule::GetResource(IDR_EFFECTIVE_TLD_NAMES).as_string(); - if (domain_data_.empty()) { - // The resource file isn't present for some unit tests, for example. Fall - // back to a tiny, basic list of rules in that case. - domain_data_ = kDefaultDomainData; - } - ParseDomainData(); + // The resource file isn't present for some unit tests, for example. Fall + // back to a tiny, basic list of rules in that case. + StringPiece res_data = NetModule::GetResource(IDR_EFFECTIVE_TLD_NAMES); + ParseDomainData(!res_data.empty() ? 
res_data : kDefaultDomainData); } -void RegistryControlledDomainService::ParseDomainData() { - domain_map_.clear(); +void RegistryControlledDomainService::ParseDomainData(const StringPiece& data) { + domain_set_.clear(); - StringSegment rule; size_t line_end = 0; size_t line_start = 0; - while (line_start < domain_data_.size()) { - line_end = domain_data_.find('\n', line_start); - if (line_end == std::string::npos) - line_end = domain_data_.size(); - rule.Set(domain_data_.data(), line_start, line_end - line_start); - AddRule(&rule); + while (line_start < data.size()) { + line_end = data.find('\n', line_start); + if (line_end == StringPiece::npos) + line_end = data.size(); + AddRule(StringPiece(data.data() + line_start, line_end - line_start)); line_start = line_end + 1; } } -void RegistryControlledDomainService::AddRule(StringSegment* rule) { - // Determine rule properties. - size_t property_offset = 0; - bool exception = false; - bool wild = false; +void RegistryControlledDomainService::AddRule(const StringPiece& rule_str) { + DomainEntry rule(rule_str.data(), rule_str.size()); // Valid rules may be either wild or exceptions, but not both. - if (rule->CharAt(0) == '!') { - exception = true; - property_offset = 1; - } else if (rule->CharAt(0) == '*' && rule->CharAt(1) == '.') { - wild = true; - property_offset = 2; + if (rule.starts_with("!")) { + rule.remove_prefix(1); + rule.attributes.exception = true; + } else if (rule.starts_with("*.")) { + rule.remove_prefix(2); + rule.attributes.wildcard = true; } - // Find or create an entry for this host. 
- rule->TrimFromStart(property_offset); - DomainEntry entry; - DomainMap::iterator iter = domain_map_.find(*rule); - if (iter != domain_map_.end()) - entry = iter->second; - - entry.exception |= exception; - entry.wildcard |= wild; - domain_map_[*rule] = entry; -} - -bool RegistryControlledDomainService::StringSegment::operator<( - const StringSegment &other) const { - // If the segments are of equal length, compare their contents; otherwise, - // the shorter segment is "less than" the longer one. - if (len_ == other.len_) { - int comparison = strncmp(data_ + begin_, other.data_ + other.begin_, len_); - return (comparison < 0); + DomainSet::iterator prev_rule = domain_set_.find(rule); + if (prev_rule != domain_set_.end()) { + // We found a rule with the same domain, combine the attributes. + // This could happen for example when a domain is both a wildcard + // and an exception (ex *.google.com and !google.com). Sets are immutable, + // we'll erase the old one, and insert a new one with the new attributes. + rule.attributes.Combine(prev_rule->attributes); + domain_set_.erase(prev_rule); } - return (len_ < other.len_); + + domain_set_.insert(rule); } } // namespace net diff --git a/net/base/registry_controlled_domain.h b/net/base/registry_controlled_domain.h index b5ba97f6..60c9f6a 100644 --- a/net/base/registry_controlled_domain.h +++ b/net/base/registry_controlled_domain.h @@ -110,13 +110,17 @@ #ifndef NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H_ #define NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H_ -#include <map> +#include <set> #include <string> #include "base/basictypes.h" +#include "base/string_piece.h" class GURL; +template <typename T> +struct DefaultSingletonTraits; + namespace net { struct RegistryControlledDomainServiceSingletonTraits; @@ -197,7 +201,7 @@ class RegistryControlledDomainService { protected: // The entire protected API is only for unit testing. I mean it. Don't make // me come over there! 
- RegistryControlledDomainService() { } + RegistryControlledDomainService() { Init(); } // Set the RegistryControledDomainService instance to be used internally. // |instance| will supersede the singleton instance normally used. If @@ -207,97 +211,88 @@ class RegistryControlledDomainService { static RegistryControlledDomainService* SetInstance( RegistryControlledDomainService* instance); - // Sets the domain_data_ of the current instance (creating one, if necessary), - // then parses it. + // Sets the copied_domain_data_ of the current instance (creating one, + // if necessary), then parses it. static void UseDomainData(const std::string& data); private: // To allow construction of the internal singleton instance. - friend struct RegistryControlledDomainServiceSingletonTraits; - - // Using the StringSegment class, we can compare portions of strings without - // needing to allocate or copy them. - class StringSegment { - public: - StringSegment() : data_(0), begin_(0), len_(0) { } - ~StringSegment() { } - - void Set(const char* data, size_t begin, size_t len) { - data_ = data; - begin_ = begin; - len_ = len; - } + friend struct DefaultSingletonTraits<RegistryControlledDomainService>; - // Returns the character at the given offset from the start of the segment, - // or '\0' if the offset lies outside the segment. - char CharAt(size_t offset) const { - return (offset < len_) ? data_[begin_ + offset] : '\0'; - } + void Init(); - // Removes a maximum of |trimmed| number of characters, up to the length of - // the segment, from the start of the StringSegment. - void TrimFromStart(size_t trimmed) { - if (trimmed > len_) - trimmed = len_; - begin_ += trimmed; - len_ -= trimmed; + // A DomainEntry is a combination of the domain name (as a StringPiece, so + // that we can reference external memory without copying), and two bits of + // information, if it's an exception and/or wildcard entry. 
Note: we don't + // consider the attributes when doing comparisons, so as far as any data + // structures our concerned (ex our set), two DomainEntry's are equal as long + // as their StringPiece (the domain) is equal. This is the behavior we want. + class DomainEntry : public StringPiece { + public: + struct DomainEntryAttributes { + DomainEntryAttributes() : exception(false), wildcard(false) { } + ~DomainEntryAttributes() { } + + void Combine(const DomainEntryAttributes& other) { + if (other.exception) exception = true; + if (other.wildcard) wildcard = true; + } + + bool exception; + bool wildcard; + }; + + DomainEntry() : StringPiece() { } + DomainEntry(const char* ptr, size_type size) : StringPiece(ptr, size) { } + ~DomainEntry() { } + + // We override StringPiece's operator < to make it more efficent, since we + // don't care that it's sorted lexigraphically and we want to ignore the + // attributes when we are doing the comparisons. + bool operator<(const DomainEntry& other) const { + // If we are the same size, call up to StringPiece's real less than. + if (size() == other.size()) + return *static_cast<const StringPiece*>(this) < other; + // Consider ourselves less if we are smaller + return size() < other.size(); } - const char* data() const { return data_; } - - // This comparator is needed by std::map. Note that since we don't care - // about the exact sorting, we use a somewhat less intuitive, but efficient, - // comparison. - bool operator<(const StringSegment& other) const; - - private: - const char* data_; - size_t begin_; - size_t len_; + DomainEntryAttributes attributes; }; - // The full domain rule data, loaded from a resource or set by a unit test. - std::string domain_data_; - - // An entry in the map of domain specifications, describing the properties + // An entry in the set of domain specifications, describing the properties // that apply to that domain rule. 
- struct DomainEntry { - DomainEntry() : exception(false), wildcard(false) { } - bool exception; - bool wildcard; - }; - typedef std::map<StringSegment, DomainEntry> DomainMap; - - // A map from a StringSegment holding a domain name (rule) to its DomainEntry. - // The StringSegments in the domain_map_ hold pointers to the domain_data_ - // data; that's cheaper than copying the string data itself. - // TODO(pamg): Since all the domain_map_ entries have the same data_, it's - // redundant. Is it worth subclassing StringSegment to avoid that? - DomainMap domain_map_; + typedef std::set<DomainEntry> DomainSet; - // Parses a list of effective-TLD rules, building the domain_map_. Rules are - // assumed to be syntactically valid. - void ParseDomainData(); + // Parses a list of effective-TLD rules, building the domain_set_. Rules are + // assumed to be syntactically valid. We operate on a StringPiece. If we + // were populated from an embedded resource, we will reference the embedded + // resource directly. If we were populated through UseDomainData, then our + // StringPiece will reference our local copy in copied_domain_data_. + void ParseDomainData(const StringPiece& data); // Returns the singleton instance, after attempting to initialize it. // NOTE that if the effective-TLD data resource can't be found, the instance - // will be initialized and continue operation with an empty domain_map_. + // will be initialized and continue operation with simple default TLD data. static RegistryControlledDomainService* GetInstance(); - // Loads and parses the effective-TLD data resource. - void Init(); - - // Adds one rule, assumed to be valid, to the domain_map_. - // WARNING: As implied by the non-const status of the incoming rule, this - // method may MODIFY that rule (in particular, change its start and length). - // This is a performance optimization. - void AddRule(StringSegment* rule); + // Adds one rule, assumed to be valid, to the domain_set_. 
+ void AddRule(const StringPiece& rule_str); // Internal workings of the static public methods. See above. static std::string GetDomainAndRegistryImpl(const std::string& host); size_t GetRegistryLengthImpl(const std::string& host, bool allow_unknown_registries); + // A set of our DomainEntry's. + DomainSet domain_set_; + + // An optional copy of the full domain rule data. If we're loaded from a + // resource, then we just reference the resource directly without copying, + // and copied_domain_data_ is not used. If we are populated through + // UseDomainData() then we copy that data here and reference it. + std::string copied_domain_data_; + DISALLOW_COPY_AND_ASSIGN(RegistryControlledDomainService); }; |