diff options
author | rouslan@chromium.org <rouslan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-03-15 00:56:24 +0000 |
---|---|---|
committer | rouslan@chromium.org <rouslan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-03-15 00:56:24 +0000 |
commit | 07949ab6ce355f0ce0e1e238326d695272191cdc (patch) | |
tree | 85a0b8c223b04be48724b5b6ab3472513b5c3e94 /third_party/libaddressinput | |
parent | fbcf7ab20679679f9c4f589a34df250a01f12cdf (diff) | |
download | chromium_src-07949ab6ce355f0ce0e1e238326d695272191cdc.zip chromium_src-07949ab6ce355f0ce0e1e238326d695272191cdc.tar.gz chromium_src-07949ab6ce355f0ce0e1e238326d695272191cdc.tar.bz2 |
[rac] Improve libaddressinput suggestion performance with tries.
The patch changes suggestion algorithm from a for loop that compared
prefixes to a trie. The performance impact on a debug build:
Old (for loop that compared prefixes):
- Build rulesets = 200 ms
- Each character typed = 500 ms
New (trie):
- Build rulesets = 200 ms
- First character typed = 220 ms (200 ms due to building trie)
- Each subsequent character typed = 20 ms
The performance for each character improves significantly. Building
rulesets and the trie still have room for optimization.
BUG=347383
Review URL: https://codereview.chromium.org/183793003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@257277 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/libaddressinput')
15 files changed, 903 insertions, 220 deletions
diff --git a/third_party/libaddressinput/chromium/canonicalize_string.cc b/third_party/libaddressinput/chromium/canonicalize_string.cc new file mode 100644 index 0000000..b6d9928 --- /dev/null +++ b/third_party/libaddressinput/chromium/canonicalize_string.cc @@ -0,0 +1,63 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "cpp/src/util/canonicalize_string.h" + +#include "base/logging.h" +#include "cpp/include/libaddressinput/util/scoped_ptr.h" +#include "third_party/icu/source/i18n/unicode/coll.h" + +namespace i18n { +namespace addressinput { + +namespace { + +class ChromeStringCanonicalizer : public StringCanonicalizer { + public: + ChromeStringCanonicalizer() + : error_code_(U_ZERO_ERROR), + collator_(icu::Collator::createInstance(error_code_)) { + collator_->setStrength(icu::Collator::PRIMARY); + DCHECK(U_SUCCESS(error_code_)); + } + + virtual ~ChromeStringCanonicalizer() {} + + // StringCanonicalizer implementation. + virtual std::string CanonicalizeString(const std::string& original) { + // Returns a canonical version of the string that can be used for comparing + // strings regardless of diacritics and capitalization. + // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); + // CanonicalizeString("Texas") == CanonicalizeString("teXas"); + // CanonicalizeString("Texas") != CanonicalizeString("California"); + // + // The output is not human-readable. + // CanonicalizeString("Texas") != "Texas"; + icu::UnicodeString icu_str( + original.c_str(), static_cast<int32_t>(original.length())); + int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); + scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); + DCHECK(buffer.get()); + int32_t filled_size = + collator_->getSortKey(icu_str, buffer.get(), buffer_size); + DCHECK_EQ(buffer_size, filled_size); + return std::string(reinterpret_cast<const char*>(buffer.get())); + } + + private: + UErrorCode error_code_; + scoped_ptr<icu::Collator> collator_; + + DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer); +}; + +} // namespace + +// static +scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() { + return scoped_ptr<StringCanonicalizer>(new ChromeStringCanonicalizer); +} + +} // namespace addressinput +} // namespace i18n diff --git a/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp b/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp index bde2537..6215898 100644 --- a/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp +++ b/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp @@ -43,10 +43,12 @@ 'src/rule.cc', 'src/ruleset.cc', 'src/time_to_string.cc', + 'src/util/canonicalize_string.cc', 'src/util/json.cc', 'src/util/md5.cc', 'src/util/string_compare.cc', 'src/util/string_split.cc', + 'src/util/trie.cc', ], 'defines': [ 'VALIDATION_DATA_URL="https://i18napis.appspot.com/ssl-aggregate-address/"', @@ -80,6 +82,7 @@ 'test/util/scoped_ptr_unittest.cc', 'test/util/stl_util_unittest.cc', 'test/util/string_split_unittest.cc', + 'test/util/trie_test.cc', ], 'defines': [ 'TEST_DATA_DIR="../testdata"', diff --git a/third_party/libaddressinput/chromium/cpp/src/address_validator.cc b/third_party/libaddressinput/chromium/cpp/src/address_validator.cc index 5ca32b1..39df55b 100644 --- a/third_party/libaddressinput/chromium/cpp/src/address_validator.cc +++ b/third_party/libaddressinput/chromium/cpp/src/address_validator.cc @@ -22,6 +22,7 @@ #include <libaddressinput/util/scoped_ptr.h> #include <algorithm> +#include <bitset> #include <cassert> #include <cstddef> #include <map> @@ -46,16 +47,17 @@ namespace addressinput { namespace { -// A bit map of MatchingRuleField values. -typedef int MatchingRuleFields; +// A type to store a list of pointers to Ruleset objects. +typedef std::set<const Ruleset*> Rulesets; -// The field in the rule that matches a user input. Can be used in a bitmask. -enum MatchingRuleField { - NONE = 0, - KEY = 1, - NAME = 2, - LATIN_NAME = 4 -}; +// A type to map the field in a rule to rulesets. +typedef std::map<Rule::IdentityField, Rulesets> IdentityFieldRulesets; + +// A type to map the field in an address to rulesets. +typedef std::map<AddressField, IdentityFieldRulesets> AddressFieldRulesets; + +// A set of Rule::IdentityField values that match user input. +typedef std::bitset<Rule::IDENTITY_FIELDS_SIZE> MatchingRuleFields; // Returns true if |prefix_regex| matches a prefix of |value|. For example, // "(90|81)" matches a prefix of "90291". @@ -64,171 +66,6 @@ bool ValueMatchesPrefixRegex(const std::string& value, return RE2::FullMatch(value, "^(" + prefix_regex + ").*"); } -// Returns true if the first |length| characters of |a| are loosely equal to the -// first |length| characters of |b|. -bool LoosePrefixCompare(const std::string& a, - const std::string& b, - size_t length) { - return LooseStringCompare(a.substr(0, length), b.substr(0, length)); -} - -// Returns true if the fields of |address| are empty strings between the |begin| -// field exclusively and the |end| field inclusively. -bool AddressFieldsEmpty(const AddressData& address, - AddressField begin, - AddressField end) { - for (int field = begin + 1; field <= end; ++field) { - if (!address.GetFieldValue(static_cast<AddressField>(field)).empty()) { - return false; - } - } - return true; -} - -// Finds the most accurate rulesets that match |user_input| and adds them to -// |suggestions| as address data structures. -// -// Converts the rulesets into data structures by copying either the ruleset key, -// name, or latinized name into the field of the data structure. Traverses the -// tree of rulesets up to its root to capture the most complete suggestion. For -// example, converts the ruleset for "data/CN/北京市/昌平区" into (latinized) -// "Changping Qu, Beijing Shi, CN" address. -// -// Note that both "data/US" and "data/US/CA" rulesets match a "California, US, -// 90291" user input, but only "data/US/CA" ends up in |suggestions|, because -// it's more accurate than "data/US". -// -// Considers only |parent_matching_rule_fields| when comparing rules to user -// input values. For example, if |parent_matching_rule_fields| is KEY | NAME, -// then the function ignores the latinized name for the ruleset. -// -// If the number of |suggestions| is over the |suggestions_limit|, then clears -// |suggestions| and returns false. -// -// The |suggestions| parameter should not be NULL. -bool FindSuggestions(const AddressData& user_input, - AddressField focused_field, - size_t suggestions_limit, - const Ruleset& ruleset, - MatchingRuleFields parent_matching_rule_fields, - std::vector<AddressData>* suggestions) { - assert(suggestions != NULL); - - // Compare the postal code of |user_input| with the |ruleset|. - const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code); - if (!rule.GetPostalCodeFormat().empty() && - !user_input.postal_code.empty() && - !ValueMatchesPrefixRegex( - user_input.postal_code, rule.GetPostalCodeFormat())) { - return true; - } - - // Compare the value of |ruleset->field()| in |user input| with the key, - // name, and latinized name for |ruleset|. - const std::string& input = user_input.GetFieldValue(ruleset.field()); - MatchingRuleFields matching_rule_fields = NONE; - if (input.empty() || ruleset.field() == COUNTRY) { - matching_rule_fields = parent_matching_rule_fields; - } else { - if ((parent_matching_rule_fields & KEY) && - LoosePrefixCompare(input, rule.GetKey(), input.length())) { - matching_rule_fields |= KEY; - } - if ((parent_matching_rule_fields & NAME) && - LoosePrefixCompare(input, rule.GetName(), input.length())) { - matching_rule_fields |= NAME; - } - if ((parent_matching_rule_fields & LATIN_NAME) && - LoosePrefixCompare(input, rule.GetLatinName(), input.length())) { - matching_rule_fields |= LATIN_NAME; - } - } - - if (matching_rule_fields == NONE) { - return true; - } - - // Add a suggestion based on |ruleset|. - if (ruleset.field() == DEPENDENT_LOCALITY || - ruleset.field() == focused_field || - (ruleset.GetSubRegionRulesets().empty() && - AddressFieldsEmpty(user_input, ruleset.field(), DEPENDENT_LOCALITY))) { - if (suggestions->size() >= suggestions_limit) { - suggestions->clear(); - return false; - } - - // If the user's language is not one of the supported languages of a country - // that has latinized names for its regions, then prefer to suggest the - // latinized region names. If the user types in local script instead, then - // the local script names will be suggested. - bool prefer_latin_name = - !rule.GetLanguage().empty() && - rule.GetLanguage() != user_input.language_code && - !rule.GetLatinName().empty(); - - MatchingRuleField rule_field = NONE; - if (prefer_latin_name && (matching_rule_fields & LATIN_NAME)) { - rule_field = LATIN_NAME; - } else if (matching_rule_fields & KEY) { - rule_field = KEY; - } else if (matching_rule_fields & NAME) { - rule_field = NAME; - } else if (matching_rule_fields & LATIN_NAME) { - rule_field = LATIN_NAME; - } else { - assert(false); - } - - AddressData suggestion; - suggestion.postal_code = user_input.postal_code; - - // Traverse the tree of rulesets from the most specific |ruleset| to the - // country-wide "root" of the tree. Use the region names found at each of - // the levels of the ruleset tree to build the |suggestion|. - for (const Ruleset* suggestion_ruleset = &ruleset; - suggestion_ruleset->parent() != NULL; - suggestion_ruleset = suggestion_ruleset->parent()) { - const Rule& suggestion_rule = - suggestion_ruleset->GetLanguageCodeRule(user_input.language_code); - switch (rule_field) { - case KEY: - suggestion.SetFieldValue( - suggestion_ruleset->field(), suggestion_rule.GetKey()); - break; - case NAME: - suggestion.SetFieldValue( - suggestion_ruleset->field(), suggestion_rule.GetName()); - break; - case LATIN_NAME: - suggestion.SetFieldValue( - suggestion_ruleset->field(), suggestion_rule.GetLatinName()); - break; - default: - assert(false); - break; - } - } - - suggestions->push_back(suggestion); - return true; - } - - for (std::map<std::string, Ruleset*>::const_iterator - subregion_ruleset_it = ruleset.GetSubRegionRulesets().begin(); - subregion_ruleset_it != ruleset.GetSubRegionRulesets().end(); - ++subregion_ruleset_it) { - assert(subregion_ruleset_it->second != NULL); - if (!FindSuggestions(user_input, focused_field, suggestions_limit, - *subregion_ruleset_it->second, matching_rule_fields, - suggestions)) { - return false; - } - } - - return true; -} - // Returns true if the filter is empty (all problems allowed) or contains the // |field|->|problem| mapping (explicitly allowed). bool FilterAllows(const AddressProblemFilter& filter, @@ -260,6 +97,37 @@ bool IsEmptyStreetAddress(const std::vector<std::string>& street_address) { return true; } +// Collects rulesets based on whether they have a parent in the given list. +class ParentedRulesetCollector { + public: + // Retains a reference to both of the parameters. Does not make a copy of + // |parent_rulesets|. Does not take ownership of |rulesets_with_parents|. The + // |rulesets_with_parents| parameter should not be NULL. + ParentedRulesetCollector(const Rulesets& parent_rulesets, + Rulesets* rulesets_with_parents) + : parent_rulesets_(parent_rulesets), + rulesets_with_parents_(rulesets_with_parents) { + assert(rulesets_with_parents_ != NULL); + } + + ~ParentedRulesetCollector() {} + + // Adds |ruleset_to_test| to the |rulesets_with_parents_| collection, if the + // given ruleset has a parent in |parent_rulesets_|. The |ruleset_to_test| + // parameter should not be NULL. + void operator()(const Ruleset* ruleset_to_test) { + assert(ruleset_to_test != NULL); + if (parent_rulesets_.find(ruleset_to_test->parent()) != + parent_rulesets_.end()) { + rulesets_with_parents_->insert(ruleset_to_test); + } + } + + private: + const Rulesets& parent_rulesets_; + Rulesets* rulesets_with_parents_; +}; + // Validates AddressData structure. class AddressValidatorImpl : public AddressValidator { public: @@ -297,7 +165,7 @@ class AddressValidatorImpl : public AddressValidator { const AddressData& address, const AddressProblemFilter& filter, AddressProblems* problems) const { - std::map<std::string, const Ruleset*>::const_iterator ruleset_it = + std::map<std::string, Ruleset*>::const_iterator ruleset_it = rules_.find(address.country_code); // We can still validate the required fields even if the full ruleset isn't @@ -386,7 +254,7 @@ class AddressValidatorImpl : public AddressValidator { AddressField focused_field, size_t suggestions_limit, std::vector<AddressData>* suggestions) const { - std::map<std::string, const Ruleset*>::const_iterator ruleset_it = + std::map<std::string, Ruleset*>::const_iterator ruleset_it = rules_.find(user_input.country_code); if (ruleset_it == rules_.end()) { @@ -401,16 +269,154 @@ class AddressValidatorImpl : public AddressValidator { } suggestions->clear(); - const Ruleset* country_ruleset = ruleset_it->second; - assert(country_ruleset != NULL); + assert(ruleset_it->second != NULL); + + // Initialize the prefix search index lazily. + if (!ruleset_it->second->prefix_search_index_ready()) { + ruleset_it->second->BuildPrefixSearchIndex(); + } + + const Ruleset& country_ruleset = *ruleset_it->second; + const Rule& country_rule = + country_ruleset.GetLanguageCodeRule(user_input.language_code); + + // Do not suggest anything if the user is typing the postal code that is not + // valid for the country. + if (!user_input.postal_code.empty() && + focused_field == POSTAL_CODE && + !country_rule.GetPostalCodeFormat().empty() && + !ValueMatchesPrefixRegex( + user_input.postal_code, country_rule.GetPostalCodeFormat())) { + return SUCCESS; + } + + // Determine the most specific address field that can be suggested. + AddressField suggestion_field = focused_field != POSTAL_CODE + ? focused_field : DEPENDENT_LOCALITY; + if (suggestion_field > country_ruleset.deepest_ruleset_level()) { + suggestion_field = country_ruleset.deepest_ruleset_level(); + } + if (focused_field != POSTAL_CODE) { + while (user_input.GetFieldValue(suggestion_field).empty() && + suggestion_field > ADMIN_AREA) { + suggestion_field = static_cast<AddressField>(suggestion_field - 1); + } + } + + // Find all rulesets that match user input. + AddressFieldRulesets rulesets; + for (int i = ADMIN_AREA; i <= suggestion_field; ++i) { + for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) { + AddressField address_field = static_cast<AddressField>(i); + Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j); + + // Find all rulesets at |address_field| level whose |rule_field| starts + // with user input value. + country_ruleset.FindRulesetsByPrefix( + user_input.language_code, address_field, rule_field, + user_input.GetFieldValue(address_field), + &rulesets[address_field][rule_field]); + + // Filter out the rulesets whose parents do not match the user input. + if (address_field > ADMIN_AREA) { + AddressField parent_field = + static_cast<AddressField>(address_field - 1); + Rulesets rulesets_with_parents; + std::for_each( + rulesets[address_field][rule_field].begin(), + rulesets[address_field][rule_field].end(), + ParentedRulesetCollector(rulesets[parent_field][rule_field], + &rulesets_with_parents)); + rulesets[address_field][rule_field].swap(rulesets_with_parents); + } + } + } + + // Determine the fields in the rules that match the user input. This + // operation converts a map of Rule::IdentityField value -> Ruleset into a + // map of Ruleset -> Rule::IdentityField bitset. + std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets; + for (IdentityFieldRulesets::const_iterator rule_field_it = + rulesets[suggestion_field].begin(); + rule_field_it != rulesets[suggestion_field].end(); + ++rule_field_it) { + const Rule::IdentityField rule_identity_field = rule_field_it->first; + for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin(); + ruleset_it != rule_field_it->second.end(); + ++ruleset_it) { + suggestion_rulesets[*ruleset_it].set(rule_identity_field); + } + } + + // Generate suggestions based on the rulesets. Use a Rule::IdentityField + // from the bitset to generate address field values. + for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator + suggestion_it = suggestion_rulesets.begin(); + suggestion_it != suggestion_rulesets.end(); + ++suggestion_it) { + const Ruleset& ruleset = *suggestion_it->first; + const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code); + const MatchingRuleFields& matching_rule_fields = suggestion_it->second; + + // Do not suggest this region if the postal code in user input does not + // match it. + if (!user_input.postal_code.empty() && + !rule.GetPostalCodeFormat().empty() && + !ValueMatchesPrefixRegex( + user_input.postal_code, rule.GetPostalCodeFormat())) { + continue; + } + + // Do not add more suggestions than |suggestions_limit|. + if (suggestions->size() >= suggestions_limit) { + suggestions->clear(); + return SUCCESS; + } + + // If the user's language is not one of the supported languages of a + // country that has latinized names for its regions, then prefer to + // suggest the latinized region names. If the user types in local script + // instead, then the local script names will be suggested. + Rule::IdentityField rule_field = Rule::KEY; + if (!country_rule.GetLanguage().empty() && + country_rule.GetLanguage() != user_input.language_code && + !rule.GetLatinName().empty() && + matching_rule_fields.test(Rule::LATIN_NAME)) { + rule_field = Rule::LATIN_NAME; + } else if (matching_rule_fields.test(Rule::KEY)) { + rule_field = Rule::KEY; + } else if (matching_rule_fields.test(Rule::NAME)) { + rule_field = Rule::NAME; + } else if (matching_rule_fields.test(Rule::LATIN_NAME)) { + rule_field = Rule::LATIN_NAME; + } else { + assert(false); + } + + AddressData suggestion; + suggestion.postal_code = user_input.postal_code; + + // Traverse the tree of rulesets from the most specific |ruleset| to the + // country-wide "root" of the tree. Use the region names found at each of + // the levels of the ruleset tree to build the |suggestion|. + for (const Ruleset* suggestion_ruleset = &ruleset; + suggestion_ruleset->parent() != NULL; + suggestion_ruleset = suggestion_ruleset->parent()) { + const Rule& suggestion_rule = + suggestion_ruleset->GetLanguageCodeRule(user_input.language_code); + suggestion.SetFieldValue(suggestion_ruleset->field(), + suggestion_rule.GetIdentityField(rule_field)); + } + + suggestions->push_back(suggestion); + } - FindSuggestions(user_input, focused_field, suggestions_limit, - *country_ruleset, KEY | NAME | LATIN_NAME, suggestions); return SUCCESS; } + // AddressValidator implementation. virtual bool CanonicalizeAdministrativeArea(AddressData* address_data) const { - std::map<std::string, const Ruleset*>::const_iterator ruleset_it = + std::map<std::string, Ruleset*>::const_iterator ruleset_it = rules_.find(address_data->country_code); if (ruleset_it == rules_.end()) { return false; @@ -475,7 +481,7 @@ class AddressValidatorImpl : public AddressValidator { std::set<std::string> loading_rules_; // A mapping of a country code to the owned ruleset for that country code. - std::map<std::string, const Ruleset*> rules_; + std::map<std::string, Ruleset*> rules_; DISALLOW_COPY_AND_ASSIGN(AddressValidatorImpl); }; diff --git a/third_party/libaddressinput/chromium/cpp/src/rule.cc b/third_party/libaddressinput/chromium/cpp/src/rule.cc index 1604390..a94e44d 100644 --- a/third_party/libaddressinput/chromium/cpp/src/rule.cc +++ b/third_party/libaddressinput/chromium/cpp/src/rule.cc @@ -335,6 +335,20 @@ void Rule::ParseJsonRule(const Json& json_rule) { } } +const std::string& Rule::GetIdentityField(IdentityField identity_field) const { + switch (identity_field) { + case KEY: + return key_; + case NAME: + return name_; + case LATIN_NAME: + return latin_name_; + case IDENTITY_FIELDS_SIZE: + assert(false); + } + return key_; +} + int Rule::GetInvalidFieldMessageId(AddressField field) const { switch (field) { case ADMIN_AREA: diff --git a/third_party/libaddressinput/chromium/cpp/src/rule.h b/third_party/libaddressinput/chromium/cpp/src/rule.h index 407455b..80cc830 100644 --- a/third_party/libaddressinput/chromium/cpp/src/rule.h +++ b/third_party/libaddressinput/chromium/cpp/src/rule.h @@ -62,6 +62,14 @@ struct FormatElement { // } class Rule { public: + // The types of fields that describe the rule. + enum IdentityField { + KEY, + NAME, + LATIN_NAME, + IDENTITY_FIELDS_SIZE + }; + Rule(); ~Rule(); @@ -80,6 +88,11 @@ class Rule { // Parses |json_rule|, which must contain parsed serialized rule. void ParseJsonRule(const Json& json_rule); + // Returns the value of the |identity_field| for this rule, for example, can + // return "TX" or "Texas". The |identity_field| parameter should not be + // IDENTITY_FIELDS_SIZE. + const std::string& GetIdentityField(IdentityField identity_field) const; + // Returns the key for this rule. For example, can return "TX". const std::string& GetKey() const { return key_; } diff --git a/third_party/libaddressinput/chromium/cpp/src/ruleset.cc b/third_party/libaddressinput/chromium/cpp/src/ruleset.cc index 43823b4..201e407 100644 --- a/third_party/libaddressinput/chromium/cpp/src/ruleset.cc +++ b/third_party/libaddressinput/chromium/cpp/src/ruleset.cc @@ -20,17 +20,23 @@ #include <cassert> #include <cstddef> #include <map> +#include <set> #include <string> +#include <utility> #include "rule.h" +#include "util/canonicalize_string.h" #include "util/stl_util.h" namespace i18n { namespace addressinput { Ruleset::Ruleset(AddressField field, scoped_ptr<Rule> rule) - : parent_(NULL), + : tries_(), + canonicalizer_(), + parent_(NULL), field_(field), + deepest_ruleset_level_(field), rule_(rule.Pass()), sub_regions_(), language_codes_() { @@ -42,6 +48,33 @@ Ruleset::Ruleset(AddressField field, scoped_ptr<Rule> rule) Ruleset::~Ruleset() { STLDeleteValues(&sub_regions_); STLDeleteValues(&language_codes_); + + // Delete the maps and trie objects owned by |tries_| field. + for (LanguageCodeTries::const_iterator lang_it = tries_.begin(); + lang_it != tries_.end(); ++lang_it) { + AddressFieldTries* address_field_tries = lang_it->second; + assert(address_field_tries != NULL); + + for (AddressFieldTries::const_iterator address_field_it = + address_field_tries->begin(); + address_field_it != address_field_tries->end(); + ++address_field_it) { + IdentityFieldTries* identity_field_tries = address_field_it->second; + assert(identity_field_tries != NULL); + + for (IdentityFieldTries::const_iterator identity_field_it = + identity_field_tries->begin(); + identity_field_it != identity_field_tries->end(); + ++identity_field_it) { + // The tries do not own the ruleset objects. + Trie<const Ruleset*>* trie = identity_field_it->second; + assert(trie != NULL); + delete trie; + } + delete identity_field_tries; + } + delete address_field_tries; + } } void Ruleset::AddSubRegionRuleset(const std::string& sub_region, @@ -74,5 +107,153 @@ const Rule& Ruleset::GetLanguageCodeRule( return it != language_codes_.end() ? *it->second : *rule_; } +void Ruleset::BuildPrefixSearchIndex() { + assert(field_ == COUNTRY); + assert(tries_.empty()); + + // Default language tries. + tries_[""] = new AddressFieldTries; + + // Non-default language tries. + for (std::vector<std::string>::const_iterator lang_it = + rule_->GetLanguages().begin(); + lang_it != rule_->GetLanguages().end(); + ++lang_it) { + if (*lang_it != rule_->GetLanguage() && !lang_it->empty()) { + tries_[*lang_it] = new AddressFieldTries; + } + } + + for (LanguageCodeTries::const_iterator lang_it = tries_.begin(); + lang_it != tries_.end(); ++lang_it) { + AddressFieldTries* address_field_tries = lang_it->second; + address_field_tries->insert( + std::make_pair(ADMIN_AREA, new IdentityFieldTries)); + address_field_tries->insert( + std::make_pair(LOCALITY, new IdentityFieldTries)); + address_field_tries->insert( + std::make_pair(DEPENDENT_LOCALITY, new IdentityFieldTries)); + + for (AddressFieldTries::const_iterator address_field_it = + address_field_tries->begin(); + address_field_it != address_field_tries->end(); + ++address_field_it) { + IdentityFieldTries* identity_field_tries = address_field_it->second; + identity_field_tries->insert( + std::make_pair(Rule::KEY, new Trie<const Ruleset*>)); + identity_field_tries->insert( + std::make_pair(Rule::NAME, new Trie<const Ruleset*>)); + identity_field_tries->insert( + std::make_pair(Rule::LATIN_NAME, new Trie<const Ruleset*>)); + } + } + + canonicalizer_ = StringCanonicalizer::Build(); + AddSubRegionRulesetsToTrie(*this); +} + +void Ruleset::FindRulesetsByPrefix(const std::string& language_code, + AddressField ruleset_level, + Rule::IdentityField identity_field, + const std::string& prefix, + std::set<const Ruleset*>* result) const { + assert(field_ == COUNTRY); + assert(ruleset_level >= ADMIN_AREA); + assert(ruleset_level <= DEPENDENT_LOCALITY); + assert(result != NULL); + assert(canonicalizer_ != NULL); + + LanguageCodeTries::const_iterator lang_it = tries_.find(language_code); + AddressFieldTries* address_field_tries = lang_it != tries_.end() + ? lang_it->second : tries_.find("")->second; + assert(address_field_tries != NULL); + + AddressFieldTries::const_iterator address_field_it = + address_field_tries->find(ruleset_level); + assert(address_field_it != address_field_tries->end()); + + IdentityFieldTries* identity_field_tries = address_field_it->second; + assert(identity_field_tries != NULL); + + IdentityFieldTries::const_iterator identity_field_it = + identity_field_tries->find(identity_field); + assert(identity_field_it != identity_field_tries->end()); + + Trie<const Ruleset*>* trie = identity_field_it->second; + assert(trie != NULL); + + trie->FindDataForKeyPrefix( + canonicalizer_->CanonicalizeString(prefix), result); +} + +void Ruleset::AddSubRegionRulesetsToTrie(const Ruleset& parent_ruleset) { + assert(field_ == COUNTRY); + assert(canonicalizer_ != NULL); + + for (std::map<std::string, Ruleset*>::const_iterator sub_region_it = + parent_ruleset.sub_regions_.begin(); + sub_region_it != parent_ruleset.sub_regions_.end(); + ++sub_region_it) { + const Ruleset* ruleset = sub_region_it->second; + assert(ruleset != NULL); + + if (deepest_ruleset_level_ < ruleset->field()) { + deepest_ruleset_level_ = ruleset->field(); + } + + for (LanguageCodeTries::const_iterator lang_it = tries_.begin(); + lang_it != tries_.end(); ++lang_it) { + const std::string& language_code = lang_it->first; + const Rule& rule = ruleset->GetLanguageCodeRule(language_code); + + AddressFieldTries* address_field_tries = lang_it->second; + assert(address_field_tries != NULL); + + AddressFieldTries::const_iterator address_field_it = + address_field_tries->find(ruleset->field()); + assert(address_field_it != address_field_tries->end()); + + IdentityFieldTries* identity_field_tries = address_field_it->second; + assert(identity_field_tries != NULL); + + IdentityFieldTries::const_iterator identity_field_it = + identity_field_tries->find(Rule::KEY); + assert(identity_field_it != identity_field_tries->end()); + + Trie<const Ruleset*>* key_trie = identity_field_it->second; + assert(key_trie != NULL); + + identity_field_it = identity_field_tries->find(Rule::NAME); + assert(identity_field_it != identity_field_tries->end()); + + Trie<const Ruleset*>* name_trie = identity_field_it->second; + assert(name_trie != NULL); + + identity_field_it = identity_field_tries->find(Rule::LATIN_NAME); + assert(identity_field_it != identity_field_tries->end()); + + Trie<const Ruleset*>* latin_name_trie = identity_field_it->second; + assert(latin_name_trie != NULL); + + if (!rule.GetKey().empty()) { + key_trie->AddDataForKey( + canonicalizer_->CanonicalizeString(rule.GetKey()), ruleset); + } + + if (!rule.GetName().empty()) { + name_trie->AddDataForKey( + canonicalizer_->CanonicalizeString(rule.GetName()), ruleset); + } + + if (!rule.GetLatinName().empty()) { + latin_name_trie->AddDataForKey( + canonicalizer_->CanonicalizeString(rule.GetLatinName()), ruleset); + } + } + + AddSubRegionRulesetsToTrie(*ruleset); + } +} + } // namespace addressinput } // namespace i18n diff --git a/third_party/libaddressinput/chromium/cpp/src/ruleset.h b/third_party/libaddressinput/chromium/cpp/src/ruleset.h index 350a675..fd9e3d9 100644 --- a/third_party/libaddressinput/chromium/cpp/src/ruleset.h +++ b/third_party/libaddressinput/chromium/cpp/src/ruleset.h @@ -20,14 +20,16 @@ #include <libaddressinput/util/scoped_ptr.h> #include <map> +#include <set> #include <string> -#include <vector> + +#include "rule.h" +#include "util/canonicalize_string.h" +#include "util/trie.h" namespace i18n { namespace addressinput { -class Rule; - // A recursive data structure that stores a set of rules for a region. Can store // the rules for a country, its administrative areas, localities, and dependent // localities, in addition to the language-specific rules. @@ -93,13 +95,70 @@ class Ruleset { return sub_regions_; } + // Enables using FindRulesetsByPrefix() method. Should be called only once and + // on a COUNTRY level ruleset. + void BuildPrefixSearchIndex(); + + // Returns true if BuildPrefixSearchIndex() has been called. + bool prefix_search_index_ready() const { return !tries_.empty(); } + + // Returns the deepest possible ruleset level for this country. Must be called + // on a COUNTRY level ruleset. Must be called after BuildPrefixSearchIndex() + // has been called. + AddressField deepest_ruleset_level() const { return deepest_ruleset_level_; } + + // Finds all rulesets at |ruleset_level| where the rule for |language_code| + // has the |identity_field| that starts with |prefix|. Ignores diacritics and + // capitalization differences between the rule data and |prefix|. + // + // If there're no rules for |language_code| (or |language_code| is an empty + // string), then the default language code is used. + // + // Should be called only on a COUNTRY level ruleset. Should be called only + // after BuildPrefixSearchIndex() has been called. + // + // The |field| parameter should be only ADMIN_AREA, LOCALITY, or + // DEPENDENT_LOCALITY. The result parameter should not be NULL. + void FindRulesetsByPrefix(const std::string& language_code, + AddressField ruleset_level, + Rule::IdentityField identity_field, + const std::string& prefix, + std::set<const Ruleset*>* result) const; + private: + // The type that maps rule identity field to tries of rulesets. + typedef std::map<Rule::IdentityField, Trie<const Ruleset*>*> + IdentityFieldTries; + + // The type that maps address field to IdentityFieldTries. + typedef std::map<AddressField, IdentityFieldTries*> AddressFieldTries; + + // The type that maps language code to AddressFieldTries. + typedef std::map<std::string, AddressFieldTries*> LanguageCodeTries; + + // Adds all children of |parent_ruleset| into |tries_| of this ruleset. Should + // be called only on a COUNTRY level ruleset. + void AddSubRegionRulesetsToTrie(const Ruleset& parent_ruleset); + + // The tries to lookup rulesets by a prefix of key, name, or latin-name in a + // rule. Has data only in a COUNTRY level ruleset. Owns the map and trie + // objects. Does not own the ruleset objects. + LanguageCodeTries tries_; + + // Canonicalizes region keys, names, and latin names when building a trie. + scoped_ptr<StringCanonicalizer> canonicalizer_; + // The parent ruleset of this object. The parent ruleset owns this object. Ruleset* parent_; // The field of this ruleset. const AddressField field_; + // The deepest possible ruleset level for this country. Set in + // BuildPrefixSearchIndex() method and, therefore, meaningful only on a + // COUNTRY level ruleset. + AddressField deepest_ruleset_level_; + // The region-wide rule in the default language of the country. const scoped_ptr<const Rule> rule_; diff --git a/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.cc b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.cc new file mode 100644 index 0000000..c311a60 --- /dev/null +++ b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.cc @@ -0,0 +1,47 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "canonicalize_string.h" + +#include <libaddressinput/util/scoped_ptr.h> + +#include <string> + +namespace i18n { +namespace addressinput { + +namespace { + +class SimpleStringCanonicalizer : public StringCanonicalizer { + public: + SimpleStringCanonicalizer() {} + + virtual ~SimpleStringCanonicalizer() {} + + // StringCanonicalizer implementation. + virtual std::string CanonicalizeString(const std::string& original) { + // The best we can do without ICU. + return original; + } +}; + +} // namespace + +// static +scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() { + return scoped_ptr<StringCanonicalizer>(new SimpleStringCanonicalizer); +} + +} // namespace addressinput +} // namespace i18n diff --git a/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.h b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.h new file mode 100644 index 0000000..10af086 --- /dev/null +++ b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.h @@ -0,0 +1,38 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef I18N_ADDRESSINPUT_UTIL_CANONICALIZE_STRING_H_ +#define I18N_ADDRESSINPUT_UTIL_CANONICALIZE_STRING_H_ + +#include <libaddressinput/util/scoped_ptr.h> + +#include <string> + +namespace i18n { +namespace addressinput { + +class StringCanonicalizer { + public: + static scoped_ptr<StringCanonicalizer> Build(); + + virtual ~StringCanonicalizer() {} + + // Returns |original| string modified to enable loose comparisons and sorting. + virtual std::string CanonicalizeString(const std::string& original) = 0; +}; + +} // namespace addressinput +} // namespace i18n + +#endif // I18N_ADDRESSINPUT_UTIL_CANONICALIZE_STRING_H_ diff --git a/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc b/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc index 2e0691f..d5f6a18 100644 --- a/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc +++ b/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc @@ -14,12 +14,17 @@ #include "string_compare.h" +#include <libaddressinput/util/scoped_ptr.h> + +#include "canonicalize_string.h" + namespace i18n { namespace addressinput { bool LooseStringCompare(const std::string& a, const std::string& b) { - // The best we can do without ICU. - return a == b; + scoped_ptr<StringCanonicalizer> canonicalizer(StringCanonicalizer::Build()); + return canonicalizer->CanonicalizeString(a) == + canonicalizer->CanonicalizeString(b); } } // namespace addressinput diff --git a/third_party/libaddressinput/chromium/cpp/src/util/trie.cc b/third_party/libaddressinput/chromium/cpp/src/util/trie.cc new file mode 100644 index 0000000..07fb877 --- /dev/null +++ b/third_party/libaddressinput/chromium/cpp/src/util/trie.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "trie.h" + +#include <cassert> +#include <cstddef> +#include <map> +#include <queue> +#include <set> +#include <string> +#include <utility> + +#include "stl_util.h" + +namespace i18n { +namespace addressinput { + +template <typename T> +Trie<T>::Trie() {} + +template <typename T> +Trie<T>::~Trie() { + STLDeleteValues(&sub_nodes_); +} + +template <typename T> +void Trie<T>::AddDataForKey(const std::string& key, const T& data_item) { + Trie<T>* current_node = this; + for (size_t key_start = 0; key_start < key.length(); ++key_start) { + typename std::map<char, Trie<T>*>::const_iterator sub_node_it = + current_node->sub_nodes_.find(key[key_start]); + if (sub_node_it == current_node->sub_nodes_.end()) { + sub_node_it = current_node->sub_nodes_.insert( + std::make_pair(key[key_start], new Trie<T>)).first; + } + current_node = sub_node_it->second; + assert(current_node != NULL); + } + current_node->data_list_.push_back(data_item); +} + +template <typename T> +void Trie<T>::FindDataForKeyPrefix(const std::string& key_prefix, + std::set<T>* results) const { + assert(results != NULL); + + // Find the sub-trie for the key prefix. + const Trie<T>* current_node = this; + for (size_t key_prefix_start = 0; key_prefix_start < key_prefix.length(); + ++key_prefix_start) { + typename std::map<char, Trie<T>*>::const_iterator sub_node_it = + current_node->sub_nodes_.find(key_prefix[key_prefix_start]); + if (sub_node_it == current_node->sub_nodes_.end()) { + return; + } + current_node = sub_node_it->second; + assert(current_node != NULL); + } + + // Collect data from all sub-tries. + std::queue<const Trie<T>*> node_queue; + node_queue.push(current_node); + while (!node_queue.empty()) { + const Trie<T>* queue_front = node_queue.front(); + node_queue.pop(); + + results->insert( + queue_front->data_list_.begin(), queue_front->data_list_.end()); + + for (typename std::map<char, Trie<T>*>::const_iterator sub_node_it = + queue_front->sub_nodes_.begin(); + sub_node_it != queue_front->sub_nodes_.end(); + ++sub_node_it) { + node_queue.push(sub_node_it->second); + } + } +} + +// Separating template definitions and declarations requires defining all +// possible template parameters to avoid linking errors. +class Ruleset; +template class Trie<const Ruleset*>; +template class Trie<std::string>; + +} // namespace addressinput +} // namespace i18n diff --git a/third_party/libaddressinput/chromium/cpp/src/util/trie.h b/third_party/libaddressinput/chromium/cpp/src/util/trie.h new file mode 100644 index 0000000..6b1792b --- /dev/null +++ b/third_party/libaddressinput/chromium/cpp/src/util/trie.h @@ -0,0 +1,68 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef I18N_ADDRESSINPUT_UTIL_TRIE_H_ +#define I18N_ADDRESSINPUT_UTIL_TRIE_H_ + +#include <libaddressinput/util/basictypes.h> + +#include <list> +#include <map> +#include <set> +#include <string> + +namespace i18n { +namespace addressinput { + +// A prefix search tree. Can return all objects whose keys start with a prefix +// string. +// +// Maps keys to multiple objects. This property is useful when mapping region +// names to region objects. For example, there's a "St. Petersburg" in Florida, +// and there's a "St. Petersburg" in Russia. A lookup for "St. Petersburg" key +// should return two distinct objects. +template <typename T> +class Trie { + public: + Trie(); + + ~Trie(); + + // Adds a mapping from |key| to |data_item|. Can be called with the same |key| + // multiple times. + void AddDataForKey(const std::string& key, const T& data_item); + + // Adds all objects whose keys start with |key_prefix| to the |results| + // parameter. The |results| parameter should not be NULL. + void FindDataForKeyPrefix(const std::string& key_prefix, + std::set<T>* results) const; + + private: + // All objects for this node in the trie. This field is a collection to enable + // mapping the same key to multiple objects. + std::list<T> data_list_; + + // Trie sub nodes. The root trie stores the objects for the empty string key. + // The children of the root trie store the objects for the one-letter keys. + // The grand-children of the root trie store the objects for the two-letter + // keys, and so on. + std::map<char, Trie<T>*> sub_nodes_; + + DISALLOW_COPY_AND_ASSIGN(Trie); +}; + +} // namespace addressinput +} // namespace i18n + +#endif // I18N_ADDRESSINPUT_UTIL_TRIE_H_ diff --git a/third_party/libaddressinput/chromium/cpp/test/util/trie_test.cc b/third_party/libaddressinput/chromium/cpp/test/util/trie_test.cc new file mode 100644 index 0000000..0baa0b4 --- /dev/null +++ b/third_party/libaddressinput/chromium/cpp/test/util/trie_test.cc @@ -0,0 +1,114 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "util/trie.h" + +#include <set> +#include <string> + +#include <gtest/gtest.h> + +namespace i18n { +namespace addressinput { + +namespace { + +TEST(TrieTest, EmptyTrieHasNoData) { + Trie<std::string> trie; + std::set<std::string> result; + trie.FindDataForKeyPrefix("key", &result); + EXPECT_TRUE(result.empty()); +} + +TEST(TrieTest, CanGetDataByExactKey) { + Trie<std::string> trie; + trie.AddDataForKey("hello", "world"); + std::set<std::string> result; + trie.FindDataForKeyPrefix("hello", &result); + std::set<std::string> expected; + expected.insert("world"); + EXPECT_EQ(expected, result); +} + +TEST(TrieTest, CanGetDataByPrefix) { + Trie<std::string> trie; + trie.AddDataForKey("hello", "world"); + std::set<std::string> result; + trie.FindDataForKeyPrefix("he", &result); + std::set<std::string> expected; + expected.insert("world"); + EXPECT_EQ(expected, result); +} + +TEST(TrieTest, KeyTooLongNoData) { + Trie<std::string> trie; + trie.AddDataForKey("hello", "world"); + std::set<std::string> result; + trie.FindDataForKeyPrefix("helloo", &result); + EXPECT_TRUE(result.empty()); +} + +TEST(TrieTest, CommonPrefixFindsMultipleData) { + Trie<std::string> trie; + trie.AddDataForKey("hello", "world"); + trie.AddDataForKey("howdy", "buddy"); + trie.AddDataForKey("foo", "bar"); + std::set<std::string> results; + trie.FindDataForKeyPrefix("h", &results); + std::set<std::string> expected; + expected.insert("world"); + expected.insert("buddy"); + EXPECT_EQ(expected, results); +} + +TEST(TrieTest, KeyCanBePrefixOfOtherKey) { + Trie<std::string> trie; + trie.AddDataForKey("hello", "world"); + trie.AddDataForKey("helloo", "woorld"); + trie.AddDataForKey("hella", "warld"); + std::set<std::string> results; + trie.FindDataForKeyPrefix("hello", &results); + std::set<std::string> expected; + expected.insert("world"); + expected.insert("woorld"); + EXPECT_EQ(expected, results); +} + +TEST(TrieTest, AllowMutlipleKeys) { + Trie<std::string> trie; + trie.AddDataForKey("hello", "world"); + trie.AddDataForKey("hello", "woorld"); + std::set<std::string> results; + trie.FindDataForKeyPrefix("hello", &results); + std::set<std::string> expected; + expected.insert("world"); + expected.insert("woorld"); + EXPECT_EQ(expected, results); +} + +TEST(TrieTest, CanFindVeryLongKey) { + Trie<std::string> trie; + static const char kVeryLongKey[] = "1234567890qwertyuioasdfghj"; + trie.AddDataForKey(kVeryLongKey, "world"); + std::set<std::string> result; + trie.FindDataForKeyPrefix(kVeryLongKey, &result); + std::set<std::string> expected; + expected.insert("world"); + EXPECT_EQ(expected, result); +} + +} // namespace + +} // namespace addressinput +} // namespace i18n diff --git a/third_party/libaddressinput/chromium/string_compare.cc b/third_party/libaddressinput/chromium/string_compare.cc deleted file mode 100644 index 2affd30..0000000 --- a/third_party/libaddressinput/chromium/string_compare.cc +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "cpp/src/util/string_compare.h" - -#include "base/i18n/string_compare.h" -#include "base/logging.h" -#include "base/strings/utf_string_conversions.h" -#include "cpp/include/libaddressinput/util/scoped_ptr.h" -#include "third_party/icu/source/i18n/unicode/coll.h" - -namespace i18n { -namespace addressinput { - -bool LooseStringCompare(const std::string& a, const std::string& b) { - UErrorCode error_code = U_ZERO_ERROR; - scoped_ptr<icu::Collator> collator(icu::Collator::createInstance(error_code)); - // Differences in diacriticals and case are ignored. - collator->setStrength(icu::Collator::PRIMARY); - DCHECK(U_SUCCESS(error_code)); - UCollationResult result = base::i18n::CompareString16WithCollator( - collator.get(), - base::UTF8ToUTF16(a), - base::UTF8ToUTF16(b)); - DCHECK(U_SUCCESS(error_code)); - return result == UCOL_EQUAL; -} - -} // namespace addressinput -} // namespace i18n diff --git a/third_party/libaddressinput/libaddressinput.gyp b/third_party/libaddressinput/libaddressinput.gyp index 04c3f95..b0dfee2 100644 --- a/third_party/libaddressinput/libaddressinput.gyp +++ b/third_party/libaddressinput/libaddressinput.gyp @@ -49,12 +49,12 @@ '<(SHARED_INTERMEDIATE_DIR)/libaddressinput/', ], 'sources': [ + 'chromium/canonicalize_string.cc', 'chromium/chrome_downloader_impl.cc', 'chromium/chrome_downloader_impl.h', 'chromium/chrome_storage_impl.cc', 'chromium/chrome_storage_impl.h', 'chromium/json.cc', - 'chromium/string_compare.cc', '<(libaddressinput_dir)/cpp/include/libaddressinput/address_data.h', '<(libaddressinput_dir)/cpp/include/libaddressinput/address_field.h', '<(libaddressinput_dir)/cpp/include/libaddressinput/address_problem.h', @@ -92,9 +92,13 @@ '<(libaddressinput_dir)/cpp/src/util/md5.cc', '<(libaddressinput_dir)/cpp/src/util/md5.h', '<(libaddressinput_dir)/cpp/src/util/stl_util.h', + '<(libaddressinput_dir)/cpp/src/util/canonicalize_string.h', + '<(libaddressinput_dir)/cpp/src/util/string_compare.cc', '<(libaddressinput_dir)/cpp/src/util/string_compare.h', '<(libaddressinput_dir)/cpp/src/util/string_split.cc', '<(libaddressinput_dir)/cpp/src/util/string_split.h', + '<(libaddressinput_dir)/cpp/src/util/trie.cc', + '<(libaddressinput_dir)/cpp/src/util/trie.h', ], 'defines': [ 'VALIDATION_DATA_URL="https://i18napis.appspot.com/ssl-aggregate-address/"', @@ -152,6 +156,7 @@ '<(libaddressinput_dir)/cpp/test/util/scoped_ptr_unittest.cc', '<(libaddressinput_dir)/cpp/test/util/stl_util_unittest.cc', '<(libaddressinput_dir)/cpp/test/util/string_split_unittest.cc', + '<(libaddressinput_dir)/cpp/test/util/trie_test.cc', ], 'defines': [ 'TEST_DATA_DIR="third_party/libaddressinput/src/testdata"', |