summaryrefslogtreecommitdiffstats
path: root/third_party/libaddressinput
diff options
context:
space:
mode:
authorrouslan@chromium.org <rouslan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-03-15 00:56:24 +0000
committerrouslan@chromium.org <rouslan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-03-15 00:56:24 +0000
commit07949ab6ce355f0ce0e1e238326d695272191cdc (patch)
tree85a0b8c223b04be48724b5b6ab3472513b5c3e94 /third_party/libaddressinput
parentfbcf7ab20679679f9c4f589a34df250a01f12cdf (diff)
downloadchromium_src-07949ab6ce355f0ce0e1e238326d695272191cdc.zip
chromium_src-07949ab6ce355f0ce0e1e238326d695272191cdc.tar.gz
chromium_src-07949ab6ce355f0ce0e1e238326d695272191cdc.tar.bz2
[rac] Improve libaddressinput suggestion performance with tries.
The patch changes suggestion algorithm from a for loop that compared prefixes to a trie. The performance impact on a debug build: Old (for loop that compared prefixes): - Build rulesets = 200 ms - Each character typed = 500 ms New (trie): - Build rulesets = 200 ms - First character typed = 220 ms (200 ms due to building trie) - Each subsequent character typed = 20 ms The performance for each character improves significantly. Building rulesets and the trie still have room for optimization. BUG=347383 Review URL: https://codereview.chromium.org/183793003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@257277 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/libaddressinput')
-rw-r--r--third_party/libaddressinput/chromium/canonicalize_string.cc63
-rw-r--r--third_party/libaddressinput/chromium/cpp/libaddressinput.gyp3
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/address_validator.cc370
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/rule.cc14
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/rule.h13
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/ruleset.cc183
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/ruleset.h65
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.cc47
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.h38
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc9
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/util/trie.cc98
-rw-r--r--third_party/libaddressinput/chromium/cpp/src/util/trie.h68
-rw-r--r--third_party/libaddressinput/chromium/cpp/test/util/trie_test.cc114
-rw-r--r--third_party/libaddressinput/chromium/string_compare.cc31
-rw-r--r--third_party/libaddressinput/libaddressinput.gyp7
15 files changed, 903 insertions, 220 deletions
diff --git a/third_party/libaddressinput/chromium/canonicalize_string.cc b/third_party/libaddressinput/chromium/canonicalize_string.cc
new file mode 100644
index 0000000..b6d9928
--- /dev/null
+++ b/third_party/libaddressinput/chromium/canonicalize_string.cc
@@ -0,0 +1,63 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cpp/src/util/canonicalize_string.h"
+
+#include "base/logging.h"
+#include "cpp/include/libaddressinput/util/scoped_ptr.h"
+#include "third_party/icu/source/i18n/unicode/coll.h"
+
+namespace i18n {
+namespace addressinput {
+
+namespace {
+
+class ChromeStringCanonicalizer : public StringCanonicalizer {
+ public:
+ ChromeStringCanonicalizer()
+ : error_code_(U_ZERO_ERROR),
+ collator_(icu::Collator::createInstance(error_code_)) {
+ collator_->setStrength(icu::Collator::PRIMARY);
+ DCHECK(U_SUCCESS(error_code_));
+ }
+
+ virtual ~ChromeStringCanonicalizer() {}
+
+ // StringCanonicalizer implementation.
+ virtual std::string CanonicalizeString(const std::string& original) {
+ // Returns a canonical version of the string that can be used for comparing
+ // strings regardless of diacritics and capitalization.
+ // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
+ // CanonicalizeString("Texas") == CanonicalizeString("teXas");
+ // CanonicalizeString("Texas") != CanonicalizeString("California");
+ //
+ // The output is not human-readable.
+ // CanonicalizeString("Texas") != "Texas";
+ icu::UnicodeString icu_str(
+ original.c_str(), static_cast<int32_t>(original.length()));
+ int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
+ scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
+ DCHECK(buffer.get());
+ int32_t filled_size =
+ collator_->getSortKey(icu_str, buffer.get(), buffer_size);
+ DCHECK_EQ(buffer_size, filled_size);
+ return std::string(reinterpret_cast<const char*>(buffer.get()));
+ }
+
+ private:
+ UErrorCode error_code_;
+ scoped_ptr<icu::Collator> collator_;
+
+ DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer);
+};
+
+} // namespace
+
+// static
+scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() {
+ return scoped_ptr<StringCanonicalizer>(new ChromeStringCanonicalizer);
+}
+
+} // namespace addressinput
+} // namespace i18n
diff --git a/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp b/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp
index bde2537..6215898 100644
--- a/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp
+++ b/third_party/libaddressinput/chromium/cpp/libaddressinput.gyp
@@ -43,10 +43,12 @@
'src/rule.cc',
'src/ruleset.cc',
'src/time_to_string.cc',
+ 'src/util/canonicalize_string.cc',
'src/util/json.cc',
'src/util/md5.cc',
'src/util/string_compare.cc',
'src/util/string_split.cc',
+ 'src/util/trie.cc',
],
'defines': [
'VALIDATION_DATA_URL="https://i18napis.appspot.com/ssl-aggregate-address/"',
@@ -80,6 +82,7 @@
'test/util/scoped_ptr_unittest.cc',
'test/util/stl_util_unittest.cc',
'test/util/string_split_unittest.cc',
+ 'test/util/trie_test.cc',
],
'defines': [
'TEST_DATA_DIR="../testdata"',
diff --git a/third_party/libaddressinput/chromium/cpp/src/address_validator.cc b/third_party/libaddressinput/chromium/cpp/src/address_validator.cc
index 5ca32b1..39df55b 100644
--- a/third_party/libaddressinput/chromium/cpp/src/address_validator.cc
+++ b/third_party/libaddressinput/chromium/cpp/src/address_validator.cc
@@ -22,6 +22,7 @@
#include <libaddressinput/util/scoped_ptr.h>
#include <algorithm>
+#include <bitset>
#include <cassert>
#include <cstddef>
#include <map>
@@ -46,16 +47,17 @@ namespace addressinput {
namespace {
-// A bit map of MatchingRuleField values.
-typedef int MatchingRuleFields;
+// A type to store a list of pointers to Ruleset objects.
+typedef std::set<const Ruleset*> Rulesets;
-// The field in the rule that matches a user input. Can be used in a bitmask.
-enum MatchingRuleField {
- NONE = 0,
- KEY = 1,
- NAME = 2,
- LATIN_NAME = 4
-};
+// A type to map the field in a rule to rulesets.
+typedef std::map<Rule::IdentityField, Rulesets> IdentityFieldRulesets;
+
+// A type to map the field in an address to rulesets.
+typedef std::map<AddressField, IdentityFieldRulesets> AddressFieldRulesets;
+
+// A set of Rule::IdentityField values that match user input.
+typedef std::bitset<Rule::IDENTITY_FIELDS_SIZE> MatchingRuleFields;
// Returns true if |prefix_regex| matches a prefix of |value|. For example,
// "(90|81)" matches a prefix of "90291".
@@ -64,171 +66,6 @@ bool ValueMatchesPrefixRegex(const std::string& value,
return RE2::FullMatch(value, "^(" + prefix_regex + ").*");
}
-// Returns true if the first |length| characters of |a| are loosely equal to the
-// first |length| characters of |b|.
-bool LoosePrefixCompare(const std::string& a,
- const std::string& b,
- size_t length) {
- return LooseStringCompare(a.substr(0, length), b.substr(0, length));
-}
-
-// Returns true if the fields of |address| are empty strings between the |begin|
-// field exclusively and the |end| field inclusively.
-bool AddressFieldsEmpty(const AddressData& address,
- AddressField begin,
- AddressField end) {
- for (int field = begin + 1; field <= end; ++field) {
- if (!address.GetFieldValue(static_cast<AddressField>(field)).empty()) {
- return false;
- }
- }
- return true;
-}
-
-// Finds the most accurate rulesets that match |user_input| and adds them to
-// |suggestions| as address data structures.
-//
-// Converts the rulesets into data structures by copying either the ruleset key,
-// name, or latinized name into the field of the data structure. Traverses the
-// tree of rulesets up to its root to capture the most complete suggestion. For
-// example, converts the ruleset for "data/CN/北京市/昌平区" into (latinized)
-// "Changping Qu, Beijing Shi, CN" address.
-//
-// Note that both "data/US" and "data/US/CA" rulesets match a "California, US,
-// 90291" user input, but only "data/US/CA" ends up in |suggestions|, because
-// it's more accurate than "data/US".
-//
-// Considers only |parent_matching_rule_fields| when comparing rules to user
-// input values. For example, if |parent_matching_rule_fields| is KEY | NAME,
-// then the function ignores the latinized name for the ruleset.
-//
-// If the number of |suggestions| is over the |suggestions_limit|, then clears
-// |suggestions| and returns false.
-//
-// The |suggestions| parameter should not be NULL.
-bool FindSuggestions(const AddressData& user_input,
- AddressField focused_field,
- size_t suggestions_limit,
- const Ruleset& ruleset,
- MatchingRuleFields parent_matching_rule_fields,
- std::vector<AddressData>* suggestions) {
- assert(suggestions != NULL);
-
- // Compare the postal code of |user_input| with the |ruleset|.
- const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code);
- if (!rule.GetPostalCodeFormat().empty() &&
- !user_input.postal_code.empty() &&
- !ValueMatchesPrefixRegex(
- user_input.postal_code, rule.GetPostalCodeFormat())) {
- return true;
- }
-
- // Compare the value of |ruleset->field()| in |user input| with the key,
- // name, and latinized name for |ruleset|.
- const std::string& input = user_input.GetFieldValue(ruleset.field());
- MatchingRuleFields matching_rule_fields = NONE;
- if (input.empty() || ruleset.field() == COUNTRY) {
- matching_rule_fields = parent_matching_rule_fields;
- } else {
- if ((parent_matching_rule_fields & KEY) &&
- LoosePrefixCompare(input, rule.GetKey(), input.length())) {
- matching_rule_fields |= KEY;
- }
- if ((parent_matching_rule_fields & NAME) &&
- LoosePrefixCompare(input, rule.GetName(), input.length())) {
- matching_rule_fields |= NAME;
- }
- if ((parent_matching_rule_fields & LATIN_NAME) &&
- LoosePrefixCompare(input, rule.GetLatinName(), input.length())) {
- matching_rule_fields |= LATIN_NAME;
- }
- }
-
- if (matching_rule_fields == NONE) {
- return true;
- }
-
- // Add a suggestion based on |ruleset|.
- if (ruleset.field() == DEPENDENT_LOCALITY ||
- ruleset.field() == focused_field ||
- (ruleset.GetSubRegionRulesets().empty() &&
- AddressFieldsEmpty(user_input, ruleset.field(), DEPENDENT_LOCALITY))) {
- if (suggestions->size() >= suggestions_limit) {
- suggestions->clear();
- return false;
- }
-
- // If the user's language is not one of the supported languages of a country
- // that has latinized names for its regions, then prefer to suggest the
- // latinized region names. If the user types in local script instead, then
- // the local script names will be suggested.
- bool prefer_latin_name =
- !rule.GetLanguage().empty() &&
- rule.GetLanguage() != user_input.language_code &&
- !rule.GetLatinName().empty();
-
- MatchingRuleField rule_field = NONE;
- if (prefer_latin_name && (matching_rule_fields & LATIN_NAME)) {
- rule_field = LATIN_NAME;
- } else if (matching_rule_fields & KEY) {
- rule_field = KEY;
- } else if (matching_rule_fields & NAME) {
- rule_field = NAME;
- } else if (matching_rule_fields & LATIN_NAME) {
- rule_field = LATIN_NAME;
- } else {
- assert(false);
- }
-
- AddressData suggestion;
- suggestion.postal_code = user_input.postal_code;
-
- // Traverse the tree of rulesets from the most specific |ruleset| to the
- // country-wide "root" of the tree. Use the region names found at each of
- // the levels of the ruleset tree to build the |suggestion|.
- for (const Ruleset* suggestion_ruleset = &ruleset;
- suggestion_ruleset->parent() != NULL;
- suggestion_ruleset = suggestion_ruleset->parent()) {
- const Rule& suggestion_rule =
- suggestion_ruleset->GetLanguageCodeRule(user_input.language_code);
- switch (rule_field) {
- case KEY:
- suggestion.SetFieldValue(
- suggestion_ruleset->field(), suggestion_rule.GetKey());
- break;
- case NAME:
- suggestion.SetFieldValue(
- suggestion_ruleset->field(), suggestion_rule.GetName());
- break;
- case LATIN_NAME:
- suggestion.SetFieldValue(
- suggestion_ruleset->field(), suggestion_rule.GetLatinName());
- break;
- default:
- assert(false);
- break;
- }
- }
-
- suggestions->push_back(suggestion);
- return true;
- }
-
- for (std::map<std::string, Ruleset*>::const_iterator
- subregion_ruleset_it = ruleset.GetSubRegionRulesets().begin();
- subregion_ruleset_it != ruleset.GetSubRegionRulesets().end();
- ++subregion_ruleset_it) {
- assert(subregion_ruleset_it->second != NULL);
- if (!FindSuggestions(user_input, focused_field, suggestions_limit,
- *subregion_ruleset_it->second, matching_rule_fields,
- suggestions)) {
- return false;
- }
- }
-
- return true;
-}
-
// Returns true if the filter is empty (all problems allowed) or contains the
// |field|->|problem| mapping (explicitly allowed).
bool FilterAllows(const AddressProblemFilter& filter,
@@ -260,6 +97,37 @@ bool IsEmptyStreetAddress(const std::vector<std::string>& street_address) {
return true;
}
+// Collects rulesets based on whether they have a parent in the given list.
+class ParentedRulesetCollector {
+ public:
+ // Retains a reference to both of the parameters. Does not make a copy of
+ // |parent_rulesets|. Does not take ownership of |rulesets_with_parents|. The
+ // |rulesets_with_parents| parameter should not be NULL.
+ ParentedRulesetCollector(const Rulesets& parent_rulesets,
+ Rulesets* rulesets_with_parents)
+ : parent_rulesets_(parent_rulesets),
+ rulesets_with_parents_(rulesets_with_parents) {
+ assert(rulesets_with_parents_ != NULL);
+ }
+
+ ~ParentedRulesetCollector() {}
+
+ // Adds |ruleset_to_test| to the |rulesets_with_parents_| collection, if the
+ // given ruleset has a parent in |parent_rulesets_|. The |ruleset_to_test|
+ // parameter should not be NULL.
+ void operator()(const Ruleset* ruleset_to_test) {
+ assert(ruleset_to_test != NULL);
+ if (parent_rulesets_.find(ruleset_to_test->parent()) !=
+ parent_rulesets_.end()) {
+ rulesets_with_parents_->insert(ruleset_to_test);
+ }
+ }
+
+ private:
+ const Rulesets& parent_rulesets_;
+ Rulesets* rulesets_with_parents_;
+};
+
// Validates AddressData structure.
class AddressValidatorImpl : public AddressValidator {
public:
@@ -297,7 +165,7 @@ class AddressValidatorImpl : public AddressValidator {
const AddressData& address,
const AddressProblemFilter& filter,
AddressProblems* problems) const {
- std::map<std::string, const Ruleset*>::const_iterator ruleset_it =
+ std::map<std::string, Ruleset*>::const_iterator ruleset_it =
rules_.find(address.country_code);
// We can still validate the required fields even if the full ruleset isn't
@@ -386,7 +254,7 @@ class AddressValidatorImpl : public AddressValidator {
AddressField focused_field,
size_t suggestions_limit,
std::vector<AddressData>* suggestions) const {
- std::map<std::string, const Ruleset*>::const_iterator ruleset_it =
+ std::map<std::string, Ruleset*>::const_iterator ruleset_it =
rules_.find(user_input.country_code);
if (ruleset_it == rules_.end()) {
@@ -401,16 +269,154 @@ class AddressValidatorImpl : public AddressValidator {
}
suggestions->clear();
- const Ruleset* country_ruleset = ruleset_it->second;
- assert(country_ruleset != NULL);
+ assert(ruleset_it->second != NULL);
+
+ // Initialize the prefix search index lazily.
+ if (!ruleset_it->second->prefix_search_index_ready()) {
+ ruleset_it->second->BuildPrefixSearchIndex();
+ }
+
+ const Ruleset& country_ruleset = *ruleset_it->second;
+ const Rule& country_rule =
+ country_ruleset.GetLanguageCodeRule(user_input.language_code);
+
+ // Do not suggest anything if the user is typing the postal code that is not
+ // valid for the country.
+ if (!user_input.postal_code.empty() &&
+ focused_field == POSTAL_CODE &&
+ !country_rule.GetPostalCodeFormat().empty() &&
+ !ValueMatchesPrefixRegex(
+ user_input.postal_code, country_rule.GetPostalCodeFormat())) {
+ return SUCCESS;
+ }
+
+ // Determine the most specific address field that can be suggested.
+ AddressField suggestion_field = focused_field != POSTAL_CODE
+ ? focused_field : DEPENDENT_LOCALITY;
+ if (suggestion_field > country_ruleset.deepest_ruleset_level()) {
+ suggestion_field = country_ruleset.deepest_ruleset_level();
+ }
+ if (focused_field != POSTAL_CODE) {
+ while (user_input.GetFieldValue(suggestion_field).empty() &&
+ suggestion_field > ADMIN_AREA) {
+ suggestion_field = static_cast<AddressField>(suggestion_field - 1);
+ }
+ }
+
+ // Find all rulesets that match user input.
+ AddressFieldRulesets rulesets;
+ for (int i = ADMIN_AREA; i <= suggestion_field; ++i) {
+ for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) {
+ AddressField address_field = static_cast<AddressField>(i);
+ Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j);
+
+ // Find all rulesets at |address_field| level whose |rule_field| starts
+ // with user input value.
+ country_ruleset.FindRulesetsByPrefix(
+ user_input.language_code, address_field, rule_field,
+ user_input.GetFieldValue(address_field),
+ &rulesets[address_field][rule_field]);
+
+ // Filter out the rulesets whose parents do not match the user input.
+ if (address_field > ADMIN_AREA) {
+ AddressField parent_field =
+ static_cast<AddressField>(address_field - 1);
+ Rulesets rulesets_with_parents;
+ std::for_each(
+ rulesets[address_field][rule_field].begin(),
+ rulesets[address_field][rule_field].end(),
+ ParentedRulesetCollector(rulesets[parent_field][rule_field],
+ &rulesets_with_parents));
+ rulesets[address_field][rule_field].swap(rulesets_with_parents);
+ }
+ }
+ }
+
+ // Determine the fields in the rules that match the user input. This
+ // operation converts a map of Rule::IdentityField value -> Ruleset into a
+ // map of Ruleset -> Rule::IdentityField bitset.
+ std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets;
+ for (IdentityFieldRulesets::const_iterator rule_field_it =
+ rulesets[suggestion_field].begin();
+ rule_field_it != rulesets[suggestion_field].end();
+ ++rule_field_it) {
+ const Rule::IdentityField rule_identity_field = rule_field_it->first;
+ for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin();
+ ruleset_it != rule_field_it->second.end();
+ ++ruleset_it) {
+ suggestion_rulesets[*ruleset_it].set(rule_identity_field);
+ }
+ }
+
+ // Generate suggestions based on the rulesets. Use a Rule::IdentityField
+ // from the bitset to generate address field values.
+ for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator
+ suggestion_it = suggestion_rulesets.begin();
+ suggestion_it != suggestion_rulesets.end();
+ ++suggestion_it) {
+ const Ruleset& ruleset = *suggestion_it->first;
+ const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code);
+ const MatchingRuleFields& matching_rule_fields = suggestion_it->second;
+
+ // Do not suggest this region if the postal code in user input does not
+ // match it.
+ if (!user_input.postal_code.empty() &&
+ !rule.GetPostalCodeFormat().empty() &&
+ !ValueMatchesPrefixRegex(
+ user_input.postal_code, rule.GetPostalCodeFormat())) {
+ continue;
+ }
+
+ // Do not add more suggestions than |suggestions_limit|.
+ if (suggestions->size() >= suggestions_limit) {
+ suggestions->clear();
+ return SUCCESS;
+ }
+
+ // If the user's language is not one of the supported languages of a
+ // country that has latinized names for its regions, then prefer to
+ // suggest the latinized region names. If the user types in local script
+ // instead, then the local script names will be suggested.
+ Rule::IdentityField rule_field = Rule::KEY;
+ if (!country_rule.GetLanguage().empty() &&
+ country_rule.GetLanguage() != user_input.language_code &&
+ !rule.GetLatinName().empty() &&
+ matching_rule_fields.test(Rule::LATIN_NAME)) {
+ rule_field = Rule::LATIN_NAME;
+ } else if (matching_rule_fields.test(Rule::KEY)) {
+ rule_field = Rule::KEY;
+ } else if (matching_rule_fields.test(Rule::NAME)) {
+ rule_field = Rule::NAME;
+ } else if (matching_rule_fields.test(Rule::LATIN_NAME)) {
+ rule_field = Rule::LATIN_NAME;
+ } else {
+ assert(false);
+ }
+
+ AddressData suggestion;
+ suggestion.postal_code = user_input.postal_code;
+
+ // Traverse the tree of rulesets from the most specific |ruleset| to the
+ // country-wide "root" of the tree. Use the region names found at each of
+ // the levels of the ruleset tree to build the |suggestion|.
+ for (const Ruleset* suggestion_ruleset = &ruleset;
+ suggestion_ruleset->parent() != NULL;
+ suggestion_ruleset = suggestion_ruleset->parent()) {
+ const Rule& suggestion_rule =
+ suggestion_ruleset->GetLanguageCodeRule(user_input.language_code);
+ suggestion.SetFieldValue(suggestion_ruleset->field(),
+ suggestion_rule.GetIdentityField(rule_field));
+ }
+
+ suggestions->push_back(suggestion);
+ }
- FindSuggestions(user_input, focused_field, suggestions_limit,
- *country_ruleset, KEY | NAME | LATIN_NAME, suggestions);
return SUCCESS;
}
+ // AddressValidator implementation.
virtual bool CanonicalizeAdministrativeArea(AddressData* address_data) const {
- std::map<std::string, const Ruleset*>::const_iterator ruleset_it =
+ std::map<std::string, Ruleset*>::const_iterator ruleset_it =
rules_.find(address_data->country_code);
if (ruleset_it == rules_.end()) {
return false;
@@ -475,7 +481,7 @@ class AddressValidatorImpl : public AddressValidator {
std::set<std::string> loading_rules_;
// A mapping of a country code to the owned ruleset for that country code.
- std::map<std::string, const Ruleset*> rules_;
+ std::map<std::string, Ruleset*> rules_;
DISALLOW_COPY_AND_ASSIGN(AddressValidatorImpl);
};
diff --git a/third_party/libaddressinput/chromium/cpp/src/rule.cc b/third_party/libaddressinput/chromium/cpp/src/rule.cc
index 1604390..a94e44d 100644
--- a/third_party/libaddressinput/chromium/cpp/src/rule.cc
+++ b/third_party/libaddressinput/chromium/cpp/src/rule.cc
@@ -335,6 +335,20 @@ void Rule::ParseJsonRule(const Json& json_rule) {
}
}
+const std::string& Rule::GetIdentityField(IdentityField identity_field) const {
+ switch (identity_field) {
+ case KEY:
+ return key_;
+ case NAME:
+ return name_;
+ case LATIN_NAME:
+ return latin_name_;
+ case IDENTITY_FIELDS_SIZE:
+ assert(false);
+ }
+ return key_;
+}
+
int Rule::GetInvalidFieldMessageId(AddressField field) const {
switch (field) {
case ADMIN_AREA:
diff --git a/third_party/libaddressinput/chromium/cpp/src/rule.h b/third_party/libaddressinput/chromium/cpp/src/rule.h
index 407455b..80cc830 100644
--- a/third_party/libaddressinput/chromium/cpp/src/rule.h
+++ b/third_party/libaddressinput/chromium/cpp/src/rule.h
@@ -62,6 +62,14 @@ struct FormatElement {
// }
class Rule {
public:
+ // The types of fields that describe the rule.
+ enum IdentityField {
+ KEY,
+ NAME,
+ LATIN_NAME,
+ IDENTITY_FIELDS_SIZE
+ };
+
Rule();
~Rule();
@@ -80,6 +88,11 @@ class Rule {
// Parses |json_rule|, which must contain parsed serialized rule.
void ParseJsonRule(const Json& json_rule);
+ // Returns the value of the |identity_field| for this rule, for example, can
+ // return "TX" or "Texas". The |identity_field| parameter should not be
+ // IDENTITY_FIELDS_SIZE.
+ const std::string& GetIdentityField(IdentityField identity_field) const;
+
// Returns the key for this rule. For example, can return "TX".
const std::string& GetKey() const { return key_; }
diff --git a/third_party/libaddressinput/chromium/cpp/src/ruleset.cc b/third_party/libaddressinput/chromium/cpp/src/ruleset.cc
index 43823b4..201e407 100644
--- a/third_party/libaddressinput/chromium/cpp/src/ruleset.cc
+++ b/third_party/libaddressinput/chromium/cpp/src/ruleset.cc
@@ -20,17 +20,23 @@
#include <cassert>
#include <cstddef>
#include <map>
+#include <set>
#include <string>
+#include <utility>
#include "rule.h"
+#include "util/canonicalize_string.h"
#include "util/stl_util.h"
namespace i18n {
namespace addressinput {
Ruleset::Ruleset(AddressField field, scoped_ptr<Rule> rule)
- : parent_(NULL),
+ : tries_(),
+ canonicalizer_(),
+ parent_(NULL),
field_(field),
+ deepest_ruleset_level_(field),
rule_(rule.Pass()),
sub_regions_(),
language_codes_() {
@@ -42,6 +48,33 @@ Ruleset::Ruleset(AddressField field, scoped_ptr<Rule> rule)
Ruleset::~Ruleset() {
STLDeleteValues(&sub_regions_);
STLDeleteValues(&language_codes_);
+
+ // Delete the maps and trie objects owned by |tries_| field.
+ for (LanguageCodeTries::const_iterator lang_it = tries_.begin();
+ lang_it != tries_.end(); ++lang_it) {
+ AddressFieldTries* address_field_tries = lang_it->second;
+ assert(address_field_tries != NULL);
+
+ for (AddressFieldTries::const_iterator address_field_it =
+ address_field_tries->begin();
+ address_field_it != address_field_tries->end();
+ ++address_field_it) {
+ IdentityFieldTries* identity_field_tries = address_field_it->second;
+ assert(identity_field_tries != NULL);
+
+ for (IdentityFieldTries::const_iterator identity_field_it =
+ identity_field_tries->begin();
+ identity_field_it != identity_field_tries->end();
+ ++identity_field_it) {
+ // The tries do not own the ruleset objects.
+ Trie<const Ruleset*>* trie = identity_field_it->second;
+ assert(trie != NULL);
+ delete trie;
+ }
+ delete identity_field_tries;
+ }
+ delete address_field_tries;
+ }
}
void Ruleset::AddSubRegionRuleset(const std::string& sub_region,
@@ -74,5 +107,153 @@ const Rule& Ruleset::GetLanguageCodeRule(
return it != language_codes_.end() ? *it->second : *rule_;
}
+void Ruleset::BuildPrefixSearchIndex() {
+ assert(field_ == COUNTRY);
+ assert(tries_.empty());
+
+ // Default language tries.
+ tries_[""] = new AddressFieldTries;
+
+ // Non-default language tries.
+ for (std::vector<std::string>::const_iterator lang_it =
+ rule_->GetLanguages().begin();
+ lang_it != rule_->GetLanguages().end();
+ ++lang_it) {
+ if (*lang_it != rule_->GetLanguage() && !lang_it->empty()) {
+ tries_[*lang_it] = new AddressFieldTries;
+ }
+ }
+
+ for (LanguageCodeTries::const_iterator lang_it = tries_.begin();
+ lang_it != tries_.end(); ++lang_it) {
+ AddressFieldTries* address_field_tries = lang_it->second;
+ address_field_tries->insert(
+ std::make_pair(ADMIN_AREA, new IdentityFieldTries));
+ address_field_tries->insert(
+ std::make_pair(LOCALITY, new IdentityFieldTries));
+ address_field_tries->insert(
+ std::make_pair(DEPENDENT_LOCALITY, new IdentityFieldTries));
+
+ for (AddressFieldTries::const_iterator address_field_it =
+ address_field_tries->begin();
+ address_field_it != address_field_tries->end();
+ ++address_field_it) {
+ IdentityFieldTries* identity_field_tries = address_field_it->second;
+ identity_field_tries->insert(
+ std::make_pair(Rule::KEY, new Trie<const Ruleset*>));
+ identity_field_tries->insert(
+ std::make_pair(Rule::NAME, new Trie<const Ruleset*>));
+ identity_field_tries->insert(
+ std::make_pair(Rule::LATIN_NAME, new Trie<const Ruleset*>));
+ }
+ }
+
+ canonicalizer_ = StringCanonicalizer::Build();
+ AddSubRegionRulesetsToTrie(*this);
+}
+
+void Ruleset::FindRulesetsByPrefix(const std::string& language_code,
+ AddressField ruleset_level,
+ Rule::IdentityField identity_field,
+ const std::string& prefix,
+ std::set<const Ruleset*>* result) const {
+ assert(field_ == COUNTRY);
+ assert(ruleset_level >= ADMIN_AREA);
+ assert(ruleset_level <= DEPENDENT_LOCALITY);
+ assert(result != NULL);
+ assert(canonicalizer_ != NULL);
+
+ LanguageCodeTries::const_iterator lang_it = tries_.find(language_code);
+ AddressFieldTries* address_field_tries = lang_it != tries_.end()
+ ? lang_it->second : tries_.find("")->second;
+ assert(address_field_tries != NULL);
+
+ AddressFieldTries::const_iterator address_field_it =
+ address_field_tries->find(ruleset_level);
+ assert(address_field_it != address_field_tries->end());
+
+ IdentityFieldTries* identity_field_tries = address_field_it->second;
+ assert(identity_field_tries != NULL);
+
+ IdentityFieldTries::const_iterator identity_field_it =
+ identity_field_tries->find(identity_field);
+ assert(identity_field_it != identity_field_tries->end());
+
+ Trie<const Ruleset*>* trie = identity_field_it->second;
+ assert(trie != NULL);
+
+ trie->FindDataForKeyPrefix(
+ canonicalizer_->CanonicalizeString(prefix), result);
+}
+
+void Ruleset::AddSubRegionRulesetsToTrie(const Ruleset& parent_ruleset) {
+ assert(field_ == COUNTRY);
+ assert(canonicalizer_ != NULL);
+
+ for (std::map<std::string, Ruleset*>::const_iterator sub_region_it =
+ parent_ruleset.sub_regions_.begin();
+ sub_region_it != parent_ruleset.sub_regions_.end();
+ ++sub_region_it) {
+ const Ruleset* ruleset = sub_region_it->second;
+ assert(ruleset != NULL);
+
+ if (deepest_ruleset_level_ < ruleset->field()) {
+ deepest_ruleset_level_ = ruleset->field();
+ }
+
+ for (LanguageCodeTries::const_iterator lang_it = tries_.begin();
+ lang_it != tries_.end(); ++lang_it) {
+ const std::string& language_code = lang_it->first;
+ const Rule& rule = ruleset->GetLanguageCodeRule(language_code);
+
+ AddressFieldTries* address_field_tries = lang_it->second;
+ assert(address_field_tries != NULL);
+
+ AddressFieldTries::const_iterator address_field_it =
+ address_field_tries->find(ruleset->field());
+ assert(address_field_it != address_field_tries->end());
+
+ IdentityFieldTries* identity_field_tries = address_field_it->second;
+ assert(identity_field_tries != NULL);
+
+ IdentityFieldTries::const_iterator identity_field_it =
+ identity_field_tries->find(Rule::KEY);
+ assert(identity_field_it != identity_field_tries->end());
+
+ Trie<const Ruleset*>* key_trie = identity_field_it->second;
+ assert(key_trie != NULL);
+
+ identity_field_it = identity_field_tries->find(Rule::NAME);
+ assert(identity_field_it != identity_field_tries->end());
+
+ Trie<const Ruleset*>* name_trie = identity_field_it->second;
+ assert(name_trie != NULL);
+
+ identity_field_it = identity_field_tries->find(Rule::LATIN_NAME);
+ assert(identity_field_it != identity_field_tries->end());
+
+ Trie<const Ruleset*>* latin_name_trie = identity_field_it->second;
+ assert(latin_name_trie != NULL);
+
+ if (!rule.GetKey().empty()) {
+ key_trie->AddDataForKey(
+ canonicalizer_->CanonicalizeString(rule.GetKey()), ruleset);
+ }
+
+ if (!rule.GetName().empty()) {
+ name_trie->AddDataForKey(
+ canonicalizer_->CanonicalizeString(rule.GetName()), ruleset);
+ }
+
+ if (!rule.GetLatinName().empty()) {
+ latin_name_trie->AddDataForKey(
+ canonicalizer_->CanonicalizeString(rule.GetLatinName()), ruleset);
+ }
+ }
+
+ AddSubRegionRulesetsToTrie(*ruleset);
+ }
+}
+
} // namespace addressinput
} // namespace i18n
diff --git a/third_party/libaddressinput/chromium/cpp/src/ruleset.h b/third_party/libaddressinput/chromium/cpp/src/ruleset.h
index 350a675..fd9e3d9 100644
--- a/third_party/libaddressinput/chromium/cpp/src/ruleset.h
+++ b/third_party/libaddressinput/chromium/cpp/src/ruleset.h
@@ -20,14 +20,16 @@
#include <libaddressinput/util/scoped_ptr.h>
#include <map>
+#include <set>
#include <string>
-#include <vector>
+
+#include "rule.h"
+#include "util/canonicalize_string.h"
+#include "util/trie.h"
namespace i18n {
namespace addressinput {
-class Rule;
-
// A recursive data structure that stores a set of rules for a region. Can store
// the rules for a country, its administrative areas, localities, and dependent
// localities, in addition to the language-specific rules.
@@ -93,13 +95,70 @@ class Ruleset {
return sub_regions_;
}
+ // Enables using FindRulesetsByPrefix() method. Should be called only once and
+ // on a COUNTRY level ruleset.
+ void BuildPrefixSearchIndex();
+
+ // Returns true if BuildPrefixSearchIndex() has been called.
+ bool prefix_search_index_ready() const { return !tries_.empty(); }
+
+ // Returns the deepest possible ruleset level for this country. Must be called
+ // on a COUNTRY level ruleset. Must be called after BuildPrefixSearchIndex()
+ // has been called.
+ AddressField deepest_ruleset_level() const { return deepest_ruleset_level_; }
+
+ // Finds all rulesets at |ruleset_level| where the rule for |language_code|
+ // has the |identity_field| that starts with |prefix|. Ignores diacritics and
+ // capitalization differences between the rule data and |prefix|.
+ //
+ // If there're no rules for |language_code| (or |language_code| is an empty
+ // string), then the default language code is used.
+ //
+ // Should be called only on a COUNTRY level ruleset. Should be called only
+ // after BuildPrefixSearchIndex() has been called.
+ //
+ // The |field| parameter should be only ADMIN_AREA, LOCALITY, or
+ // DEPENDENT_LOCALITY. The result parameter should not be NULL.
+ void FindRulesetsByPrefix(const std::string& language_code,
+ AddressField ruleset_level,
+ Rule::IdentityField identity_field,
+ const std::string& prefix,
+ std::set<const Ruleset*>* result) const;
+
private:
+ // The type that maps rule identity field to tries of rulesets.
+ typedef std::map<Rule::IdentityField, Trie<const Ruleset*>*>
+ IdentityFieldTries;
+
+ // The type that maps address field to IdentityFieldTries.
+ typedef std::map<AddressField, IdentityFieldTries*> AddressFieldTries;
+
+ // The type that maps language code to AddressFieldTries.
+ typedef std::map<std::string, AddressFieldTries*> LanguageCodeTries;
+
+ // Adds all children of |parent_ruleset| into |tries_| of this ruleset. Should
+ // be called only on a COUNTRY level ruleset.
+ void AddSubRegionRulesetsToTrie(const Ruleset& parent_ruleset);
+
+ // The tries to lookup rulesets by a prefix of key, name, or latin-name in a
+ // rule. Has data only in a COUNTRY level ruleset. Owns the map and trie
+ // objects. Does not own the ruleset objects.
+ LanguageCodeTries tries_;
+
+ // Canonicalizes region keys, names, and latin names when building a trie.
+ scoped_ptr<StringCanonicalizer> canonicalizer_;
+
// The parent ruleset of this object. The parent ruleset owns this object.
Ruleset* parent_;
// The field of this ruleset.
const AddressField field_;
+ // The deepest possible ruleset level for this country. Set in
+ // BuildPrefixSearchIndex() method and, therefore, meaningful only on a
+ // COUNTRY level ruleset.
+ AddressField deepest_ruleset_level_;
+
// The region-wide rule in the default language of the country.
const scoped_ptr<const Rule> rule_;
diff --git a/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.cc b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.cc
new file mode 100644
index 0000000..c311a60
--- /dev/null
+++ b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.cc
@@ -0,0 +1,47 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "canonicalize_string.h"
+
+#include <libaddressinput/util/scoped_ptr.h>
+
+#include <string>
+
+namespace i18n {
+namespace addressinput {
+
+namespace {
+
+class SimpleStringCanonicalizer : public StringCanonicalizer {
+ public:
+ SimpleStringCanonicalizer() {}
+
+ virtual ~SimpleStringCanonicalizer() {}
+
+ // StringCanonicalizer implementation.
+ virtual std::string CanonicalizeString(const std::string& original) {
+ // The best we can do without ICU.
+ return original;
+ }
+};
+
+} // namespace
+
+// static
+scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() {
+ return scoped_ptr<StringCanonicalizer>(new SimpleStringCanonicalizer);
+}
+
+} // namespace addressinput
+} // namespace i18n
diff --git a/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.h b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.h
new file mode 100644
index 0000000..10af086
--- /dev/null
+++ b/third_party/libaddressinput/chromium/cpp/src/util/canonicalize_string.h
@@ -0,0 +1,38 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef I18N_ADDRESSINPUT_UTIL_CANONICALIZE_STRING_H_
+#define I18N_ADDRESSINPUT_UTIL_CANONICALIZE_STRING_H_
+
+#include <libaddressinput/util/scoped_ptr.h>
+
+#include <string>
+
+namespace i18n {
+namespace addressinput {
+
+class StringCanonicalizer {
+ public:
+ static scoped_ptr<StringCanonicalizer> Build();
+
+ virtual ~StringCanonicalizer() {}
+
+ // Returns |original| string modified to enable loose comparisons and sorting.
+ virtual std::string CanonicalizeString(const std::string& original) = 0;
+};
+
+} // namespace addressinput
+} // namespace i18n
+
+#endif // I18N_ADDRESSINPUT_UTIL_CANONICALIZE_STRING_H_
diff --git a/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc b/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc
index 2e0691f..d5f6a18 100644
--- a/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc
+++ b/third_party/libaddressinput/chromium/cpp/src/util/string_compare.cc
@@ -14,12 +14,17 @@
#include "string_compare.h"
+#include <libaddressinput/util/scoped_ptr.h>
+
+#include "canonicalize_string.h"
+
namespace i18n {
namespace addressinput {
bool LooseStringCompare(const std::string& a, const std::string& b) {
- // The best we can do without ICU.
- return a == b;
+ scoped_ptr<StringCanonicalizer> canonicalizer(StringCanonicalizer::Build());
+ return canonicalizer->CanonicalizeString(a) ==
+ canonicalizer->CanonicalizeString(b);
}
} // namespace addressinput
diff --git a/third_party/libaddressinput/chromium/cpp/src/util/trie.cc b/third_party/libaddressinput/chromium/cpp/src/util/trie.cc
new file mode 100644
index 0000000..07fb877
--- /dev/null
+++ b/third_party/libaddressinput/chromium/cpp/src/util/trie.cc
@@ -0,0 +1,98 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "trie.h"
+
+#include <cassert>
+#include <cstddef>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
+#include <utility>
+
+#include "stl_util.h"
+
+namespace i18n {
+namespace addressinput {
+
+template <typename T>
+Trie<T>::Trie() {}
+
+template <typename T>
+Trie<T>::~Trie() {
+ STLDeleteValues(&sub_nodes_);
+}
+
+template <typename T>
+void Trie<T>::AddDataForKey(const std::string& key, const T& data_item) {
+ Trie<T>* current_node = this;
+ for (size_t key_start = 0; key_start < key.length(); ++key_start) {
+ typename std::map<char, Trie<T>*>::const_iterator sub_node_it =
+ current_node->sub_nodes_.find(key[key_start]);
+ if (sub_node_it == current_node->sub_nodes_.end()) {
+ sub_node_it = current_node->sub_nodes_.insert(
+ std::make_pair(key[key_start], new Trie<T>)).first;
+ }
+ current_node = sub_node_it->second;
+ assert(current_node != NULL);
+ }
+ current_node->data_list_.push_back(data_item);
+}
+
+template <typename T>
+void Trie<T>::FindDataForKeyPrefix(const std::string& key_prefix,
+ std::set<T>* results) const {
+ assert(results != NULL);
+
+ // Find the sub-trie for the key prefix.
+ const Trie<T>* current_node = this;
+ for (size_t key_prefix_start = 0; key_prefix_start < key_prefix.length();
+ ++key_prefix_start) {
+ typename std::map<char, Trie<T>*>::const_iterator sub_node_it =
+ current_node->sub_nodes_.find(key_prefix[key_prefix_start]);
+ if (sub_node_it == current_node->sub_nodes_.end()) {
+ return;
+ }
+ current_node = sub_node_it->second;
+ assert(current_node != NULL);
+ }
+
+ // Collect data from all sub-tries.
+ std::queue<const Trie<T>*> node_queue;
+ node_queue.push(current_node);
+ while (!node_queue.empty()) {
+ const Trie<T>* queue_front = node_queue.front();
+ node_queue.pop();
+
+ results->insert(
+ queue_front->data_list_.begin(), queue_front->data_list_.end());
+
+ for (typename std::map<char, Trie<T>*>::const_iterator sub_node_it =
+ queue_front->sub_nodes_.begin();
+ sub_node_it != queue_front->sub_nodes_.end();
+ ++sub_node_it) {
+ node_queue.push(sub_node_it->second);
+ }
+ }
+}
+
+// Separating template definitions and declarations requires defining all
+// possible template parameters to avoid linking errors.
+class Ruleset;
+template class Trie<const Ruleset*>;
+template class Trie<std::string>;
+
+} // namespace addressinput
+} // namespace i18n
diff --git a/third_party/libaddressinput/chromium/cpp/src/util/trie.h b/third_party/libaddressinput/chromium/cpp/src/util/trie.h
new file mode 100644
index 0000000..6b1792b
--- /dev/null
+++ b/third_party/libaddressinput/chromium/cpp/src/util/trie.h
@@ -0,0 +1,68 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef I18N_ADDRESSINPUT_UTIL_TRIE_H_
+#define I18N_ADDRESSINPUT_UTIL_TRIE_H_
+
+#include <libaddressinput/util/basictypes.h>
+
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+
+namespace i18n {
+namespace addressinput {
+
+// A prefix search tree. Can return all objects whose keys start with a prefix
+// string.
+//
+// Maps keys to multiple objects. This property is useful when mapping region
+// names to region objects. For example, there's a "St. Petersburg" in Florida,
+// and there's a "St. Petersburg" in Russia. A lookup for "St. Petersburg" key
+// should return two distinct objects.
+template <typename T>
+class Trie {
+ public:
+ Trie();
+
+ ~Trie();
+
+ // Adds a mapping from |key| to |data_item|. Can be called with the same |key|
+ // multiple times.
+ void AddDataForKey(const std::string& key, const T& data_item);
+
+ // Adds all objects whose keys start with |key_prefix| to the |results|
+ // parameter. The |results| parameter should not be NULL.
+ void FindDataForKeyPrefix(const std::string& key_prefix,
+ std::set<T>* results) const;
+
+ private:
+ // All objects for this node in the trie. This field is a collection to enable
+ // mapping the same key to multiple objects.
+ std::list<T> data_list_;
+
+ // Trie sub nodes. The root trie stores the objects for the empty string key.
+ // The children of the root trie store the objects for the one-letter keys.
+ // The grand-children of the root trie store the objects for the two-letter
+ // keys, and so on.
+ std::map<char, Trie<T>*> sub_nodes_;
+
+ DISALLOW_COPY_AND_ASSIGN(Trie);
+};
+
+} // namespace addressinput
+} // namespace i18n
+
+#endif // I18N_ADDRESSINPUT_UTIL_TRIE_H_
diff --git a/third_party/libaddressinput/chromium/cpp/test/util/trie_test.cc b/third_party/libaddressinput/chromium/cpp/test/util/trie_test.cc
new file mode 100644
index 0000000..0baa0b4
--- /dev/null
+++ b/third_party/libaddressinput/chromium/cpp/test/util/trie_test.cc
@@ -0,0 +1,114 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "util/trie.h"
+
+#include <set>
+#include <string>
+
+#include <gtest/gtest.h>
+
+namespace i18n {
+namespace addressinput {
+
+namespace {
+
+TEST(TrieTest, EmptyTrieHasNoData) {
+ Trie<std::string> trie;
+ std::set<std::string> result;
+ trie.FindDataForKeyPrefix("key", &result);
+ EXPECT_TRUE(result.empty());
+}
+
+TEST(TrieTest, CanGetDataByExactKey) {
+ Trie<std::string> trie;
+ trie.AddDataForKey("hello", "world");
+ std::set<std::string> result;
+ trie.FindDataForKeyPrefix("hello", &result);
+ std::set<std::string> expected;
+ expected.insert("world");
+ EXPECT_EQ(expected, result);
+}
+
+TEST(TrieTest, CanGetDataByPrefix) {
+ Trie<std::string> trie;
+ trie.AddDataForKey("hello", "world");
+ std::set<std::string> result;
+ trie.FindDataForKeyPrefix("he", &result);
+ std::set<std::string> expected;
+ expected.insert("world");
+ EXPECT_EQ(expected, result);
+}
+
+TEST(TrieTest, KeyTooLongNoData) {
+ Trie<std::string> trie;
+ trie.AddDataForKey("hello", "world");
+ std::set<std::string> result;
+ trie.FindDataForKeyPrefix("helloo", &result);
+ EXPECT_TRUE(result.empty());
+}
+
+TEST(TrieTest, CommonPrefixFindsMultipleData) {
+ Trie<std::string> trie;
+ trie.AddDataForKey("hello", "world");
+ trie.AddDataForKey("howdy", "buddy");
+ trie.AddDataForKey("foo", "bar");
+ std::set<std::string> results;
+ trie.FindDataForKeyPrefix("h", &results);
+ std::set<std::string> expected;
+ expected.insert("world");
+ expected.insert("buddy");
+ EXPECT_EQ(expected, results);
+}
+
+TEST(TrieTest, KeyCanBePrefixOfOtherKey) {
+ Trie<std::string> trie;
+ trie.AddDataForKey("hello", "world");
+ trie.AddDataForKey("helloo", "woorld");
+ trie.AddDataForKey("hella", "warld");
+ std::set<std::string> results;
+ trie.FindDataForKeyPrefix("hello", &results);
+ std::set<std::string> expected;
+ expected.insert("world");
+ expected.insert("woorld");
+ EXPECT_EQ(expected, results);
+}
+
+TEST(TrieTest, AllowMutlipleKeys) {
+ Trie<std::string> trie;
+ trie.AddDataForKey("hello", "world");
+ trie.AddDataForKey("hello", "woorld");
+ std::set<std::string> results;
+ trie.FindDataForKeyPrefix("hello", &results);
+ std::set<std::string> expected;
+ expected.insert("world");
+ expected.insert("woorld");
+ EXPECT_EQ(expected, results);
+}
+
+TEST(TrieTest, CanFindVeryLongKey) {
+ Trie<std::string> trie;
+ static const char kVeryLongKey[] = "1234567890qwertyuioasdfghj";
+ trie.AddDataForKey(kVeryLongKey, "world");
+ std::set<std::string> result;
+ trie.FindDataForKeyPrefix(kVeryLongKey, &result);
+ std::set<std::string> expected;
+ expected.insert("world");
+ EXPECT_EQ(expected, result);
+}
+
+} // namespace
+
+} // namespace addressinput
+} // namespace i18n
diff --git a/third_party/libaddressinput/chromium/string_compare.cc b/third_party/libaddressinput/chromium/string_compare.cc
deleted file mode 100644
index 2affd30..0000000
--- a/third_party/libaddressinput/chromium/string_compare.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "cpp/src/util/string_compare.h"
-
-#include "base/i18n/string_compare.h"
-#include "base/logging.h"
-#include "base/strings/utf_string_conversions.h"
-#include "cpp/include/libaddressinput/util/scoped_ptr.h"
-#include "third_party/icu/source/i18n/unicode/coll.h"
-
-namespace i18n {
-namespace addressinput {
-
-bool LooseStringCompare(const std::string& a, const std::string& b) {
- UErrorCode error_code = U_ZERO_ERROR;
- scoped_ptr<icu::Collator> collator(icu::Collator::createInstance(error_code));
- // Differences in diacriticals and case are ignored.
- collator->setStrength(icu::Collator::PRIMARY);
- DCHECK(U_SUCCESS(error_code));
- UCollationResult result = base::i18n::CompareString16WithCollator(
- collator.get(),
- base::UTF8ToUTF16(a),
- base::UTF8ToUTF16(b));
- DCHECK(U_SUCCESS(error_code));
- return result == UCOL_EQUAL;
-}
-
-} // namespace addressinput
-} // namespace i18n
diff --git a/third_party/libaddressinput/libaddressinput.gyp b/third_party/libaddressinput/libaddressinput.gyp
index 04c3f95..b0dfee2 100644
--- a/third_party/libaddressinput/libaddressinput.gyp
+++ b/third_party/libaddressinput/libaddressinput.gyp
@@ -49,12 +49,12 @@
'<(SHARED_INTERMEDIATE_DIR)/libaddressinput/',
],
'sources': [
+ 'chromium/canonicalize_string.cc',
'chromium/chrome_downloader_impl.cc',
'chromium/chrome_downloader_impl.h',
'chromium/chrome_storage_impl.cc',
'chromium/chrome_storage_impl.h',
'chromium/json.cc',
- 'chromium/string_compare.cc',
'<(libaddressinput_dir)/cpp/include/libaddressinput/address_data.h',
'<(libaddressinput_dir)/cpp/include/libaddressinput/address_field.h',
'<(libaddressinput_dir)/cpp/include/libaddressinput/address_problem.h',
@@ -92,9 +92,13 @@
'<(libaddressinput_dir)/cpp/src/util/md5.cc',
'<(libaddressinput_dir)/cpp/src/util/md5.h',
'<(libaddressinput_dir)/cpp/src/util/stl_util.h',
+ '<(libaddressinput_dir)/cpp/src/util/canonicalize_string.h',
+ '<(libaddressinput_dir)/cpp/src/util/string_compare.cc',
'<(libaddressinput_dir)/cpp/src/util/string_compare.h',
'<(libaddressinput_dir)/cpp/src/util/string_split.cc',
'<(libaddressinput_dir)/cpp/src/util/string_split.h',
+ '<(libaddressinput_dir)/cpp/src/util/trie.cc',
+ '<(libaddressinput_dir)/cpp/src/util/trie.h',
],
'defines': [
'VALIDATION_DATA_URL="https://i18napis.appspot.com/ssl-aggregate-address/"',
@@ -152,6 +156,7 @@
'<(libaddressinput_dir)/cpp/test/util/scoped_ptr_unittest.cc',
'<(libaddressinput_dir)/cpp/test/util/stl_util_unittest.cc',
'<(libaddressinput_dir)/cpp/test/util/string_split_unittest.cc',
+ '<(libaddressinput_dir)/cpp/test/util/trie_test.cc',
],
'defines': [
'TEST_DATA_DIR="third_party/libaddressinput/src/testdata"',