diff options
author | georgey@chromium.org <georgey@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-18 07:15:26 +0000 |
---|---|---|
committer | georgey@chromium.org <georgey@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-18 07:15:26 +0000 |
commit | 9f60a236323e348ab1a0343d6acdafa99125ac3b (patch) | |
tree | 06fda9b84f89ca567f84124a41ca54f2c1c1658b /chrome/browser/autofill/phone_field.cc | |
parent | 29a643cfae48661a282ef72131644c62f532b06d (diff) | |
download | chromium_src-9f60a236323e348ab1a0343d6acdafa99125ac3b.zip chromium_src-9f60a236323e348ab1a0343d6acdafa99125ac3b.tar.gz chromium_src-9f60a236323e348ab1a0343d6acdafa99125ac3b.tar.bz2 |
Changed the parsing code for phone number fields to handle the different combinations of phone fields that appear in forms.
Should fix a lot of the phone parsing bugs and make changes to parsing code much easier.
BUG=71893,71897
TEST=unit-tested; please also check that any web form that includes phone number fields gets those fields parsed correctly
Review URL: http://codereview.chromium.org/6480083
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@75368 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autofill/phone_field.cc')
-rw-r--r-- | chrome/browser/autofill/phone_field.cc | 285 |
1 files changed, 193 insertions, 92 deletions
diff --git a/chrome/browser/autofill/phone_field.cc b/chrome/browser/autofill/phone_field.cc index f6b41a2..ff6308d 100644 --- a/chrome/browser/autofill/phone_field.cc +++ b/chrome/browser/autofill/phone_field.cc @@ -15,6 +15,85 @@ #include "grit/autofill_resources.h" #include "ui/base/l10n/l10n_util.h" +// Phone field grammars - first matched grammar will be parsed. Grammars are +// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are +// parsed separately unless they are necessary parts of the match. +// The following notation is used to describe the patterns: +// <cc> - country code field. +// <ac> - area code field. +// <phone> - phone or prefix. +// <suffix> - suffix. +// <ext> - extension. +// :N means field is limited to N characters, otherwise it is unlimited. +// (pattern <field>)? means pattern is optional and matched separately. +PhoneField::Parser PhoneField::phone_field_grammars_[] = { + // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix> + // (Ext: <ext>)?)? + { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)? 
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)? + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)? 
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> - <ac> - <phone> (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> - <phone> (Ext: <ext>)? + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <phone> (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, +}; + PhoneField::~PhoneField() {} // static @@ -33,7 +112,7 @@ PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter, // be the last as it is a catch all case ("fax" and "faxarea" parsed as FAX, // but "area" and "someotherarea" parsed as HOME, for example). 
for (int i = PHONE_TYPE_MAX - 1; i >= PHONE_TYPE_FIRST; --i) { - phone_field->SetPhoneType(static_cast<PhoneField::PHONE_TYPE>(i)); + phone_field->SetPhoneType(static_cast<PhoneField::PhoneType>(i)); if (ParseInternal(phone_field.get(), iter, i == HOME_PHONE)) return phone_field.release(); } @@ -49,7 +128,7 @@ PhoneField* PhoneField::ParseECML( AutoFillField* field; if (ParseText(iter, pattern, &field)) { PhoneField* phone_field = new PhoneField(); - phone_field->phone_ = field; + phone_field->parsed_phone_fields_[FIELD_PHONE] = field; return phone_field; } @@ -57,35 +136,42 @@ PhoneField* PhoneField::ParseECML( } bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { - bool ok; + bool ok = false; - if (area_code_ != NULL) { - ok = Add(field_type_map, area_code_, - AutoFillType(number_->GetCityCodeType())); - DCHECK(ok); + DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed. - if (prefix_ != NULL) { - // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form - // we fill only the prefix depending on the size of the input field. - ok = ok && Add(field_type_map, - prefix_, - AutoFillType(number_->GetNumberType())); + if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) || + (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) || + (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) { + if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) { + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_COUNTRY_CODE], + AutoFillType(number_->GetCountryCodeType())); DCHECK(ok); - // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form - // we fill only the suffix depending on the size of the input field. 
- ok = ok && Add(field_type_map, - phone_, - AutoFillType(number_->GetNumberType())); + } + if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) { + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_AREA_CODE], + AutoFillType(number_->GetCityCodeType())); DCHECK(ok); - } else { - ok = ok && Add(field_type_map, - phone_, - AutoFillType(number_->GetNumberType())); + } + // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form + // we fill only the prefix depending on the size of the input field. + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_PHONE], + AutoFillType(number_->GetNumberType())); + DCHECK(ok); + // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form + // we fill only the suffix depending on the size of the input field. + if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) { + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_SUFFIX], + AutoFillType(number_->GetNumberType())); DCHECK(ok); } } else { ok = Add(field_type_map, - phone_, + parsed_phone_fields_[FIELD_PHONE], AutoFillType(number_->GetWholeNumberType())); DCHECK(ok); } @@ -93,17 +179,27 @@ bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { return ok; } -PhoneField::PhoneField() - : phone_(NULL), - area_code_(NULL), - prefix_(NULL), - extension_(NULL) { +PhoneField::PhoneField() { + memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_)); SetPhoneType(HOME_PHONE); } +string16 PhoneField::GetCountryRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE); +} + string16 PhoneField::GetAreaRegex() const { // This one is the same for Home and Fax numbers. - return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); + string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); + area_code.append(ASCIIToUTF16("|")); // Regexp separator. 
+ area_code.append(GetAreaNoTextRegex()); + return area_code; +} + +string16 PhoneField::GetAreaNoTextRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE); } string16 PhoneField::GetPhoneRegex() const { @@ -116,11 +212,21 @@ string16 PhoneField::GetPhoneRegex() const { return string16(); } +string16 PhoneField::GetPrefixSeparatorRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE); +} + string16 PhoneField::GetPrefixRegex() const { // This one is the same for Home and Fax numbers. return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE); } +string16 PhoneField::GetSuffixSeparatorRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE); +} + string16 PhoneField::GetSuffixRegex() const { // This one is the same for Home and Fax numbers. 
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE); @@ -131,6 +237,24 @@ string16 PhoneField::GetExtensionRegex() const { return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE); } +string16 PhoneField::GetRegExp(RegexType regex_id) const { + switch (regex_id) { + case REGEX_COUNTRY: return GetCountryRegex(); + case REGEX_AREA: return GetAreaRegex(); + case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex(); + case REGEX_PHONE: return GetPhoneRegex(); + case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex(); + case REGEX_PREFIX: return GetPrefixRegex(); + case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex(); + case REGEX_SUFFIX: return GetSuffixRegex(); + case REGEX_EXTENSION: return GetExtensionRegex(); + default: + NOTREACHED(); + break; + } + return string16(); +} + // static bool PhoneField::ParseInternal( PhoneField *phone_field, @@ -143,85 +267,62 @@ bool PhoneField::ParseInternal( return false; std::vector<AutoFillField*>::const_iterator q = *iter; + // The form owns the following variables, so they should not be deleted. - AutoFillField* phone = NULL; - AutoFillField* phone2 = NULL; - AutoFillField* phone3 = NULL; - bool area_code = false; // true if we've parsed an area code field. - - // Some pages, such as BloomingdalesShipping.html, have a field labeled - // "Area Code and Phone"; we want to parse this as a phone number field so - // we look for "phone" before we look for "area code". - if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) { - area_code = false; - // Check the case when the match is for non-home phone and area code, e.g. - // first field is a "Fax area code" and the subsequent is "Fax phone". 
- if (!regular_phone) { - // Attempt parsing of the same field as an area code and then phone: - std::vector<AutoFillField*>::const_iterator temp_it = *iter; - AutoFillField* tmp_phone1 = NULL; - AutoFillField* tmp_phone2 = NULL; - if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) && - ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) { - phone = tmp_phone1; - phone2 = tmp_phone2; - q = temp_it; - area_code = true; + AutoFillField* parsed_fields[FIELD_MAX]; + + for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) { + memset(parsed_fields, 0, sizeof(parsed_fields)); + q = *iter; + // Attempt to parse next possible match. + for (; i < arraysize(phone_field_grammars_) && + phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) { + if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex), + &parsed_fields[phone_field_grammars_[i].phone_part])) + break; + if (phone_field_grammars_[i].max_size && + (!parsed_fields[phone_field_grammars_[i].phone_part]->max_length() || + phone_field_grammars_[i].max_size < + parsed_fields[phone_field_grammars_[i].phone_part]->max_length())) { + break; } } - } else { - if (!ParseText(&q, phone_field->GetAreaRegex(), &phone)) - return false; - area_code = true; - // If this is not a home phone and there was no specification before - // the phone number actually starts (e.g. field 1 "Area code:", field 2 - // "Fax:"), we skip searching for preffix and suffix and bail out. - if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone) - return false; + if (i >= arraysize(phone_field_grammars_)) + return false; // Parsing failed. + if (phone_field_grammars_[i].regex == REGEX_SEPARATOR) + break; // Parsing succeeded. + do { + ++i; + } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR); + if (i + 1 == arraysize(phone_field_grammars_)) + return false; // Tried through all the possibilities - did not match. 
} + if (!parsed_fields[FIELD_PHONE]) + return false; - // Sometimes phone number fields are separated by "-" (e.g. test page - // Crate and Barrel Check Out.html). Also, area codes are sometimes - // surrounded by parentheses, so a ")" may appear after the area code field. - // - // We used to match "tel" here, which we've seen in field names (e.g. on - // Newegg2.html), but that's too general: some pages (e.g. - // uk/Furniture123-1.html) have several phone numbers in succession and we - // don't want those to be parsed as components of a single phone number. - if (phone2 == NULL) - ParseText(&q, phone_field->GetPrefixRegex(), &phone2); - - // Look for a third text box. - if (phone2) - ParseText(&q, phone_field->GetSuffixRegex(), &phone3); + for (int i = 0; i < FIELD_MAX; ++i) + phone_field->parsed_phone_fields_[i] = parsed_fields[i]; - // Now we have one, two, or three phone number text fields. Package them - // up into a PhoneField object. + // Look for optional fields. - if (phone2 == NULL) { // only one field - if (area_code) { - // It's an area code - it doesn't make sense. - return false; - } - phone_field->phone_ = phone; - } else { - phone_field->area_code_ = phone; - if (phone3 == NULL) { // two fields - phone_field->phone_ = phone2; - } else { // three boxes: area code, prefix and suffix - phone_field->prefix_ = phone2; - phone_field->phone_ = phone3; + // Look for a third text box. + if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) { + if (!ParseText(&q, phone_field->GetSuffixRegex(), + &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) { + ParseText(&q, phone_field->GetSuffixSeparatorRegex(), + &phone_field->parsed_phone_fields_[FIELD_SUFFIX]); } } // Now look for an extension. 
- ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_); + ParseText(&q, phone_field->GetExtensionRegex(), + &phone_field->parsed_phone_fields_[FIELD_EXTENSION]); *iter = q; return true; } -void PhoneField::SetPhoneType(PHONE_TYPE phone_type) { +void PhoneField::SetPhoneType(PhoneType phone_type) { // Field types are different as well, so we create a temporary phone number, // to get relevant field types. if (phone_type == HOME_PHONE) |