summaryrefslogtreecommitdiffstats
path: root/chrome/browser/autofill/phone_field.cc
diff options
context:
space:
mode:
authorgeorgey@chromium.org <georgey@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-18 07:15:26 +0000
committergeorgey@chromium.org <georgey@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-18 07:15:26 +0000
commit9f60a236323e348ab1a0343d6acdafa99125ac3b (patch)
tree06fda9b84f89ca567f84124a41ca54f2c1c1658b /chrome/browser/autofill/phone_field.cc
parent29a643cfae48661a282ef72131644c62f532b06d (diff)
downloadchromium_src-9f60a236323e348ab1a0343d6acdafa99125ac3b.zip
chromium_src-9f60a236323e348ab1a0343d6acdafa99125ac3b.tar.gz
chromium_src-9f60a236323e348ab1a0343d6acdafa99125ac3b.tar.bz2
Changed the parsing code for phone number fields to handle the different combinations of phone fields found in forms.
Should fix many of the phone parsing bugs and make future changes to the parsing code much easier. BUG=71893,71897 TEST=unit-tested; please also check that any web form that includes phone number fields gets those fields parsed correctly. Review URL: http://codereview.chromium.org/6480083 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@75368 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autofill/phone_field.cc')
-rw-r--r--chrome/browser/autofill/phone_field.cc285
1 files changed, 193 insertions, 92 deletions
diff --git a/chrome/browser/autofill/phone_field.cc b/chrome/browser/autofill/phone_field.cc
index f6b41a2..ff6308d 100644
--- a/chrome/browser/autofill/phone_field.cc
+++ b/chrome/browser/autofill/phone_field.cc
@@ -15,6 +15,85 @@
#include "grit/autofill_resources.h"
#include "ui/base/l10n/l10n_util.h"
+// Phone field grammars - first matched grammar will be parsed. Grammars are
+// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
+// parsed separately unless they are necessary parts of the match.
+// The following notation is used to describe the patterns:
+// <cc> - country code field.
+// <ac> - area code field.
+// <phone> - phone or prefix.
+// <suffix> - suffix.
+// <ext> - extension.
+// :N means field is limited to N characters, otherwise it is unlimited.
+// (pattern <field>)? means pattern is optional and matched separately.
+PhoneField::Parser PhoneField::phone_field_grammars_[] = {
+ // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
+ // (Ext: <ext>)?)?
+ { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> - <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+};
+
PhoneField::~PhoneField() {}
// static
@@ -33,7 +112,7 @@ PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter,
// be the last as it is a catch all case ("fax" and "faxarea" parsed as FAX,
// but "area" and "someotherarea" parsed as HOME, for example).
for (int i = PHONE_TYPE_MAX - 1; i >= PHONE_TYPE_FIRST; --i) {
- phone_field->SetPhoneType(static_cast<PhoneField::PHONE_TYPE>(i));
+ phone_field->SetPhoneType(static_cast<PhoneField::PhoneType>(i));
if (ParseInternal(phone_field.get(), iter, i == HOME_PHONE))
return phone_field.release();
}
@@ -49,7 +128,7 @@ PhoneField* PhoneField::ParseECML(
AutoFillField* field;
if (ParseText(iter, pattern, &field)) {
PhoneField* phone_field = new PhoneField();
- phone_field->phone_ = field;
+ phone_field->parsed_phone_fields_[FIELD_PHONE] = field;
return phone_field;
}
@@ -57,35 +136,42 @@ PhoneField* PhoneField::ParseECML(
}
bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const {
- bool ok;
+ bool ok = false;
- if (area_code_ != NULL) {
- ok = Add(field_type_map, area_code_,
- AutoFillType(number_->GetCityCodeType()));
- DCHECK(ok);
+ DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed.
- if (prefix_ != NULL) {
- // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
- // we fill only the prefix depending on the size of the input field.
- ok = ok && Add(field_type_map,
- prefix_,
- AutoFillType(number_->GetNumberType()));
+ if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) ||
+ (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) ||
+ (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) {
+ if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_COUNTRY_CODE],
+ AutoFillType(number_->GetCountryCodeType()));
DCHECK(ok);
- // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
- // we fill only the suffix depending on the size of the input field.
- ok = ok && Add(field_type_map,
- phone_,
- AutoFillType(number_->GetNumberType()));
+ }
+ if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_AREA_CODE],
+ AutoFillType(number_->GetCityCodeType()));
DCHECK(ok);
- } else {
- ok = ok && Add(field_type_map,
- phone_,
- AutoFillType(number_->GetNumberType()));
+ }
+ // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
+ // we fill only the prefix depending on the size of the input field.
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_PHONE],
+ AutoFillType(number_->GetNumberType()));
+ DCHECK(ok);
+ // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
+ // we fill only the suffix depending on the size of the input field.
+ if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_SUFFIX],
+ AutoFillType(number_->GetNumberType()));
DCHECK(ok);
}
} else {
ok = Add(field_type_map,
- phone_,
+ parsed_phone_fields_[FIELD_PHONE],
AutoFillType(number_->GetWholeNumberType()));
DCHECK(ok);
}
@@ -93,17 +179,27 @@ bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const {
return ok;
}
-PhoneField::PhoneField()
- : phone_(NULL),
- area_code_(NULL),
- prefix_(NULL),
- extension_(NULL) {
+PhoneField::PhoneField() {
+ memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
SetPhoneType(HOME_PHONE);
}
+string16 PhoneField::GetCountryRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE);
+}
+
string16 PhoneField::GetAreaRegex() const {
// This one is the same for Home and Fax numbers.
- return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE);
+ string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE);
+ area_code.append(ASCIIToUTF16("|")); // Regexp separator.
+ area_code.append(GetAreaNoTextRegex());
+ return area_code;
+}
+
+string16 PhoneField::GetAreaNoTextRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE);
}
string16 PhoneField::GetPhoneRegex() const {
@@ -116,11 +212,21 @@ string16 PhoneField::GetPhoneRegex() const {
return string16();
}
+string16 PhoneField::GetPrefixSeparatorRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE);
+}
+
string16 PhoneField::GetPrefixRegex() const {
// This one is the same for Home and Fax numbers.
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE);
}
+string16 PhoneField::GetSuffixSeparatorRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE);
+}
+
string16 PhoneField::GetSuffixRegex() const {
// This one is the same for Home and Fax numbers.
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE);
@@ -131,6 +237,24 @@ string16 PhoneField::GetExtensionRegex() const {
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE);
}
+string16 PhoneField::GetRegExp(RegexType regex_id) const {
+ switch (regex_id) {
+ case REGEX_COUNTRY: return GetCountryRegex();
+ case REGEX_AREA: return GetAreaRegex();
+ case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex();
+ case REGEX_PHONE: return GetPhoneRegex();
+ case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex();
+ case REGEX_PREFIX: return GetPrefixRegex();
+ case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex();
+ case REGEX_SUFFIX: return GetSuffixRegex();
+ case REGEX_EXTENSION: return GetExtensionRegex();
+ default:
+ NOTREACHED();
+ break;
+ }
+ return string16();
+}
+
// static
bool PhoneField::ParseInternal(
PhoneField *phone_field,
@@ -143,85 +267,62 @@ bool PhoneField::ParseInternal(
return false;
std::vector<AutoFillField*>::const_iterator q = *iter;
+
// The form owns the following variables, so they should not be deleted.
- AutoFillField* phone = NULL;
- AutoFillField* phone2 = NULL;
- AutoFillField* phone3 = NULL;
- bool area_code = false; // true if we've parsed an area code field.
-
- // Some pages, such as BloomingdalesShipping.html, have a field labeled
- // "Area Code and Phone"; we want to parse this as a phone number field so
- // we look for "phone" before we look for "area code".
- if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) {
- area_code = false;
- // Check the case when the match is for non-home phone and area code, e.g.
- // first field is a "Fax area code" and the subsequent is "Fax phone".
- if (!regular_phone) {
- // Attempt parsing of the same field as an area code and then phone:
- std::vector<AutoFillField*>::const_iterator temp_it = *iter;
- AutoFillField* tmp_phone1 = NULL;
- AutoFillField* tmp_phone2 = NULL;
- if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) &&
- ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) {
- phone = tmp_phone1;
- phone2 = tmp_phone2;
- q = temp_it;
- area_code = true;
+ AutoFillField* parsed_fields[FIELD_MAX];
+
+ for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) {
+ memset(parsed_fields, 0, sizeof(parsed_fields));
+ q = *iter;
+ // Attempt to parse next possible match.
+ for (; i < arraysize(phone_field_grammars_) &&
+ phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) {
+ if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex),
+ &parsed_fields[phone_field_grammars_[i].phone_part]))
+ break;
+ if (phone_field_grammars_[i].max_size &&
+ (!parsed_fields[phone_field_grammars_[i].phone_part]->max_length() ||
+ phone_field_grammars_[i].max_size <
+ parsed_fields[phone_field_grammars_[i].phone_part]->max_length())) {
+ break;
}
}
- } else {
- if (!ParseText(&q, phone_field->GetAreaRegex(), &phone))
- return false;
- area_code = true;
- // If this is not a home phone and there was no specification before
- // the phone number actually starts (e.g. field 1 "Area code:", field 2
- // "Fax:"), we skip searching for preffix and suffix and bail out.
- if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone)
- return false;
+ if (i >= arraysize(phone_field_grammars_))
+ return false; // Parsing failed.
+ if (phone_field_grammars_[i].regex == REGEX_SEPARATOR)
+ break; // Parsing succeeded.
+ do {
+ ++i;
+ } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR);
+ if (i + 1 == arraysize(phone_field_grammars_))
+ return false; // Tried through all the possibilities - did not match.
}
+ if (!parsed_fields[FIELD_PHONE])
+ return false;
- // Sometimes phone number fields are separated by "-" (e.g. test page
- // Crate and Barrel Check Out.html). Also, area codes are sometimes
- // surrounded by parentheses, so a ")" may appear after the area code field.
- //
- // We used to match "tel" here, which we've seen in field names (e.g. on
- // Newegg2.html), but that's too general: some pages (e.g.
- // uk/Furniture123-1.html) have several phone numbers in succession and we
- // don't want those to be parsed as components of a single phone number.
- if (phone2 == NULL)
- ParseText(&q, phone_field->GetPrefixRegex(), &phone2);
-
- // Look for a third text box.
- if (phone2)
- ParseText(&q, phone_field->GetSuffixRegex(), &phone3);
+ for (int i = 0; i < FIELD_MAX; ++i)
+ phone_field->parsed_phone_fields_[i] = parsed_fields[i];
- // Now we have one, two, or three phone number text fields. Package them
- // up into a PhoneField object.
+ // Look for optional fields.
- if (phone2 == NULL) { // only one field
- if (area_code) {
- // It's an area code - it doesn't make sense.
- return false;
- }
- phone_field->phone_ = phone;
- } else {
- phone_field->area_code_ = phone;
- if (phone3 == NULL) { // two fields
- phone_field->phone_ = phone2;
- } else { // three boxes: area code, prefix and suffix
- phone_field->prefix_ = phone2;
- phone_field->phone_ = phone3;
+ // Look for a third text box.
+ if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
+ if (!ParseText(&q, phone_field->GetSuffixRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
+ ParseText(&q, phone_field->GetSuffixSeparatorRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
}
}
// Now look for an extension.
- ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_);
+ ParseText(&q, phone_field->GetExtensionRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
*iter = q;
return true;
}
-void PhoneField::SetPhoneType(PHONE_TYPE phone_type) {
+void PhoneField::SetPhoneType(PhoneType phone_type) {
// Field types are different as well, so we create a temporary phone number,
// to get relevant field types.
if (phone_type == HOME_PHONE)