summary | refs | log | tree | commit | diff | stats
path: root/chrome
diff options
context:
space:
mode:
Diffstat (limited to 'chrome')
-rw-r--r-- chrome/browser/autofill/autofill_manager_unittest.cc 1
-rw-r--r-- chrome/browser/autofill/autofill_resources.grd 18
-rw-r--r-- chrome/browser/autofill/form_structure_unittest.cc 6
-rw-r--r-- chrome/browser/autofill/phone_field.cc 285
-rw-r--r-- chrome/browser/autofill/phone_field.h 59
-rw-r--r-- chrome/browser/autofill/phone_field_unittest.cc 9
-rw-r--r-- chrome/test/data/autofill/heuristics/input/form_phones_en.html 75
-rw-r--r-- chrome/test/data/autofill/heuristics/output/form_phones_en.out 48
8 files changed, 394 insertions, 107 deletions
diff --git a/chrome/browser/autofill/autofill_manager_unittest.cc b/chrome/browser/autofill/autofill_manager_unittest.cc
index e6c2548..4aa6504 100644
--- a/chrome/browser/autofill/autofill_manager_unittest.cc
+++ b/chrome/browser/autofill/autofill_manager_unittest.cc
@@ -1648,6 +1648,7 @@ TEST_F(AutoFillManagerTest, FillPhoneNumber) {
char test_data[] = "1234567890123456";
for (int i = arraysize(test_data) - 1; i >= 0; --i) {
test_data[i] = 0;
+ SCOPED_TRACE(StringPrintf("Testing phone: %s", test_data));
work_profile->SetInfo(phone_type, ASCIIToUTF16(test_data));
// The page ID sent to the AutoFillManager from the RenderView, used to send
// an IPC message back to the renderer.
diff --git a/chrome/browser/autofill/autofill_resources.grd b/chrome/browser/autofill/autofill_resources.grd
index 475e706..6f491b6 100644
--- a/chrome/browser/autofill/autofill_resources.grd
+++ b/chrome/browser/autofill/autofill_resources.grd
@@ -108,17 +108,29 @@
<message name="IDS_AUTOFILL_PHONE_RE">
phone<!-- de-DE -->|telefonnummer<!-- es -->|telefono|teléfono<!-- fr-FR -->|telfixe<!-- ja-JP -->|電話<!-- pt-BR, pt-PT -->|telefone|telemovel<!-- ru -->|телефон<!-- zh-CN -->|电话
</message>
+ <message name="IDS_AUTOFILL_COUNTRY_CODE_RE">
+ country.*code|ccode
+ </message>
+ <message name="IDS_AUTOFILL_AREA_CODE_NOTEXT_RE">
+ ^\($
+ </message>
<message name="IDS_AUTOFILL_AREA_CODE_RE">
- area code
+ area.*code|acode
</message>
<message name="IDS_AUTOFILL_FAX_RE">
fax<!-- fr-FR -->|télécopie|telecopie<!-- ja-JP -->|ファックス<!-- ru -->|факс<!-- zh-CN -->|传真<!-- zh-TW -->|傳真
</message>
+ <message name="IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE">
+ ^-$|^\)$
+ </message>
+ <message name="IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE">
+ ^-$
+ </message>
<message name="IDS_AUTOFILL_PHONE_PREFIX_RE">
- ^-$|\)$|prefix<!-- fr-FR -->|preselection<!-- pt-BR, pt-PT -->|ddd
+ prefix<!-- fr-FR -->|preselection<!-- pt-BR, pt-PT -->|ddd
</message>
<message name="IDS_AUTOFILL_PHONE_SUFFIX_RE">
- ^-$|suffix
+ suffix
</message>
<message name="IDS_AUTOFILL_PHONE_EXTENSION_RE">
ext<!-- pt-BR, pt-PT -->|ramal
diff --git a/chrome/browser/autofill/form_structure_unittest.cc b/chrome/browser/autofill/form_structure_unittest.cc
index bd3056a..f4d1cb9 100644
--- a/chrome/browser/autofill/form_structure_unittest.cc
+++ b/chrome/browser/autofill/form_structure_unittest.cc
@@ -1113,14 +1113,16 @@ TEST(FormStructureTest, ThreePartPhoneNumber) {
ASCIIToUTF16("dayphone2"),
string16(),
ASCIIToUTF16("text"),
- 0,
+ 3, // Size of prefix is 3.
false));
form.fields.push_back(
webkit_glue::FormField(ASCIIToUTF16("-"),
ASCIIToUTF16("dayphone3"),
string16(),
ASCIIToUTF16("text"),
- 0,
+ 4, // Size of suffix is 4. If unlimited size is
+ // passed, phone will be parsed as
+ // <country code> - <area code> - <phone>.
false));
form.fields.push_back(
webkit_glue::FormField(ASCIIToUTF16("ext.:"),
diff --git a/chrome/browser/autofill/phone_field.cc b/chrome/browser/autofill/phone_field.cc
index f6b41a2..ff6308d 100644
--- a/chrome/browser/autofill/phone_field.cc
+++ b/chrome/browser/autofill/phone_field.cc
@@ -15,6 +15,85 @@
#include "grit/autofill_resources.h"
#include "ui/base/l10n/l10n_util.h"
+// Phone field grammars, tried in order; the first grammar that matches wins.
+// Each grammar ends with a { REGEX_SEPARATOR, FIELD_NONE, 0 } row. A trailing
+// suffix and extension are parsed separately unless the grammar includes them.
+// Notation used in the per-grammar comments below:
+// <cc> - country code field; <ac> - area code field;
+// <phone> - phone or prefix; <suffix> - suffix; <ext> - extension.
+// :N means the field must declare a max length of at most N characters;
+// without :N the field's length is not checked.
+// (pattern <field>)? means the pattern is optional and matched separately.
+// NOTE(review): the "Phone: \( <ac> \) <phone>" and "Phone: <ac> - <phone>"
+// grammars have the same rows as an earlier grammar - confirm intent.
+PhoneField::Parser PhoneField::phone_field_grammars_[] = {
+ // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
+ // (Ext: <ext>)?)?
+ { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> - <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+};
+
PhoneField::~PhoneField() {}
// static
@@ -33,7 +112,7 @@ PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter,
// be the last as it is a catch all case ("fax" and "faxarea" parsed as FAX,
// but "area" and "someotherarea" parsed as HOME, for example).
for (int i = PHONE_TYPE_MAX - 1; i >= PHONE_TYPE_FIRST; --i) {
- phone_field->SetPhoneType(static_cast<PhoneField::PHONE_TYPE>(i));
+ phone_field->SetPhoneType(static_cast<PhoneField::PhoneType>(i));
if (ParseInternal(phone_field.get(), iter, i == HOME_PHONE))
return phone_field.release();
}
@@ -49,7 +128,7 @@ PhoneField* PhoneField::ParseECML(
AutoFillField* field;
if (ParseText(iter, pattern, &field)) {
PhoneField* phone_field = new PhoneField();
- phone_field->phone_ = field;
+ phone_field->parsed_phone_fields_[FIELD_PHONE] = field;
return phone_field;
}
@@ -57,35 +136,42 @@ PhoneField* PhoneField::ParseECML(
}
bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const {
- bool ok;
+ bool ok = false;
- if (area_code_ != NULL) {
- ok = Add(field_type_map, area_code_,
- AutoFillType(number_->GetCityCodeType()));
- DCHECK(ok);
+ DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed.
- if (prefix_ != NULL) {
- // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
- // we fill only the prefix depending on the size of the input field.
- ok = ok && Add(field_type_map,
- prefix_,
- AutoFillType(number_->GetNumberType()));
+ if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) ||
+ (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) ||
+ (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) {
+ if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_COUNTRY_CODE],
+ AutoFillType(number_->GetCountryCodeType()));
DCHECK(ok);
- // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
- // we fill only the suffix depending on the size of the input field.
- ok = ok && Add(field_type_map,
- phone_,
- AutoFillType(number_->GetNumberType()));
+ }
+ if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_AREA_CODE],
+ AutoFillType(number_->GetCityCodeType()));
DCHECK(ok);
- } else {
- ok = ok && Add(field_type_map,
- phone_,
- AutoFillType(number_->GetNumberType()));
+ }
+ // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
+ // we fill only the prefix depending on the size of the input field.
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_PHONE],
+ AutoFillType(number_->GetNumberType()));
+ DCHECK(ok);
+ // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
+ // we fill only the suffix depending on the size of the input field.
+ if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_SUFFIX],
+ AutoFillType(number_->GetNumberType()));
DCHECK(ok);
}
} else {
ok = Add(field_type_map,
- phone_,
+ parsed_phone_fields_[FIELD_PHONE],
AutoFillType(number_->GetWholeNumberType()));
DCHECK(ok);
}
@@ -93,17 +179,27 @@ bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const {
return ok;
}
-PhoneField::PhoneField()
- : phone_(NULL),
- area_code_(NULL),
- prefix_(NULL),
- extension_(NULL) {
+PhoneField::PhoneField() {
+ memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
SetPhoneType(HOME_PHONE);
}
+string16 PhoneField::GetCountryRegex() const {
+ // The country code pattern does not depend on the phone type (home or fax).
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE);
+}
+
string16 PhoneField::GetAreaRegex() const {
// This one is the same for Home and Fax numbers.
- return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE);
+ string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE);
+ area_code.append(ASCIIToUTF16("|")); // Regexp separator.
+ area_code.append(GetAreaNoTextRegex());
+ return area_code;
+}
+
+string16 PhoneField::GetAreaNoTextRegex() const {
+ // Shared by home and fax; the resource pattern (^\($) matches a lone "(".
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE);
 }
string16 PhoneField::GetPhoneRegex() const {
@@ -116,11 +212,21 @@ string16 PhoneField::GetPhoneRegex() const {
return string16();
}
+string16 PhoneField::GetPrefixSeparatorRegex() const {
+ // Shared by home and fax; the resource pattern matches a lone "-" or ")".
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE);
+}
+
string16 PhoneField::GetPrefixRegex() const {
// This one is the same for Home and Fax numbers.
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE);
}
+string16 PhoneField::GetSuffixSeparatorRegex() const {
+ // Shared by home and fax; the resource pattern matches a lone "-".
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE);
+}
+
string16 PhoneField::GetSuffixRegex() const {
// This one is the same for Home and Fax numbers.
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE);
@@ -131,6 +237,24 @@ string16 PhoneField::GetExtensionRegex() const {
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE);
}
+string16 PhoneField::GetRegExp(RegexType regex_id) const {
+ switch (regex_id) {
+ case REGEX_COUNTRY: return GetCountryRegex();
+ case REGEX_AREA: return GetAreaRegex();
+ case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex();
+ case REGEX_PHONE: return GetPhoneRegex();
+ case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex();
+ case REGEX_PREFIX: return GetPrefixRegex();
+ case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex();
+ case REGEX_SUFFIX: return GetSuffixRegex();
+ case REGEX_EXTENSION: return GetExtensionRegex();
+ default:  // REGEX_SEPARATOR has no pattern; any other id is invalid.
+ NOTREACHED();
+ break;
+ }
+ return string16();  // Reached only through the default case.
+}
+
// static
bool PhoneField::ParseInternal(
PhoneField *phone_field,
@@ -143,85 +267,62 @@ bool PhoneField::ParseInternal(
return false;
std::vector<AutoFillField*>::const_iterator q = *iter;
+
// The form owns the following variables, so they should not be deleted.
- AutoFillField* phone = NULL;
- AutoFillField* phone2 = NULL;
- AutoFillField* phone3 = NULL;
- bool area_code = false; // true if we've parsed an area code field.
-
- // Some pages, such as BloomingdalesShipping.html, have a field labeled
- // "Area Code and Phone"; we want to parse this as a phone number field so
- // we look for "phone" before we look for "area code".
- if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) {
- area_code = false;
- // Check the case when the match is for non-home phone and area code, e.g.
- // first field is a "Fax area code" and the subsequent is "Fax phone".
- if (!regular_phone) {
- // Attempt parsing of the same field as an area code and then phone:
- std::vector<AutoFillField*>::const_iterator temp_it = *iter;
- AutoFillField* tmp_phone1 = NULL;
- AutoFillField* tmp_phone2 = NULL;
- if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) &&
- ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) {
- phone = tmp_phone1;
- phone2 = tmp_phone2;
- q = temp_it;
- area_code = true;
+ AutoFillField* parsed_fields[FIELD_MAX];
+
+ for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) {
+ memset(parsed_fields, 0, sizeof(parsed_fields));
+ q = *iter;
+ // Attempt to parse next possible match.
+ for (; i < arraysize(phone_field_grammars_) &&
+ phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) {
+ if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex),
+ &parsed_fields[phone_field_grammars_[i].phone_part]))
+ break;
+ if (phone_field_grammars_[i].max_size &&
+ (!parsed_fields[phone_field_grammars_[i].phone_part]->max_length() ||
+ phone_field_grammars_[i].max_size <
+ parsed_fields[phone_field_grammars_[i].phone_part]->max_length())) {
+ break;
}
}
- } else {
- if (!ParseText(&q, phone_field->GetAreaRegex(), &phone))
- return false;
- area_code = true;
- // If this is not a home phone and there was no specification before
- // the phone number actually starts (e.g. field 1 "Area code:", field 2
- // "Fax:"), we skip searching for preffix and suffix and bail out.
- if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone)
- return false;
+ if (i >= arraysize(phone_field_grammars_))
+ return false; // Parsing failed.
+ if (phone_field_grammars_[i].regex == REGEX_SEPARATOR)
+ break; // Parsing succeeded.
+ do {
+ ++i;
+ } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR);
+ if (i + 1 == arraysize(phone_field_grammars_))
+ return false; // Tried through all the possibilities - did not match.
}
+ if (!parsed_fields[FIELD_PHONE])
+ return false;
- // Sometimes phone number fields are separated by "-" (e.g. test page
- // Crate and Barrel Check Out.html). Also, area codes are sometimes
- // surrounded by parentheses, so a ")" may appear after the area code field.
- //
- // We used to match "tel" here, which we've seen in field names (e.g. on
- // Newegg2.html), but that's too general: some pages (e.g.
- // uk/Furniture123-1.html) have several phone numbers in succession and we
- // don't want those to be parsed as components of a single phone number.
- if (phone2 == NULL)
- ParseText(&q, phone_field->GetPrefixRegex(), &phone2);
-
- // Look for a third text box.
- if (phone2)
- ParseText(&q, phone_field->GetSuffixRegex(), &phone3);
+ for (int i = 0; i < FIELD_MAX; ++i)
+ phone_field->parsed_phone_fields_[i] = parsed_fields[i];
- // Now we have one, two, or three phone number text fields. Package them
- // up into a PhoneField object.
+ // Look for optional fields.
- if (phone2 == NULL) { // only one field
- if (area_code) {
- // It's an area code - it doesn't make sense.
- return false;
- }
- phone_field->phone_ = phone;
- } else {
- phone_field->area_code_ = phone;
- if (phone3 == NULL) { // two fields
- phone_field->phone_ = phone2;
- } else { // three boxes: area code, prefix and suffix
- phone_field->prefix_ = phone2;
- phone_field->phone_ = phone3;
+ // Look for a third text box.
+ if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
+ if (!ParseText(&q, phone_field->GetSuffixRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
+ ParseText(&q, phone_field->GetSuffixSeparatorRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
}
}
// Now look for an extension.
- ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_);
+ ParseText(&q, phone_field->GetExtensionRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
*iter = q;
return true;
}
-void PhoneField::SetPhoneType(PHONE_TYPE phone_type) {
+void PhoneField::SetPhoneType(PhoneType phone_type) {
// Field types are different as well, so we create a temporary phone number,
// to get relevant field types.
if (phone_type == HOME_PHONE)
diff --git a/chrome/browser/autofill/phone_field.h b/chrome/browser/autofill/phone_field.h
index 2563a81..ab48a116 100644
--- a/chrome/browser/autofill/phone_field.h
+++ b/chrome/browser/autofill/phone_field.h
@@ -33,7 +33,7 @@ class PhoneField : public FormField {
private:
PhoneField();
- enum PHONE_TYPE {
+ enum PhoneType {
PHONE_TYPE_FIRST = 0,
HOME_PHONE = PHONE_TYPE_FIRST,
FAX_PHONE,
@@ -43,12 +43,39 @@ class PhoneField : public FormField {
};
// Some field names are different for phone and fax.
+ string16 GetCountryRegex() const;
+ // Matches every area code pattern, including GetAreaNoTextRegex()'s.
string16 GetAreaRegex() const;
+ // Pattern that marks the area code when the fields have no descriptive text,
+ // only punctuation between them (e.g. field1 "(" field2 ")" field3 "-" field4
+ // means Country Code, Area Code, Prefix, Suffix).
+ string16 GetAreaNoTextRegex() const;
string16 GetPhoneRegex() const;
+ string16 GetPrefixSeparatorRegex() const;
string16 GetPrefixRegex() const;
+ string16 GetSuffixSeparatorRegex() const;
string16 GetSuffixRegex() const;
string16 GetExtensionRegex() const;
+ // Identifies the regular expression used by one row of
+ // phone_field_grammars_.
+ enum RegexType {
+ REGEX_COUNTRY,
+ REGEX_AREA,
+ REGEX_AREA_NOTEXT,
+ REGEX_PHONE,
+ REGEX_PREFIX_SEPARATOR,
+ REGEX_PREFIX,
+ REGEX_SUFFIX_SEPARATOR,
+ REGEX_SUFFIX,
+ REGEX_EXTENSION,
+
+ // Ends a grammar in phone_field_grammars_; it has no pattern of its own.
+ REGEX_SEPARATOR,
+ };
+
+ string16 GetRegExp(RegexType regex_id) const;
+
// |field| - field to fill up on successful parsing.
// |iter| - in/out. Form field iterator, points to the first field that is
// attempted to be parsed. If parsing successful, points to the first field
@@ -59,19 +86,35 @@ class PhoneField : public FormField {
std::vector<AutoFillField*>::const_iterator* iter,
bool regular_phone);
- void SetPhoneType(PHONE_TYPE phone_type);
+ void SetPhoneType(PhoneType phone_type);
// Field types are different as well, so we create a temporary phone number,
// to get relevant field types.
scoped_ptr<PhoneNumber> number_;
- PHONE_TYPE phone_type_;
+ PhoneType phone_type_;
+
+
+ // Indexes into parsed_phone_fields_; separator rows use FIELD_NONE.
+ enum PhonePart {
+ FIELD_NONE = -1,
+ FIELD_COUNTRY_CODE,
+ FIELD_AREA_CODE,
+ FIELD_PHONE,
+ FIELD_SUFFIX,
+ FIELD_EXTENSION,
+
+ FIELD_MAX,
+ };
- // Always present; holds suffix if prefix is present.
- AutoFillField* phone_;
+ // FIELD_PHONE is always present; holds the prefix if FIELD_SUFFIX is present.
+ // The rest could be NULL.
+ AutoFillField* parsed_phone_fields_[FIELD_MAX];
- AutoFillField* area_code_; // optional
- AutoFillField* prefix_; // optional
- AutoFillField* extension_; // optional
+ static struct Parser {
+ RegexType regex; // Pattern the field must match.
+ PhonePart phone_part; // Part of the phone number the field represents.
+ int max_size; // If non-zero, field's max_length must be 1..max_size.
+ } phone_field_grammars_[];
DISALLOW_COPY_AND_ASSIGN(PhoneField);
};
diff --git a/chrome/browser/autofill/phone_field_unittest.cc b/chrome/browser/autofill/phone_field_unittest.cc
index 96b02f3..912b5d8 100644
--- a/chrome/browser/autofill/phone_field_unittest.cc
+++ b/chrome/browser/autofill/phone_field_unittest.cc
@@ -171,6 +171,11 @@ TEST_F(PhoneFieldTest, ParseTwoLinePhoneEcmlBillTo) {
}
TEST_F(PhoneFieldTest, ThreePartPhoneNumber) {
+ // Phone in format <field> - <field> - <field> could be either
+ // <area code> - <prefix> - <suffix>, or
+ // <country code> - <area code> - <phone>. The only distinguishing feature is
+ // size: <prefix> is no bigger than 3 characters, and <suffix> is no bigger
+ // than 4.
list_.push_back(
new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Phone:"),
ASCIIToUTF16("dayphone1"),
@@ -184,7 +189,7 @@ TEST_F(PhoneFieldTest, ThreePartPhoneNumber) {
ASCIIToUTF16("dayphone2"),
string16(),
ASCIIToUTF16("text"),
- 0,
+ 3,
false),
ASCIIToUTF16("prefix1")));
list_.push_back(
@@ -192,7 +197,7 @@ TEST_F(PhoneFieldTest, ThreePartPhoneNumber) {
ASCIIToUTF16("dayphone3"),
string16(),
ASCIIToUTF16("text"),
- 0,
+ 4,
false),
ASCIIToUTF16("suffix1")));
list_.push_back(
diff --git a/chrome/test/data/autofill/heuristics/input/form_phones_en.html b/chrome/test/data/autofill/heuristics/input/form_phones_en.html
new file mode 100644
index 0000000..614ff26
--- /dev/null
+++ b/chrome/test/data/autofill/heuristics/input/form_phones_en.html
@@ -0,0 +1,75 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="UTF-8">
+ <title></title>
+ </head>
+ <body>
+ <form action="http://www.google.com/" method="post">
+ <label for="firstname">First name:</label>
+ <input type="text" id="firstname"><br/>
+ <label for="lastname">Last name:</label>
+ <input type="text" id="lastname"><br/>
+ <label for="address">Address:</label>
+ <input type="text" id="address"><br/>
+ <label for="city">City:</label>
+ <input type="text" id="city"><br/>
+ <label for="state">State:</label>
+ <input type="text" id="state"><br/>
+ <label for="zip">Zip:</label>
+ <input type="text" id="zip"><br/>
+
+ <label for="phone">Phone:</label>
+ <input type="text" id="phone"><br/>
+ Area Code: <input type="text" id="areacode1">
+ Phone: <input type="text" id="phone1"><br/>
+ Phone:
+ <input type="text" maxlength="3" name="hphone1">
+ - <input type="text" maxlength="3" name="hphone2">
+ - <input type="text" maxlength="4" name="hphone3">
+ ext.: <input type="text" maxlength="5" name="hphone4"><br/>
+ Phone:
+ ( <input type="text" maxlength="3" name="hphone1a"> )
+ <input type="text" maxlength="3" name="hphone2a">
+ - <input type="text" maxlength="4" name="hphone3a">
+ ext.: <input type="text" maxlength="5" name="hphone4a"><br/>
+ Phone:
+ <input type="text" maxlength="2" name="hphone1b">
+ <input type="text" maxlength="3" name="hphone1b">
+ - <input type="text" maxlength="3" name="hphone2b">
+ - <input type="text" maxlength="4" name="hphone3b">
+ ext.: <input type="text" maxlength="5" name="hphone4b"><br/>
+ Phone:
+ <input type="text" maxlength="2" name="hphone1c">
+ ( <input type="text" maxlength="3" name="hphone1c"> )
+ <input type="text" maxlength="3" name="hphone2c">
+ - <input type="text" maxlength="4" name="hphone3c">
+ ext.: <input type="text" maxlength="5" name="hphone4c"><br/>
+
+ Fax: <input type="text" id="fax"><br/>
+ Area Code: <input type="text" id="faxareacode1">
+ Fax: <input type="text" id="fax1"><br/>
+ Fax:
+ <input type="text" maxlength="3" name="hfax1">
+ - <input type="text" maxlength="3" name="hfax2">
+ - <input type="text" maxlength="4" name="hfax3">
+ ext.: <input type="text" maxlength="5" name="hfax4"><br/>
+ Fax:
+ ( <input type="text" maxlength="3" name="hfax1a"> )
+ <input type="text" maxlength="3" name="hfax2a">
+ - <input type="text" maxlength="4" name="hfax3a">
+ ext.: <input type="text" maxlength="5" name="hfax4a"><br/>
+ Fax:
+ <input type="text" maxlength="2" name="hfax0b">
+ <input type="text" maxlength="3" name="hfax1b">
+ - <input type="text" maxlength="3" name="hfax2b">
+ - <input type="text" maxlength="4" name="hfax3b">
+ ext.: <input type="text" maxlength="5" name="hfax4"><br/>
+ Fax: <input type="text" maxlength="2" name="hfax0c">
+ ( <input type="text" maxlength="3" name="hfax1c"> )
+ <input type="text" maxlength="3" name="hfax2c">
+ - <input type="text" maxlength="4" name="hfax3c">
+ ext.: <input type="text" maxlength="5" name="hfax4c"><br/>
+ </form>
+ </body>
+</html>
diff --git a/chrome/test/data/autofill/heuristics/output/form_phones_en.out b/chrome/test/data/autofill/heuristics/output/form_phones_en.out
new file mode 100644
index 0000000..f3e1ba9
--- /dev/null
+++ b/chrome/test/data/autofill/heuristics/output/form_phones_en.out
@@ -0,0 +1,48 @@
+NAME_FIRST
+NAME_LAST
+ADDRESS_HOME_LINE1
+ADDRESS_HOME_CITY
+ADDRESS_HOME_STATE
+ADDRESS_HOME_ZIP
+PHONE_HOME_WHOLE_NUMBER
+PHONE_HOME_CITY_CODE
+PHONE_HOME_NUMBER
+PHONE_HOME_CITY_CODE
+PHONE_HOME_NUMBER
+PHONE_HOME_NUMBER
+UNKNOWN_TYPE
+PHONE_HOME_CITY_CODE
+PHONE_HOME_NUMBER
+PHONE_HOME_NUMBER
+UNKNOWN_TYPE
+PHONE_HOME_COUNTRY_CODE
+PHONE_HOME_CITY_CODE
+PHONE_HOME_NUMBER
+PHONE_HOME_NUMBER
+UNKNOWN_TYPE
+PHONE_HOME_COUNTRY_CODE
+PHONE_HOME_CITY_CODE
+PHONE_HOME_NUMBER
+PHONE_HOME_NUMBER
+UNKNOWN_TYPE
+PHONE_FAX_WHOLE_NUMBER
+PHONE_FAX_CITY_CODE
+PHONE_FAX_NUMBER
+PHONE_FAX_CITY_CODE
+PHONE_FAX_NUMBER
+PHONE_FAX_NUMBER
+UNKNOWN_TYPE
+PHONE_FAX_CITY_CODE
+PHONE_FAX_NUMBER
+PHONE_FAX_NUMBER
+UNKNOWN_TYPE
+PHONE_FAX_COUNTRY_CODE
+PHONE_FAX_CITY_CODE
+PHONE_FAX_NUMBER
+PHONE_FAX_NUMBER
+UNKNOWN_TYPE
+PHONE_FAX_COUNTRY_CODE
+PHONE_FAX_CITY_CODE
+PHONE_FAX_NUMBER
+PHONE_FAX_NUMBER
+UNKNOWN_TYPE