diff options
Diffstat (limited to 'chrome')
-rw-r--r-- | chrome/browser/autofill/autofill_manager_unittest.cc | 1 | ||||
-rw-r--r-- | chrome/browser/autofill/autofill_resources.grd | 18 | ||||
-rw-r--r-- | chrome/browser/autofill/form_structure_unittest.cc | 6 | ||||
-rw-r--r-- | chrome/browser/autofill/phone_field.cc | 285 | ||||
-rw-r--r-- | chrome/browser/autofill/phone_field.h | 59 | ||||
-rw-r--r-- | chrome/browser/autofill/phone_field_unittest.cc | 9 | ||||
-rw-r--r-- | chrome/test/data/autofill/heuristics/input/form_phones_en.html | 75 | ||||
-rw-r--r-- | chrome/test/data/autofill/heuristics/output/form_phones_en.out | 48 |
8 files changed, 394 insertions, 107 deletions
diff --git a/chrome/browser/autofill/autofill_manager_unittest.cc b/chrome/browser/autofill/autofill_manager_unittest.cc index e6c2548..4aa6504 100644 --- a/chrome/browser/autofill/autofill_manager_unittest.cc +++ b/chrome/browser/autofill/autofill_manager_unittest.cc @@ -1648,6 +1648,7 @@ TEST_F(AutoFillManagerTest, FillPhoneNumber) { char test_data[] = "1234567890123456"; for (int i = arraysize(test_data) - 1; i >= 0; --i) { test_data[i] = 0; + SCOPED_TRACE(StringPrintf("Testing phone: %s", test_data)); work_profile->SetInfo(phone_type, ASCIIToUTF16(test_data)); // The page ID sent to the AutoFillManager from the RenderView, used to send // an IPC message back to the renderer. diff --git a/chrome/browser/autofill/autofill_resources.grd b/chrome/browser/autofill/autofill_resources.grd index 475e706..6f491b6 100644 --- a/chrome/browser/autofill/autofill_resources.grd +++ b/chrome/browser/autofill/autofill_resources.grd @@ -108,17 +108,29 @@ <message name="IDS_AUTOFILL_PHONE_RE"> phone<!-- de-DE -->|telefonnummer<!-- es -->|telefono|teléfono<!-- fr-FR -->|telfixe<!-- ja-JP -->|電話<!-- pt-BR, pt-PT -->|telefone|telemovel<!-- ru -->|телефон<!-- zh-CN -->|电话 </message> + <message name="IDS_AUTOFILL_COUNTRY_CODE_RE"> + country.*code|ccode + </message> + <message name="IDS_AUTOFILL_AREA_CODE_NOTEXT_RE"> + ^\($ + </message> <message name="IDS_AUTOFILL_AREA_CODE_RE"> - area code + area.*code|acode </message> <message name="IDS_AUTOFILL_FAX_RE"> fax<!-- fr-FR -->|télécopie|telecopie<!-- ja-JP -->|ファックス<!-- ru -->|факс<!-- zh-CN -->|传真<!-- zh-TW -->|傳真 </message> + <message name="IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE"> + ^-$|^\)$ + </message> + <message name="IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE"> + ^-$ + </message> <message name="IDS_AUTOFILL_PHONE_PREFIX_RE"> - ^-$|\)$|prefix<!-- fr-FR -->|preselection<!-- pt-BR, pt-PT -->|ddd + prefix<!-- fr-FR -->|preselection<!-- pt-BR, pt-PT -->|ddd </message> <message name="IDS_AUTOFILL_PHONE_SUFFIX_RE"> - ^-$|suffix + suffix </message> <message name="IDS_AUTOFILL_PHONE_EXTENSION_RE"> ext<!-- pt-BR, pt-PT -->|ramal diff --git a/chrome/browser/autofill/form_structure_unittest.cc b/chrome/browser/autofill/form_structure_unittest.cc index bd3056a..f4d1cb9 100644 --- a/chrome/browser/autofill/form_structure_unittest.cc +++ b/chrome/browser/autofill/form_structure_unittest.cc @@ -1113,14 +1113,16 @@ TEST(FormStructureTest, ThreePartPhoneNumber) { ASCIIToUTF16("dayphone2"), string16(), ASCIIToUTF16("text"), - 0, + 3, // Size of prefix is 3. false)); form.fields.push_back( webkit_glue::FormField(ASCIIToUTF16("-"), ASCIIToUTF16("dayphone3"), string16(), ASCIIToUTF16("text"), - 0, + 4, // Size of suffix is 4. If unlimited size is + // passed, phone will be parsed as + // <country code> - <area code> - <phone>. false)); form.fields.push_back( webkit_glue::FormField(ASCIIToUTF16("ext.:"), diff --git a/chrome/browser/autofill/phone_field.cc b/chrome/browser/autofill/phone_field.cc index f6b41a2..ff6308d 100644 --- a/chrome/browser/autofill/phone_field.cc +++ b/chrome/browser/autofill/phone_field.cc @@ -15,6 +15,85 @@ #include "grit/autofill_resources.h" #include "ui/base/l10n/l10n_util.h" +// Phone field grammars - first matched grammar will be parsed. Grammars are +// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are +// parsed separately unless they are necessary parts of the match. +// The following notation is used to describe the patterns: +// <cc> - country code field. +// <ac> - area code field. +// <phone> - phone or prefix. +// <suffix> - suffix. +// <ext> - extension. +// :N means field is limited to N characters, otherwise it is unlimited. +// (pattern <field>)? means pattern is optional and matched separately. +PhoneField::Parser PhoneField::phone_field_grammars_[] = { + // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix> + // (Ext: <ext>)?)? + { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)? + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <cc> - <ac> - <phone> (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <ac> - <phone> (Ext: <ext>)? + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, + // Phone: <phone> (Ext: <ext>)? + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, +}; + PhoneField::~PhoneField() {} // static @@ -33,7 +112,7 @@ PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter, // be the last as it is a catch all case ("fax" and "faxarea" parsed as FAX, // but "area" and "someotherarea" parsed as HOME, for example). for (int i = PHONE_TYPE_MAX - 1; i >= PHONE_TYPE_FIRST; --i) { - phone_field->SetPhoneType(static_cast<PhoneField::PHONE_TYPE>(i)); + phone_field->SetPhoneType(static_cast<PhoneField::PhoneType>(i)); if (ParseInternal(phone_field.get(), iter, i == HOME_PHONE)) return phone_field.release(); } @@ -49,7 +128,7 @@ PhoneField* PhoneField::ParseECML( AutoFillField* field; if (ParseText(iter, pattern, &field)) { PhoneField* phone_field = new PhoneField(); - phone_field->phone_ = field; + phone_field->parsed_phone_fields_[FIELD_PHONE] = field; return phone_field; } @@ -57,35 +136,42 @@ PhoneField* PhoneField::ParseECML( } bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { - bool ok; + bool ok = false; - if (area_code_ != NULL) { - ok = Add(field_type_map, area_code_, - AutoFillType(number_->GetCityCodeType())); - DCHECK(ok); + DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed. - if (prefix_ != NULL) { - // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form - // we fill only the prefix depending on the size of the input field. - ok = ok && Add(field_type_map, - prefix_, - AutoFillType(number_->GetNumberType())); + if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) || + (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) || + (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) { + if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) { + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_COUNTRY_CODE], + AutoFillType(number_->GetCountryCodeType())); DCHECK(ok); - // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form - // we fill only the suffix depending on the size of the input field. - ok = ok && Add(field_type_map, - phone_, - AutoFillType(number_->GetNumberType())); + } + if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) { + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_AREA_CODE], + AutoFillType(number_->GetCityCodeType())); DCHECK(ok); - } else { - ok = ok && Add(field_type_map, - phone_, - AutoFillType(number_->GetNumberType())); + } + // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form + // we fill only the prefix depending on the size of the input field. + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_PHONE], + AutoFillType(number_->GetNumberType())); + DCHECK(ok); + // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form + // we fill only the suffix depending on the size of the input field. + if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) { + ok = Add(field_type_map, + parsed_phone_fields_[FIELD_SUFFIX], + AutoFillType(number_->GetNumberType())); DCHECK(ok); } } else { ok = Add(field_type_map, - phone_, + parsed_phone_fields_[FIELD_PHONE], AutoFillType(number_->GetWholeNumberType())); DCHECK(ok); } @@ -93,17 +179,27 @@ bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { return ok; } -PhoneField::PhoneField() - : phone_(NULL), - area_code_(NULL), - prefix_(NULL), - extension_(NULL) { +PhoneField::PhoneField() { + memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_)); SetPhoneType(HOME_PHONE); } +string16 PhoneField::GetCountryRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE); +} + string16 PhoneField::GetAreaRegex() const { // This one is the same for Home and Fax numbers. - return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); + string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); + area_code.append(ASCIIToUTF16("|")); // Regexp separator. + area_code.append(GetAreaNoTextRegex()); + return area_code; +} + +string16 PhoneField::GetAreaNoTextRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE); } string16 PhoneField::GetPhoneRegex() const { @@ -116,11 +212,21 @@ string16 PhoneField::GetPhoneRegex() const { return string16(); } +string16 PhoneField::GetPrefixSeparatorRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE); +} + string16 PhoneField::GetPrefixRegex() const { // This one is the same for Home and Fax numbers. return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE); } +string16 PhoneField::GetSuffixSeparatorRegex() const { + // This one is the same for Home and Fax numbers. + return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE); +} + string16 PhoneField::GetSuffixRegex() const { // This one is the same for Home and Fax numbers. return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE); @@ -131,6 +237,24 @@ string16 PhoneField::GetExtensionRegex() const { return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE); } +string16 PhoneField::GetRegExp(RegexType regex_id) const { + switch (regex_id) { + case REGEX_COUNTRY: return GetCountryRegex(); + case REGEX_AREA: return GetAreaRegex(); + case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex(); + case REGEX_PHONE: return GetPhoneRegex(); + case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex(); + case REGEX_PREFIX: return GetPrefixRegex(); + case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex(); + case REGEX_SUFFIX: return GetSuffixRegex(); + case REGEX_EXTENSION: return GetExtensionRegex(); + default: + NOTREACHED(); + break; + } + return string16(); +} + // static bool PhoneField::ParseInternal( PhoneField *phone_field, @@ -143,85 +267,62 @@ bool PhoneField::ParseInternal( return false; std::vector<AutoFillField*>::const_iterator q = *iter; + // The form owns the following variables, so they should not be deleted. - AutoFillField* phone = NULL; - AutoFillField* phone2 = NULL; - AutoFillField* phone3 = NULL; - bool area_code = false; // true if we've parsed an area code field. - - // Some pages, such as BloomingdalesShipping.html, have a field labeled - // "Area Code and Phone"; we want to parse this as a phone number field so - // we look for "phone" before we look for "area code". - if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) { - area_code = false; - // Check the case when the match is for non-home phone and area code, e.g. - // first field is a "Fax area code" and the subsequent is "Fax phone". - if (!regular_phone) { - // Attempt parsing of the same field as an area code and then phone: - std::vector<AutoFillField*>::const_iterator temp_it = *iter; - AutoFillField* tmp_phone1 = NULL; - AutoFillField* tmp_phone2 = NULL; - if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) && - ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) { - phone = tmp_phone1; - phone2 = tmp_phone2; - q = temp_it; - area_code = true; + AutoFillField* parsed_fields[FIELD_MAX]; + + for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) { + memset(parsed_fields, 0, sizeof(parsed_fields)); + q = *iter; + // Attempt to parse next possible match. + for (; i < arraysize(phone_field_grammars_) && + phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) { + if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex), + &parsed_fields[phone_field_grammars_[i].phone_part])) + break; + if (phone_field_grammars_[i].max_size && + (!parsed_fields[phone_field_grammars_[i].phone_part]->max_length() || + phone_field_grammars_[i].max_size < + parsed_fields[phone_field_grammars_[i].phone_part]->max_length())) { + break; } } - } else { - if (!ParseText(&q, phone_field->GetAreaRegex(), &phone)) - return false; - area_code = true; - // If this is not a home phone and there was no specification before - // the phone number actually starts (e.g. field 1 "Area code:", field 2 - // "Fax:"), we skip searching for preffix and suffix and bail out. - if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone) - return false; + if (i >= arraysize(phone_field_grammars_)) + return false; // Parsing failed. + if (phone_field_grammars_[i].regex == REGEX_SEPARATOR) + break; // Parsing succeeded. + do { + ++i; + } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR); + if (i + 1 == arraysize(phone_field_grammars_)) + return false; // Tried through all the possibilities - did not match. } + if (!parsed_fields[FIELD_PHONE]) + return false; - // Sometimes phone number fields are separated by "-" (e.g. test page - // Crate and Barrel Check Out.html). Also, area codes are sometimes - // surrounded by parentheses, so a ")" may appear after the area code field. - // - // We used to match "tel" here, which we've seen in field names (e.g. on - // Newegg2.html), but that's too general: some pages (e.g. - // uk/Furniture123-1.html) have several phone numbers in succession and we - // don't want those to be parsed as components of a single phone number. - if (phone2 == NULL) - ParseText(&q, phone_field->GetPrefixRegex(), &phone2); - - // Look for a third text box. - if (phone2) - ParseText(&q, phone_field->GetSuffixRegex(), &phone3); + for (int i = 0; i < FIELD_MAX; ++i) + phone_field->parsed_phone_fields_[i] = parsed_fields[i]; - // Now we have one, two, or three phone number text fields. Package them - // up into a PhoneField object. + // Look for optional fields. - if (phone2 == NULL) { // only one field - if (area_code) { - // It's an area code - it doesn't make sense. - return false; - } - phone_field->phone_ = phone; - } else { - phone_field->area_code_ = phone; - if (phone3 == NULL) { // two fields - phone_field->phone_ = phone2; - } else { // three boxes: area code, prefix and suffix - phone_field->prefix_ = phone2; - phone_field->phone_ = phone3; + // Look for a third text box. + if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) { + if (!ParseText(&q, phone_field->GetSuffixRegex(), + &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) { + ParseText(&q, phone_field->GetSuffixSeparatorRegex(), + &phone_field->parsed_phone_fields_[FIELD_SUFFIX]); } } // Now look for an extension. - ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_); + ParseText(&q, phone_field->GetExtensionRegex(), + &phone_field->parsed_phone_fields_[FIELD_EXTENSION]); *iter = q; return true; } -void PhoneField::SetPhoneType(PHONE_TYPE phone_type) { +void PhoneField::SetPhoneType(PhoneType phone_type) { // Field types are different as well, so we create a temporary phone number, // to get relevant field types. if (phone_type == HOME_PHONE) diff --git a/chrome/browser/autofill/phone_field.h b/chrome/browser/autofill/phone_field.h index 2563a81..ab48a116 100644 --- a/chrome/browser/autofill/phone_field.h +++ b/chrome/browser/autofill/phone_field.h @@ -33,7 +33,7 @@ class PhoneField : public FormField { private: PhoneField(); - enum PHONE_TYPE { + enum PhoneType { PHONE_TYPE_FIRST = 0, HOME_PHONE = PHONE_TYPE_FIRST, FAX_PHONE, @@ -43,12 +43,39 @@ class PhoneField : public FormField { }; // Some field names are different for phone and fax. + string16 GetCountryRegex() const; + // This string includes all area code separators, including NoText. string16 GetAreaRegex() const; + // Separator of the area code in the case fields are formatted without + // any text indicating what fields are (e.g. field1 "(" field2 ")" field3 "-" + // field4 means Country Code, Area Code, Prefix, Suffix) + string16 GetAreaNoTextRegex() const; string16 GetPhoneRegex() const; + string16 GetPrefixSeparatorRegex() const; string16 GetPrefixRegex() const; + string16 GetSuffixSeparatorRegex() const; string16 GetSuffixRegex() const; string16 GetExtensionRegex() const; + // This is for easy description of the possible parsing paths of the phone + // fields. + enum RegexType { + REGEX_COUNTRY, + REGEX_AREA, + REGEX_AREA_NOTEXT, + REGEX_PHONE, + REGEX_PREFIX_SEPARATOR, + REGEX_PREFIX, + REGEX_SUFFIX_SEPARATOR, + REGEX_SUFFIX, + REGEX_EXTENSION, + + // Separates regexps in grammar. + REGEX_SEPARATOR, + }; + + string16 GetRegExp(RegexType regex_id) const; + // |field| - field to fill up on successful parsing. // |iter| - in/out. Form field iterator, points to the first field that is // attempted to be parsed. If parsing successful, points to the first field @@ -59,19 +86,35 @@ class PhoneField : public FormField { std::vector<AutoFillField*>::const_iterator* iter, bool regular_phone); - void SetPhoneType(PHONE_TYPE phone_type); + void SetPhoneType(PhoneType phone_type); // Field types are different as well, so we create a temporary phone number, // to get relevant field types. scoped_ptr<PhoneNumber> number_; - PHONE_TYPE phone_type_; + PhoneType phone_type_; + + + // Parsed fields. + enum PhonePart { + FIELD_NONE = -1, + FIELD_COUNTRY_CODE, + FIELD_AREA_CODE, + FIELD_PHONE, + FIELD_SUFFIX, + FIELD_EXTENSION, + + FIELD_MAX, + }; - // Always present; holds suffix if prefix is present. - AutoFillField* phone_; + // FIELD_PHONE is always present; holds suffix if prefix is present. + // The rest could be NULL. + AutoFillField* parsed_phone_fields_[FIELD_MAX]; - AutoFillField* area_code_; // optional - AutoFillField* prefix_; // optional - AutoFillField* extension_; // optional + static struct Parser { + RegexType regex; // Field matching reg-ex. + PhonePart phone_part; // Index of the field. + int max_size; // Max size of the field to match. 0 means any. + } phone_field_grammars_[]; DISALLOW_COPY_AND_ASSIGN(PhoneField); }; diff --git a/chrome/browser/autofill/phone_field_unittest.cc b/chrome/browser/autofill/phone_field_unittest.cc index 96b02f3..912b5d8 100644 --- a/chrome/browser/autofill/phone_field_unittest.cc +++ b/chrome/browser/autofill/phone_field_unittest.cc @@ -171,6 +171,11 @@ TEST_F(PhoneFieldTest, ParseTwoLinePhoneEcmlBillTo) { } TEST_F(PhoneFieldTest, ThreePartPhoneNumber) { + // Phone in format <field> - <field> - <field> could be either + // <area code> - <prefix> - <suffix>, or + // <country code> - <area code> - <phone>. The only distinguishing feature is + // size: <prefix> is no bigger than 3 characters, and <suffix> is no bigger + // than 4. list_.push_back( new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Phone:"), ASCIIToUTF16("dayphone1"), @@ -184,7 +189,7 @@ TEST_F(PhoneFieldTest, ThreePartPhoneNumber) { ASCIIToUTF16("dayphone2"), string16(), ASCIIToUTF16("text"), - 0, + 3, false), ASCIIToUTF16("prefix1"))); list_.push_back( @@ -192,7 +197,7 @@ TEST_F(PhoneFieldTest, ThreePartPhoneNumber) { ASCIIToUTF16("dayphone3"), string16(), ASCIIToUTF16("text"), - 0, + 4, false), ASCIIToUTF16("suffix1"))); list_.push_back( diff --git a/chrome/test/data/autofill/heuristics/input/form_phones_en.html b/chrome/test/data/autofill/heuristics/input/form_phones_en.html new file mode 100644 index 0000000..614ff26 --- /dev/null +++ b/chrome/test/data/autofill/heuristics/input/form_phones_en.html @@ -0,0 +1,75 @@ +<!DOCTYPE html> +<html> + <head> + <meta charset="UTF-8"> + <title></title> + </head> + <body> + <form action="http://www.google.com/" method="post"> + <label for="firstname">First name:</label> + <input type="text" id="firstname"><br/> + <label for="lastname">Last name:</label> + <input type="text" id="lastname"><br/> + <label for="address">Address:</label> + <input type="text" id="address"><br/> + <label for="city">City:</label> + <input type="text" id="city"><br/> + <label for="state">State:</label> + <input type="text" id="state"><br/> + <label for="zip">Zip:</label> + <input type="text" id="zip"><br/> + + <label for="phone">Phone:</label> + <input type="text" id="phone"><br/> + Area Code: <input type="text" id="areacode1"> + Phone: <input type="text" id="phone1"><br/> + Phone: + <input type="text" maxlength="3" name="hphone1"> + - <input type="text" maxlength="3" name="hphone2"> + - <input type="text" maxlength="4" name="hphone3"> + ext.: <input type="text" maxlength="5" name="hphone4"><br/> + Phone: + ( <input type="text" maxlength="3" name="hphone1a"> ) + <input type="text" maxlength="3" name="hphone2a"> + - <input type="text" maxlength="4" name="hphone3a"> + ext.: <input type="text" maxlength="5" name="hphone4a"><br/> + Phone: + <input type="text" maxlength="2" name="hphone1b"> + <input type="text" maxlength="3" name="hphone1b"> + - <input type="text" maxlength="3" name="hphone2b"> + - <input type="text" maxlength="4" name="hphone3b"> + ext.: <input type="text" maxlength="5" name="hphone4b"><br/> + Phone: + <input type="text" maxlength="2" name="hphone1c"> + ( <input type="text" maxlength="3" name="hphone1c"> ) + <input type="text" maxlength="3" name="hphone2c"> + - <input type="text" maxlength="4" name="hphone3c"> + ext.: <input type="text" maxlength="5" name="hphone4c"><br/> + + Fax: <input type="text" id="fax"><br/> + Area Code: <input type="text" id="faxareacode1"> + Fax: <input type="text" id="fax1"><br/> + Fax: + <input type="text" maxlength="3" name="hfax1"> + - <input type="text" maxlength="3" name="hfax2"> + - <input type="text" maxlength="4" name="hfax3"> + ext.: <input type="text" maxlength="5" name="hfax4"><br/> + Fax: + ( <input type="text" maxlength="3" name="hfax1a"> ) + <input type="text" maxlength="3" name="hfax2a"> + - <input type="text" maxlength="4" name="hfax3a"> + ext.: <input type="text" maxlength="5" name="hfax4a"><br/> + Fax: + <input type="text" maxlength="2" name="hfax0b"> + <input type="text" maxlength="3" name="hfax1b"> + - <input type="text" maxlength="3" name="hfax2b"> + - <input type="text" maxlength="4" name="hfax3b"> + ext.: <input type="text" maxlength="5" name="hfax4"><br/> + Fax: <input type="text" maxlength="2" name="hfax0c"> + ( <input type="text" maxlength="3" name="hfax1c"> ) + <input type="text" maxlength="3" name="hfax2c"> + - <input type="text" maxlength="4" name="hfax3c"> + ext.: <input type="text" maxlength="5" name="hfax4c"><br/> + </form> + </body> +</html> diff --git a/chrome/test/data/autofill/heuristics/output/form_phones_en.out b/chrome/test/data/autofill/heuristics/output/form_phones_en.out new file mode 100644 index 0000000..f3e1ba9 --- /dev/null +++ b/chrome/test/data/autofill/heuristics/output/form_phones_en.out @@ -0,0 +1,48 @@ +NAME_FIRST +NAME_LAST +ADDRESS_HOME_LINE1 +ADDRESS_HOME_CITY +ADDRESS_HOME_STATE +ADDRESS_HOME_ZIP +PHONE_HOME_WHOLE_NUMBER +PHONE_HOME_CITY_CODE +PHONE_HOME_NUMBER +PHONE_HOME_CITY_CODE +PHONE_HOME_NUMBER +PHONE_HOME_NUMBER +UNKNOWN_TYPE +PHONE_HOME_CITY_CODE +PHONE_HOME_NUMBER +PHONE_HOME_NUMBER +UNKNOWN_TYPE +PHONE_HOME_COUNTRY_CODE +PHONE_HOME_CITY_CODE +PHONE_HOME_NUMBER +PHONE_HOME_NUMBER +UNKNOWN_TYPE +PHONE_HOME_COUNTRY_CODE +PHONE_HOME_CITY_CODE +PHONE_HOME_NUMBER +PHONE_HOME_NUMBER +UNKNOWN_TYPE +PHONE_FAX_WHOLE_NUMBER +PHONE_FAX_CITY_CODE +PHONE_FAX_NUMBER +PHONE_FAX_CITY_CODE +PHONE_FAX_NUMBER +PHONE_FAX_NUMBER +UNKNOWN_TYPE +PHONE_FAX_CITY_CODE +PHONE_FAX_NUMBER +PHONE_FAX_NUMBER +UNKNOWN_TYPE +PHONE_FAX_COUNTRY_CODE +PHONE_FAX_CITY_CODE +PHONE_FAX_NUMBER +PHONE_FAX_NUMBER +UNKNOWN_TYPE +PHONE_FAX_COUNTRY_CODE +PHONE_FAX_CITY_CODE +PHONE_FAX_NUMBER +PHONE_FAX_NUMBER +UNKNOWN_TYPE |