diff options
author | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-22 00:08:26 +0000 |
---|---|---|
committer | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-22 00:08:26 +0000 |
commit | a39a17ab1548551f0f4438fe83ea040518886203 (patch) | |
tree | 2a0031dcfa41ba21921d3455cfdda599637edad5 | |
parent | da013c688778b141630c29c2ab728d68727a713d (diff) | |
download | chromium_src-a39a17ab1548551f0f4438fe83ea040518886203.zip chromium_src-a39a17ab1548551f0f4438fe83ea040518886203.tar.gz chromium_src-a39a17ab1548551f0f4438fe83ea040518886203.tar.bz2 |
Autofill heuristics checkout files for 04_checkout_jcrew.com.html
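Adds label-inference logic that scans backwards through the preceding table rows when a field's own row contains no label text. Also fixes the resulting fallout: the order in which the name heuristics match first, middle, and last names, and the billing-versus-shipping detection for addresses.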
BUG=86602
TEST=FormStructureBrowserTest.* with 04_checkout_jcrew.com.html
Review URL: http://codereview.chromium.org/7210026
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@89940 0039d316-1c4b-4281-b951-d872f2087c98
9 files changed, 136 insertions, 31 deletions
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc index f13b405..d32f3337 100644 --- a/chrome/browser/autofill/address_field.cc +++ b/chrome/browser/autofill/address_field.cc @@ -79,17 +79,40 @@ FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { } AddressType AddressField::FindType() const { - // This is not a full address, so don't even bother trying to figure - // out its type. - if (address1_ == NULL) - return kGenericAddress; - // First look at the field name, which itself will sometimes contain // "bill" or "ship". We could check for the ECML type prefixes // here, but there's no need to since ECML's prefixes Ecom_BillTo // and Ecom_ShipTo contain "bill" and "ship" anyway. - string16 name = StringToLowerASCII(address1_->name); - return AddressTypeFromText(name); + if (company_) { + string16 name = StringToLowerASCII(company_->name); + return AddressTypeFromText(name); + } + if (address1_) { + string16 name = StringToLowerASCII(address1_->name); + return AddressTypeFromText(name); + } + if (address2_) { + string16 name = StringToLowerASCII(address2_->name); + return AddressTypeFromText(name); + } + if (city_) { + string16 name = StringToLowerASCII(city_->name); + return AddressTypeFromText(name); + } + if (zip_) { + string16 name = StringToLowerASCII(zip_->name); + return AddressTypeFromText(name); + } + if (state_) { + string16 name = StringToLowerASCII(state_->name); + return AddressTypeFromText(name); + } + if (country_) { + string16 name = StringToLowerASCII(country_->name); + return AddressTypeFromText(name); + } + + return kGenericAddress; } AddressField::AddressField() diff --git a/chrome/browser/autofill/address_field_unittest.cc b/chrome/browser/autofill/address_field_unittest.cc index 90a26eb..5b0196a 100644 --- a/chrome/browser/autofill/address_field_unittest.cc +++ b/chrome/browser/autofill/address_field_unittest.cc @@ -253,7 +253,7 @@ TEST_F(AddressFieldTest, ParseCityEcml) { 
AutofillScanner scanner(list_.get()); field_.reset(Parse(&scanner, true)); ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); - EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_EQ(kShippingAddress, field_->FindType()); ASSERT_TRUE(field_->ClassifyField(&field_type_map_)); ASSERT_TRUE( field_type_map_.find(ASCIIToUTF16("city1")) != field_type_map_.end()); @@ -292,7 +292,7 @@ TEST_F(AddressFieldTest, ParseStateEcml) { AutofillScanner scanner(list_.get()); field_.reset(Parse(&scanner, true)); ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); - EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_EQ(kShippingAddress, field_->FindType()); ASSERT_TRUE(field_->ClassifyField(&field_type_map_)); ASSERT_TRUE( field_type_map_.find(ASCIIToUTF16("state1")) != field_type_map_.end()); @@ -331,7 +331,7 @@ TEST_F(AddressFieldTest, ParseZipEcml) { AutofillScanner scanner(list_.get()); field_.reset(Parse(&scanner, true)); ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); - EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_EQ(kShippingAddress, field_->FindType()); ASSERT_TRUE(field_->ClassifyField(&field_type_map_)); ASSERT_TRUE( field_type_map_.find(ASCIIToUTF16("zip1")) != field_type_map_.end()); @@ -403,7 +403,7 @@ TEST_F(AddressFieldTest, ParseCountryEcml) { AutofillScanner scanner(list_.get()); field_.reset(Parse(&scanner, true)); ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); - EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_EQ(kShippingAddress, field_->FindType()); ASSERT_TRUE(field_->ClassifyField(&field_type_map_)); ASSERT_TRUE( field_type_map_.find(ASCIIToUTF16("country1")) != field_type_map_.end()); @@ -472,7 +472,7 @@ TEST_F(AddressFieldTest, ParseCompanyEcml) { AutofillScanner scanner(list_.get()); field_.reset(Parse(&scanner, true)); ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); - EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_EQ(kShippingAddress, field_->FindType()); 
ASSERT_TRUE(field_->ClassifyField(&field_type_map_)); ASSERT_TRUE( field_type_map_.find(ASCIIToUTF16("company1")) != field_type_map_.end()); diff --git a/chrome/browser/autofill/autofill_resources.grd b/chrome/browser/autofill/autofill_resources.grd index efc2e37..31662b4 100644 --- a/chrome/browser/autofill/autofill_resources.grd +++ b/chrome/browser/autofill/autofill_resources.grd @@ -97,7 +97,7 @@ first.*name|initials|fname|first$<!-- de-DE -->|vorname<!-- es -->|nombre<!-- fr-FR -->|forename|prénom|prenom<!-- ja-JP -->|名<!-- pt-BR, pt-PT -->|nome<!-- ru -->|Имя<!-- ko-KR -->|이름 </message> <message name="IDS_AUTOFILL_MIDDLE_INITIAL_RE"> - middle.*initial|m\.i\.|mi$ + middle.*initial|m\.i\.|mi$|\bmi\b </message> <message name="IDS_AUTOFILL_MIDDLE_NAME_RE"> middle.*name|mname|middle$<!-- es -->|apellido.?materno|lastlastname diff --git a/chrome/browser/autofill/name_field.cc b/chrome/browser/autofill/name_field.cc index 7bafa15..9d9697b 100644 --- a/chrome/browser/autofill/name_field.cc +++ b/chrome/browser/autofill/name_field.cc @@ -157,13 +157,6 @@ FirstLastNameField* FirstLastNameField::ParseComponentNames( continue; } - if (!v->last_name_ && - ParseField(scanner, - l10n_util::GetStringUTF16(IDS_AUTOFILL_LAST_NAME_RE), - &v->last_name_)) { - continue; - } - // We check for a middle initial before checking for a middle name // because at least one page (PC Connection.html) has a field marked // as both (the label text is "MI" and the element name is @@ -184,6 +177,13 @@ FirstLastNameField* FirstLastNameField::ParseComponentNames( continue; } + if (!v->last_name_ && + ParseField(scanner, + l10n_util::GetStringUTF16(IDS_AUTOFILL_LAST_NAME_RE), + &v->last_name_)) { + continue; + } + break; } diff --git a/chrome/renderer/autofill/form_manager.cc b/chrome/renderer/autofill/form_manager.cc index a442f6e..d63fd46 100644 --- a/chrome/renderer/autofill/form_manager.cc +++ b/chrome/renderer/autofill/form_manager.cc @@ -134,8 +134,10 @@ string16 FindChildTextInner(const 
WebNode& node, int depth) { string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); TrimPositions child_leading_whitespace = TrimWhitespace(child_text, TRIM_LEADING, &child_text); - if (node_trailing_whitespace || child_leading_whitespace) + if (node_trailing_whitespace || child_leading_whitespace || + (node.nodeType() == WebNode::TextNode && node_text.empty())) { node_text += ASCIIToUTF16(" "); + } node_text += child_text; node_trailing_whitespace = TrimWhitespace(node_text, TRIM_TRAILING, &node_text); @@ -145,8 +147,10 @@ string16 FindChildTextInner(const WebNode& node, int depth) { string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); TrimPositions sibling_leading_whitespace = TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text); - if (node_trailing_whitespace || sibling_leading_whitespace) + if (node_trailing_whitespace || sibling_leading_whitespace || + (node.nodeType() == WebNode::TextNode && node_text.empty())) { node_text += ASCIIToUTF16(" "); + } node_text += sibling_text; return node_text; @@ -246,7 +250,7 @@ string16 InferLabelFromPrevious(const WebFormControlElement& element) { // or <tr><th>Some Text</th><td><input ...></td></tr> // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> -string16 InferLabelFromTable(const WebFormControlElement& element) { +string16 InferLabelFromTableColumn(const WebFormControlElement& element) { WebNode parent = element.parentNode(); while (!parent.isNull() && parent.isElementNode() && !parent.to<WebElement>().hasTagName("td")) { @@ -271,6 +275,33 @@ string16 InferLabelFromTable(const WebFormControlElement& element) { } // Helper for |InferLabelForElement()| that infers a label, if possible, from +// surrounding table structure, +// e.g. 
<tr><td>Some Text</td></tr><tr><td><input ...></td></tr> +string16 InferLabelFromTableRow(const WebFormControlElement& element) { + WebNode parent = element.parentNode(); + while (!parent.isNull() && parent.isElementNode() && + !parent.to<WebElement>().hasTagName("tr")) { + parent = parent.parentNode(); + } + + if (parent.isNull()) + return string16(); + + // Check all previous siblings, skipping non-element nodes, until we find a + // non-empty text block. + string16 inferred_label; + WebNode previous = parent.previousSibling(); + while (inferred_label.empty() && !previous.isNull()) { + if (HasTagName(previous, "tr")) + inferred_label = FindChildText(previous.to<WebElement>()); + + previous = previous.previousSibling(); + } + + return inferred_label; +} + +// Helper for |InferLabelForElement()| that infers a label, if possible, from // a surrounding div table, // e.g. <div>Some Text<span><input ...></span></div> // e.g. <div>Some Text</div><div><input ...></div> @@ -328,7 +359,12 @@ string16 InferLabelForElement(const WebFormControlElement& element) { return inferred_label; // If we didn't find a label, check for table cell case. - inferred_label = InferLabelFromTable(element); + inferred_label = InferLabelFromTableColumn(element); + if (!inferred_label.empty()) + return inferred_label; + + // If we didn't find a label, check for table row case. + inferred_label = InferLabelFromTableRow(element); if (!inferred_label.empty()) return inferred_label; diff --git a/chrome/renderer/autofill/form_manager_browsertest.cc b/chrome/renderer/autofill/form_manager_browsertest.cc index c930d25..fc0cd22 100644 --- a/chrome/renderer/autofill/form_manager_browsertest.cc +++ b/chrome/renderer/autofill/form_manager_browsertest.cc @@ -1507,6 +1507,52 @@ TEST_F(FormManagerTest, LabelsInferredFromTableAdjacentElements) { labels, names, values); } +// Verify that we correctly infer labels when the label text resides in the +// previous row. 
+TEST_F(FormManagerTest, LabelsInferredFromTableRow) { + std::vector<string16> labels, names, values; + + labels.push_back(ASCIIToUTF16("*First Name *Last Name *Email")); + names.push_back(ASCIIToUTF16("firstname")); + values.push_back(ASCIIToUTF16("John")); + + labels.push_back(ASCIIToUTF16("*First Name *Last Name *Email")); + names.push_back(ASCIIToUTF16("lastname")); + values.push_back(ASCIIToUTF16("Smith")); + + labels.push_back(ASCIIToUTF16("*First Name *Last Name *Email")); + names.push_back(ASCIIToUTF16("email")); + values.push_back(ASCIIToUTF16("john@example.com")); + + ExpectLabels( + "<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">" + "<TABLE>" + " <TR>" + " <TD>*First Name</TD>" + " <TD>*Last Name</TD>" + " <TD>*Email</TD>" + " </TR>" + " <TR>" + " <TD>" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" + " </TD>" + " <TD>" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " </TD>" + " <TD>" + " <INPUT type=\"text\" id=\"email\" value=\"john@example.com\"/>" + " </TD>" + " </TR>" + " <TR>" + " <TD>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" + " </TD>" + " </TR>" + "</TABLE>" + "</FORM>", + labels, names, values); +} + TEST_F(FormManagerTest, LabelsInferredFromDefinitionList) { std::vector<string16> labels, names, values; diff --git a/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out b/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out index 1bc9610..5acb519 100644 --- a/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out +++ b/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out @@ -1,7 +1,7 @@ ADDRESS_HOME_COUNTRY -UNKNOWN_TYPE -UNKNOWN_TYPE -UNKNOWN_TYPE +NAME_FIRST +NAME_LAST +COMPANY_NAME ADDRESS_HOME_LINE1 ADDRESS_HOME_LINE2 ADDRESS_HOME_CITY diff --git a/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out b/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out index 
8b5e0bf..89eb47b 100644 --- a/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out +++ b/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out @@ -1,4 +1,4 @@ -ADDRESS_HOME_COUNTRY +ADDRESS_BILLING_COUNTRY NAME_FIRST NAME_LAST ADDRESS_BILLING_LINE1 diff --git a/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out b/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out index 6347b0d..87068f9 100644 --- a/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out +++ b/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out @@ -13,9 +13,9 @@ NAME_FIRST NAME_LAST UNKNOWN_TYPE UNKNOWN_TYPE -ADDRESS_HOME_CITY -ADDRESS_HOME_STATE -ADDRESS_HOME_ZIP +ADDRESS_BILLING_CITY +ADDRESS_BILLING_STATE +ADDRESS_BILLING_ZIP UNKNOWN_TYPE PHONE_HOME_WHOLE_NUMBER PHONE_HOME_WHOLE_NUMBER |