summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-06-22 00:08:26 +0000
committer dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-06-22 00:08:26 +0000
commit a39a17ab1548551f0f4438fe83ea040518886203 (patch)
tree 2a0031dcfa41ba21921d3455cfdda599637edad5
parent da013c688778b141630c29c2ab728d68727a713d (diff)
download chromium_src-a39a17ab1548551f0f4438fe83ea040518886203.zip
chromium_src-a39a17ab1548551f0f4438fe83ea040518886203.tar.gz
chromium_src-a39a17ab1548551f0f4438fe83ea040518886203.tar.bz2
Autofill heuristics checkout files for 04_checkout_jcrew.com.html
Adds label-inference logic that scans backwards through the preceding table rows. Fixes the resulting fallout: the heuristic order of first, middle, and last names, and the billing-versus-shipping logic for addresses. BUG=86602 TEST=FormStructureBrowserTest.* with 04_checkout_jcrew.com.html Review URL: http://codereview.chromium.org/7210026 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@89940 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- chrome/browser/autofill/address_field.cc 37
-rw-r--r-- chrome/browser/autofill/address_field_unittest.cc 10
-rw-r--r-- chrome/browser/autofill/autofill_resources.grd 2
-rw-r--r-- chrome/browser/autofill/name_field.cc 14
-rw-r--r-- chrome/renderer/autofill/form_manager.cc 44
-rw-r--r-- chrome/renderer/autofill/form_manager_browsertest.cc 46
-rw-r--r-- chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out 6
-rw-r--r-- chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out 2
-rw-r--r-- chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out 6
9 files changed, 136 insertions, 31 deletions
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc
index f13b405..d32f3337 100644
--- a/chrome/browser/autofill/address_field.cc
+++ b/chrome/browser/autofill/address_field.cc
@@ -79,17 +79,40 @@ FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) {
}
AddressType AddressField::FindType() const {
- // This is not a full address, so don't even bother trying to figure
- // out its type.
- if (address1_ == NULL)
- return kGenericAddress;
-
// First look at the field name, which itself will sometimes contain
// "bill" or "ship". We could check for the ECML type prefixes
// here, but there's no need to since ECML's prefixes Ecom_BillTo
// and Ecom_ShipTo contain "bill" and "ship" anyway.
- string16 name = StringToLowerASCII(address1_->name);
- return AddressTypeFromText(name);
+ if (company_) {
+ string16 name = StringToLowerASCII(company_->name);
+ return AddressTypeFromText(name);
+ }
+ if (address1_) {
+ string16 name = StringToLowerASCII(address1_->name);
+ return AddressTypeFromText(name);
+ }
+ if (address2_) {
+ string16 name = StringToLowerASCII(address2_->name);
+ return AddressTypeFromText(name);
+ }
+ if (city_) {
+ string16 name = StringToLowerASCII(city_->name);
+ return AddressTypeFromText(name);
+ }
+ if (zip_) {
+ string16 name = StringToLowerASCII(zip_->name);
+ return AddressTypeFromText(name);
+ }
+ if (state_) {
+ string16 name = StringToLowerASCII(state_->name);
+ return AddressTypeFromText(name);
+ }
+ if (country_) {
+ string16 name = StringToLowerASCII(country_->name);
+ return AddressTypeFromText(name);
+ }
+
+ return kGenericAddress;
}
AddressField::AddressField()
diff --git a/chrome/browser/autofill/address_field_unittest.cc b/chrome/browser/autofill/address_field_unittest.cc
index 90a26eb..5b0196a 100644
--- a/chrome/browser/autofill/address_field_unittest.cc
+++ b/chrome/browser/autofill/address_field_unittest.cc
@@ -253,7 +253,7 @@ TEST_F(AddressFieldTest, ParseCityEcml) {
AutofillScanner scanner(list_.get());
field_.reset(Parse(&scanner, true));
ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
- EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
ASSERT_TRUE(field_->ClassifyField(&field_type_map_));
ASSERT_TRUE(
field_type_map_.find(ASCIIToUTF16("city1")) != field_type_map_.end());
@@ -292,7 +292,7 @@ TEST_F(AddressFieldTest, ParseStateEcml) {
AutofillScanner scanner(list_.get());
field_.reset(Parse(&scanner, true));
ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
- EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
ASSERT_TRUE(field_->ClassifyField(&field_type_map_));
ASSERT_TRUE(
field_type_map_.find(ASCIIToUTF16("state1")) != field_type_map_.end());
@@ -331,7 +331,7 @@ TEST_F(AddressFieldTest, ParseZipEcml) {
AutofillScanner scanner(list_.get());
field_.reset(Parse(&scanner, true));
ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
- EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
ASSERT_TRUE(field_->ClassifyField(&field_type_map_));
ASSERT_TRUE(
field_type_map_.find(ASCIIToUTF16("zip1")) != field_type_map_.end());
@@ -403,7 +403,7 @@ TEST_F(AddressFieldTest, ParseCountryEcml) {
AutofillScanner scanner(list_.get());
field_.reset(Parse(&scanner, true));
ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
- EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
ASSERT_TRUE(field_->ClassifyField(&field_type_map_));
ASSERT_TRUE(
field_type_map_.find(ASCIIToUTF16("country1")) != field_type_map_.end());
@@ -472,7 +472,7 @@ TEST_F(AddressFieldTest, ParseCompanyEcml) {
AutofillScanner scanner(list_.get());
field_.reset(Parse(&scanner, true));
ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
- EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
ASSERT_TRUE(field_->ClassifyField(&field_type_map_));
ASSERT_TRUE(
field_type_map_.find(ASCIIToUTF16("company1")) != field_type_map_.end());
diff --git a/chrome/browser/autofill/autofill_resources.grd b/chrome/browser/autofill/autofill_resources.grd
index efc2e37..31662b4 100644
--- a/chrome/browser/autofill/autofill_resources.grd
+++ b/chrome/browser/autofill/autofill_resources.grd
@@ -97,7 +97,7 @@
first.*name|initials|fname|first$<!-- de-DE -->|vorname<!-- es -->|nombre<!-- fr-FR -->|forename|prénom|prenom<!-- ja-JP -->|名<!-- pt-BR, pt-PT -->|nome<!-- ru -->|Имя<!-- ko-KR -->|이름
</message>
<message name="IDS_AUTOFILL_MIDDLE_INITIAL_RE">
- middle.*initial|m\.i\.|mi$
+ middle.*initial|m\.i\.|mi$|\bmi\b
</message>
<message name="IDS_AUTOFILL_MIDDLE_NAME_RE">
middle.*name|mname|middle$<!-- es -->|apellido.?materno|lastlastname
diff --git a/chrome/browser/autofill/name_field.cc b/chrome/browser/autofill/name_field.cc
index 7bafa15..9d9697b 100644
--- a/chrome/browser/autofill/name_field.cc
+++ b/chrome/browser/autofill/name_field.cc
@@ -157,13 +157,6 @@ FirstLastNameField* FirstLastNameField::ParseComponentNames(
continue;
}
- if (!v->last_name_ &&
- ParseField(scanner,
- l10n_util::GetStringUTF16(IDS_AUTOFILL_LAST_NAME_RE),
- &v->last_name_)) {
- continue;
- }
-
// We check for a middle initial before checking for a middle name
// because at least one page (PC Connection.html) has a field marked
// as both (the label text is "MI" and the element name is
@@ -184,6 +177,13 @@ FirstLastNameField* FirstLastNameField::ParseComponentNames(
continue;
}
+ if (!v->last_name_ &&
+ ParseField(scanner,
+ l10n_util::GetStringUTF16(IDS_AUTOFILL_LAST_NAME_RE),
+ &v->last_name_)) {
+ continue;
+ }
+
break;
}
diff --git a/chrome/renderer/autofill/form_manager.cc b/chrome/renderer/autofill/form_manager.cc
index a442f6e..d63fd46 100644
--- a/chrome/renderer/autofill/form_manager.cc
+++ b/chrome/renderer/autofill/form_manager.cc
@@ -134,8 +134,10 @@ string16 FindChildTextInner(const WebNode& node, int depth) {
string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
TrimPositions child_leading_whitespace =
TrimWhitespace(child_text, TRIM_LEADING, &child_text);
- if (node_trailing_whitespace || child_leading_whitespace)
+ if (node_trailing_whitespace || child_leading_whitespace ||
+ (node.nodeType() == WebNode::TextNode && node_text.empty())) {
node_text += ASCIIToUTF16(" ");
+ }
node_text += child_text;
node_trailing_whitespace =
TrimWhitespace(node_text, TRIM_TRAILING, &node_text);
@@ -145,8 +147,10 @@ string16 FindChildTextInner(const WebNode& node, int depth) {
string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1);
TrimPositions sibling_leading_whitespace =
TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text);
- if (node_trailing_whitespace || sibling_leading_whitespace)
+ if (node_trailing_whitespace || sibling_leading_whitespace ||
+ (node.nodeType() == WebNode::TextNode && node_text.empty())) {
node_text += ASCIIToUTF16(" ");
+ }
node_text += sibling_text;
return node_text;
@@ -246,7 +250,7 @@ string16 InferLabelFromPrevious(const WebFormControlElement& element) {
// or <tr><th>Some Text</th><td><input ...></td></tr>
// or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
// or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
-string16 InferLabelFromTable(const WebFormControlElement& element) {
+string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
WebNode parent = element.parentNode();
while (!parent.isNull() && parent.isElementNode() &&
!parent.to<WebElement>().hasTagName("td")) {
@@ -271,6 +275,33 @@ string16 InferLabelFromTable(const WebFormControlElement& element) {
}
// Helper for |InferLabelForElement()| that infers a label, if possible, from
+// surrounding table structure,
+// e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
+string16 InferLabelFromTableRow(const WebFormControlElement& element) {
+ WebNode parent = element.parentNode();
+ while (!parent.isNull() && parent.isElementNode() &&
+ !parent.to<WebElement>().hasTagName("tr")) {
+ parent = parent.parentNode();
+ }
+
+ if (parent.isNull())
+ return string16();
+
+ // Check all previous siblings, skipping non-element nodes, until we find a
+ // non-empty text block.
+ string16 inferred_label;
+ WebNode previous = parent.previousSibling();
+ while (inferred_label.empty() && !previous.isNull()) {
+ if (HasTagName(previous, "tr"))
+ inferred_label = FindChildText(previous.to<WebElement>());
+
+ previous = previous.previousSibling();
+ }
+
+ return inferred_label;
+}
+
+// Helper for |InferLabelForElement()| that infers a label, if possible, from
// a surrounding div table,
// e.g. <div>Some Text<span><input ...></span></div>
// e.g. <div>Some Text</div><div><input ...></div>
@@ -328,7 +359,12 @@ string16 InferLabelForElement(const WebFormControlElement& element) {
return inferred_label;
// If we didn't find a label, check for table cell case.
- inferred_label = InferLabelFromTable(element);
+ inferred_label = InferLabelFromTableColumn(element);
+ if (!inferred_label.empty())
+ return inferred_label;
+
+ // If we didn't find a label, check for table row case.
+ inferred_label = InferLabelFromTableRow(element);
if (!inferred_label.empty())
return inferred_label;
diff --git a/chrome/renderer/autofill/form_manager_browsertest.cc b/chrome/renderer/autofill/form_manager_browsertest.cc
index c930d25..fc0cd22 100644
--- a/chrome/renderer/autofill/form_manager_browsertest.cc
+++ b/chrome/renderer/autofill/form_manager_browsertest.cc
@@ -1507,6 +1507,52 @@ TEST_F(FormManagerTest, LabelsInferredFromTableAdjacentElements) {
labels, names, values);
}
+// Verify that we correctly infer labels when the label text resides in the
+// previous row.
+TEST_F(FormManagerTest, LabelsInferredFromTableRow) {
+ std::vector<string16> labels, names, values;
+
+ labels.push_back(ASCIIToUTF16("*First Name *Last Name *Email"));
+ names.push_back(ASCIIToUTF16("firstname"));
+ values.push_back(ASCIIToUTF16("John"));
+
+ labels.push_back(ASCIIToUTF16("*First Name *Last Name *Email"));
+ names.push_back(ASCIIToUTF16("lastname"));
+ values.push_back(ASCIIToUTF16("Smith"));
+
+ labels.push_back(ASCIIToUTF16("*First Name *Last Name *Email"));
+ names.push_back(ASCIIToUTF16("email"));
+ values.push_back(ASCIIToUTF16("john@example.com"));
+
+ ExpectLabels(
+ "<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">"
+ "<TABLE>"
+ " <TR>"
+ " <TD>*First Name</TD>"
+ " <TD>*Last Name</TD>"
+ " <TD>*Email</TD>"
+ " </TR>"
+ " <TR>"
+ " <TD>"
+ " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>"
+ " </TD>"
+ " <TD>"
+ " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>"
+ " </TD>"
+ " <TD>"
+ " <INPUT type=\"text\" id=\"email\" value=\"john@example.com\"/>"
+ " </TD>"
+ " </TR>"
+ " <TR>"
+ " <TD>"
+ " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>"
+ " </TD>"
+ " </TR>"
+ "</TABLE>"
+ "</FORM>",
+ labels, names, values);
+}
+
TEST_F(FormManagerTest, LabelsInferredFromDefinitionList) {
std::vector<string16> labels, names, values;
diff --git a/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out b/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out
index 1bc9610..5acb519 100644
--- a/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out
+++ b/chrome/test/data/autofill/heuristics/output/04_checkout_jcrew.com.out
@@ -1,7 +1,7 @@
ADDRESS_HOME_COUNTRY
-UNKNOWN_TYPE
-UNKNOWN_TYPE
-UNKNOWN_TYPE
+NAME_FIRST
+NAME_LAST
+COMPANY_NAME
ADDRESS_HOME_LINE1
ADDRESS_HOME_LINE2
ADDRESS_HOME_CITY
diff --git a/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out b/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out
index 8b5e0bf..89eb47b 100644
--- a/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out
+++ b/chrome/test/data/autofill/heuristics/output/06_checkout_petsmart.com.out
@@ -1,4 +1,4 @@
-ADDRESS_HOME_COUNTRY
+ADDRESS_BILLING_COUNTRY
NAME_FIRST
NAME_LAST
ADDRESS_BILLING_LINE1
diff --git a/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out b/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out
index 6347b0d..87068f9 100644
--- a/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out
+++ b/chrome/test/data/autofill/heuristics/output/06_checkout_urbanoutfitters.com.out
@@ -13,9 +13,9 @@ NAME_FIRST
NAME_LAST
UNKNOWN_TYPE
UNKNOWN_TYPE
-ADDRESS_HOME_CITY
-ADDRESS_HOME_STATE
-ADDRESS_HOME_ZIP
+ADDRESS_BILLING_CITY
+ADDRESS_BILLING_STATE
+ADDRESS_BILLING_ZIP
UNKNOWN_TYPE
PHONE_HOME_WHOLE_NUMBER
PHONE_HOME_WHOLE_NUMBER