summary | refs | log | tree | commit | diff | stats
path: root/chrome
diff options
context:
space:
mode:
author dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-03-11 22:49:04 +0000
committer dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-03-11 22:49:04 +0000
commit 9913ab8cc18beafd08427e3fd9d4786836e69949 (patch)
tree 218e40aae0b1c721639fc864c67c530a47783a44 /chrome
parent 9b0df87239c63ac3facc5a7937903b4eddcb31e4 (diff)
download chromium_src-9913ab8cc18beafd08427e3fd9d4786836e69949.zip
chromium_src-9913ab8cc18beafd08427e3fd9d4786836e69949.tar.gz
chromium_src-9913ab8cc18beafd08427e3fd9d4786836e69949.tar.bz2
AutoFill field parser fixes.
This fixes a number of form field parsing issues.
Fixes ECML field name matching.
Adds country field parsing.
Adds state field parsing.
Fixes multi-line address parsing.
Fixes an issue with empty field names confusing the parser.
Fixes an issue with phone field parsing.
Adds unit tests in form_structure_unittest.cc to verify parsing order issues.
Adds new unit tests in address_field_unittest.cc that cover parsing of each address component as well as ECML variants.
BUG=37776
TEST=FormStructureTest.Heuristics:AddressFieldTest.*, as well as manual steps described in bug report.
Review URL: http://codereview.chromium.org/867003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@41331 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r-- chrome/browser/autofill/address_field.cc 51
-rw-r--r-- chrome/browser/autofill/address_field.h 3
-rw-r--r-- chrome/browser/autofill/address_field_unittest.cc 307
-rw-r--r-- chrome/browser/autofill/form_field.cc 48
-rw-r--r-- chrome/browser/autofill/form_field.h 17
-rw-r--r-- chrome/browser/autofill/form_structure_unittest.cc 98
-rw-r--r-- chrome/browser/autofill/name_field.cc 2
-rw-r--r-- chrome/browser/autofill/phone_field.cc 4
-rw-r--r-- chrome/chrome_tests.gypi 1
9 files changed, 501 insertions, 30 deletions
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc
index 69f9085..5180952 100644
--- a/chrome/browser/autofill/address_field.cc
+++ b/chrome/browser/autofill/address_field.cc
@@ -66,6 +66,10 @@ bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const {
AddressField* AddressField::Parse(
std::vector<AutoFillField*>::const_iterator* iter,
bool is_ecml) {
+ DCHECK(iter);
+ if (!iter)
+ return NULL;
+
AddressField address_field;
std::vector<AutoFillField*>::const_iterator q = *iter;
string16 pattern;
@@ -92,7 +96,7 @@ AddressField* AddressField::Parse(
continue;
if ((!address_field.state_ || address_field.state_->IsEmpty()) &&
- address_field.ParseState(&q)) {
+ address_field.ParseState(&q, is_ecml, &address_field)) {
continue;
}
@@ -192,13 +196,17 @@ bool AddressField::ParseAddressLines(
if (is_ecml) {
pattern = GetEcmlPattern(kEcmlShipToAddress1,
kEcmlBillToAddress1, '|');
+ if (!ParseText(iter, pattern, &address_field->address1_))
+ return false;
} else {
pattern =
- ASCIIToUTF16("@address|street|address line|address1|street_line1");
- }
+ ASCIIToUTF16("street|address line|address1|street_line1");
+ string16 label_pattern = ASCIIToUTF16("address");
- if (!ParseText(iter, pattern, &address_field->address1_))
- return false;
+ if (!ParseText(iter, pattern, &address_field->address1_))
+ if (!ParseLabelText(iter, label_pattern, &address_field->address1_))
+ return false;
+ }
// Some pages (e.g. expedia_checkout.html) have an apartment or
// suite number at this point. The occasional page (e.g.
@@ -213,7 +221,7 @@ bool AddressField::ParseAddressLines(
pattern = GetEcmlPattern(kEcmlShipToAddress2,
kEcmlBillToAddress2, '|');
} else {
- pattern = ASCIIToUTF16("|address|address2|street|street_line2");
+ pattern = ASCIIToUTF16("^$|address|address2|street|street_line2");
}
ParseText(iter, pattern, &address_field->address2_);
@@ -230,8 +238,16 @@ bool AddressField::ParseCountry(
if (address_field->country_ && !address_field->country_->IsEmpty())
return false;
- // TODO(jhawkins): Parse the country.
- return false;
+ string16 pattern;
+ if (is_ecml)
+ pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|');
+ else
+ pattern = ASCIIToUTF16("country|location");
+
+ if (!ParseText(iter, pattern, &address_field->country_))
+ return false;
+
+ return true;
}
// static
@@ -257,7 +273,7 @@ bool AddressField::ParseZipCode(
pattern = GetEcmlPattern(kEcmlShipToPostalCode,
kEcmlBillToPostalCode, '|');
} else {
- pattern = ASCIIToUTF16("zip|postal|post code|^1z");
+ pattern = ASCIIToUTF16("zip|postal|post code|^1z$");
}
AddressType tempType;
@@ -281,7 +297,7 @@ bool AddressField::ParseZipCode(
if (!is_ecml) {
// Look for a zip+4, whose field name will also often contain
// the substring "zip".
- ParseText(iter, ASCIIToUTF16("zip|^-"), &address_field->zip4_);
+ ParseText(iter, ASCIIToUTF16("zip|^-$"), &address_field->zip4_);
}
return true;
@@ -309,9 +325,18 @@ bool AddressField::ParseCity(
}
bool AddressField::ParseState(
- std::vector<AutoFillField*>::const_iterator* iter) {
- // TODO(jhawkins): Parse the state.
- return false;
+ std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field) {
+ string16 pattern;
+ if (is_ecml)
+ pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|');
+ else
+ pattern = ASCIIToUTF16("state|county");
+
+ if (!ParseText(iter, pattern, &address_field->state_))
+ return false;
+
+ return true;
}
AddressType AddressField::AddressTypeFromText(const string16 &text) {
diff --git a/chrome/browser/autofill/address_field.h b/chrome/browser/autofill/address_field.h
index e95da58..7436e04 100644
--- a/chrome/browser/autofill/address_field.h
+++ b/chrome/browser/autofill/address_field.h
@@ -43,7 +43,8 @@ class AddressField : public FormField {
bool is_ecml, AddressField* address_field);
static bool ParseCity(std::vector<AutoFillField*>::const_iterator* iter,
bool is_ecml, AddressField* address_field);
- bool ParseState(std::vector<AutoFillField*>::const_iterator* iter);
+ bool ParseState(std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field);
// Looks for an address type in the given text, which the caller must
// convert to lowercase.
diff --git a/chrome/browser/autofill/address_field_unittest.cc b/chrome/browser/autofill/address_field_unittest.cc
new file mode 100644
index 0000000..5f87684
--- /dev/null
+++ b/chrome/browser/autofill/address_field_unittest.cc
@@ -0,0 +1,307 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/scoped_ptr.h"
+#include "base/scoped_vector.h"
+#include "chrome/browser/autofill/address_field.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebInputElement.h"
+#include "webkit/glue/form_field_values.h"
+
+namespace {
+
+class AddressFieldTest : public testing::Test {
+ public:
+ AddressFieldTest() {}
+
+ protected:
+ ScopedVector<AutoFillField> list_;
+ scoped_ptr<AddressField> field_;
+ FieldTypeMap field_type_map_;
+ std::vector<AutoFillField*>::const_iterator iter_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(AddressFieldTest);
+};
+
+TEST_F(AddressFieldTest, DeathOnNull) {
+ ASSERT_DEBUG_DEATH(AddressField::Parse(NULL, false), "");
+}
+
+TEST_F(AddressFieldTest, Empty) {
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_EQ(static_cast<AddressField*>(NULL), field_.get());
+}
+
+TEST_F(AddressFieldTest, NonParse) {
+ list_.push_back(new AutoFillField);
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_EQ(static_cast<AddressField*>(NULL), field_.get());
+}
+
+TEST_F(AddressFieldTest, ParseOneLineAddress) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"),
+ ASCIIToUTF16("address"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("addr1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
+ EXPECT_TRUE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]);
+}
+
+TEST_F(AddressFieldTest, ParseOneLineAddressEcml) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"),
+ kEcmlShipToAddress1,
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("addr1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, true));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
+ EXPECT_TRUE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]);
+}
+
+TEST_F(AddressFieldTest, ParseTwoLineAddress) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"),
+ ASCIIToUTF16("address"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("addr1")));
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16(""),
+ ASCIIToUTF16(""),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("addr2")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
+ EXPECT_TRUE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]);
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("addr2")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_LINE2, field_type_map_[ASCIIToUTF16("addr2")]);
+}
+
+TEST_F(AddressFieldTest, ParseTwoLineAddressEcml) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"),
+ kEcmlShipToAddress1,
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("addr1")));
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16(""),
+ kEcmlShipToAddress2,
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("addr2")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, true));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kShippingAddress, field_->FindType());
+ EXPECT_TRUE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]);
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("addr2")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_LINE2, field_type_map_[ASCIIToUTF16("addr2")]);
+}
+
+TEST_F(AddressFieldTest, ParseCity) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("City"),
+ ASCIIToUTF16("city"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("city1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("city1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_CITY, field_type_map_[ASCIIToUTF16("city1")]);
+}
+
+TEST_F(AddressFieldTest, ParseCityEcml) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("City"),
+ kEcmlShipToCity,
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("city1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, true));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("city1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_CITY, field_type_map_[ASCIIToUTF16("city1")]);
+}
+
+TEST_F(AddressFieldTest, ParseState) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("State"),
+ ASCIIToUTF16("state"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("state1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("state1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_STATE, field_type_map_[ASCIIToUTF16("state1")]);
+}
+
+TEST_F(AddressFieldTest, ParseStateEcml) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("State"),
+ kEcmlShipToStateProv,
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("state1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, true));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("state1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_STATE, field_type_map_[ASCIIToUTF16("state1")]);
+}
+
+TEST_F(AddressFieldTest, ParseZip) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Zip"),
+ ASCIIToUTF16("zip"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("zip1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("zip1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_ZIP, field_type_map_[ASCIIToUTF16("zip1")]);
+}
+
+TEST_F(AddressFieldTest, ParseZipEcml) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Zip"),
+ kEcmlShipToPostalCode,
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("zip1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, true));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("zip1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_ZIP, field_type_map_[ASCIIToUTF16("zip1")]);
+}
+
+TEST_F(AddressFieldTest, ParseCountry) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Country"),
+ ASCIIToUTF16("country"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("country1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, false));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("country1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_COUNTRY, field_type_map_[ASCIIToUTF16("country1")]);
+}
+
+TEST_F(AddressFieldTest, ParseCountryEcml) {
+ list_.push_back(
+ new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Country"),
+ kEcmlShipToCountry,
+ string16(),
+ ASCIIToUTF16("text"),
+ WebKit::WebInputElement::Text),
+ ASCIIToUTF16("country1")));
+ list_.push_back(NULL);
+ iter_ = list_.begin();
+ field_.reset(AddressField::Parse(&iter_, true));
+ ASSERT_NE(static_cast<AddressField*>(NULL), field_.get());
+ EXPECT_EQ(kGenericAddress, field_->FindType());
+ EXPECT_FALSE(field_->IsFullAddress());
+ ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_));
+ ASSERT_TRUE(
+ field_type_map_.find(ASCIIToUTF16("country1")) != field_type_map_.end());
+ EXPECT_EQ(ADDRESS_HOME_COUNTRY, field_type_map_[ASCIIToUTF16("country1")]);
+}
+
+} // namespace
diff --git a/chrome/browser/autofill/form_field.cc b/chrome/browser/autofill/form_field.cc
index b4d20b1..0aa8733 100644
--- a/chrome/browser/autofill/form_field.cc
+++ b/chrome/browser/autofill/form_field.cc
@@ -43,16 +43,24 @@ class EmailField : public FormField {
};
// static
-bool FormField::Match(AutoFillField* field, const string16& pattern) {
+bool FormField::Match(AutoFillField* field,
+ const string16& pattern,
+ bool match_label_only) {
WebKit::WebRegularExpression re(WebKit::WebString(pattern),
WebKit::WebTextCaseInsensitive);
- // For now, we apply the same pattern to the field's label and the field's
- // name. Matching the name is a bit of a long shot for many patterns, but
- // it generally doesn't hurt to try.
- if (re.match(WebKit::WebString(field->label())) != -1 ||
- re.match(WebKit::WebString(field->name())) != -1) {
- return true;
+ if (match_label_only) {
+ if (re.match(WebKit::WebString(field->label())) != -1) {
+ return true;
+ }
+ } else {
+ // For now, we apply the same pattern to the field's label and the field's
+ // name. Matching the name is a bit of a long shot for many patterns, but
+ // it generally doesn't hurt to try.
+ if (re.match(WebKit::WebString(field->label())) != -1 ||
+ re.match(WebKit::WebString(field->name())) != -1) {
+ return true;
+ }
}
return false;
@@ -92,11 +100,27 @@ bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
const string16& pattern,
AutoFillField** dest) {
+ return ParseText(iter, pattern, dest, false);
+}
+
+// static
+bool FormField::ParseLabelText(
+ std::vector<AutoFillField*>::const_iterator* iter,
+ const string16& pattern,
+ AutoFillField** dest) {
+ return ParseText(iter, pattern, dest, true);
+}
+
+// static
+bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
+ const string16& pattern,
+ AutoFillField** dest,
+ bool match_label_only) {
AutoFillField* field = **iter;
if (!field)
return false;
- if (Match(field, pattern)) {
+ if (Match(field, pattern, match_label_only)) {
*dest = field;
(*iter)++;
return true;
@@ -108,7 +132,7 @@ bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
// static
bool FormField::ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter) {
// TODO(jhawkins): Handle select fields.
- return ParseText(iter, ASCIIToUTF16(""));
+ return ParseText(iter, ASCIIToUTF16("^$"));
}
// static
@@ -122,14 +146,14 @@ bool FormField::Add(FieldTypeMap* field_type_map, AutoFillField* field,
}
string16 FormField::GetEcmlPattern(const string16& ecml_name) {
- return ASCIIToUTF16("&") + ecml_name;
+ return ASCIIToUTF16("^") + ecml_name;
}
string16 FormField::GetEcmlPattern(const string16& ecml_name1,
const string16& ecml_name2,
string16::value_type pattern_operator) {
- string16 ampersand = ASCIIToUTF16("&");
- return ampersand + ecml_name1 + pattern_operator + ampersand + ecml_name2;
+ string16 begins_with = ASCIIToUTF16("^");
+ return begins_with + ecml_name1 + pattern_operator + begins_with + ecml_name2;
}
FormFieldSet::FormFieldSet(FormStructure* fields) {
diff --git a/chrome/browser/autofill/form_field.h b/chrome/browser/autofill/form_field.h
index 78d2109..d702719 100644
--- a/chrome/browser/autofill/form_field.h
+++ b/chrome/browser/autofill/form_field.h
@@ -82,7 +82,10 @@ class FormField {
virtual FormFieldType GetFormFieldType() const { return kOtherFieldType; }
// Returns true if |field| contains the regexp |pattern| in the name or label.
- static bool Match(AutoFillField* field, const string16& pattern);
+ // If |match_label_only| is true, then only the field's label is considered.
+ static bool Match(AutoFillField* field,
+ const string16& pattern,
+ bool match_label_only);
// Parses a field using the different field views we know about. |is_ecml|
// should be true when the field conforms to the ECML specification.
@@ -101,6 +104,12 @@ class FormField {
const string16& pattern,
AutoFillField** dest);
+ // Attempts to parse a text field label with the given pattern. Returns true
+ // on success and fills |dest| with a pointer to the field.
+ static bool ParseLabelText(std::vector<AutoFillField*>::const_iterator* iter,
+ const string16& pattern,
+ AutoFillField** dest);
+
// Attempts to parse a control with an empty label.
static bool ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter);
@@ -120,6 +129,12 @@ class FormField {
static string16 GetEcmlPattern(const string16& ecml_name1,
const string16& ecml_name2,
string16::value_type pattern_operator);
+
+ private:
+ static bool ParseText(std::vector<AutoFillField*>::const_iterator* iter,
+ const string16& pattern,
+ AutoFillField** dest,
+ bool match_label_only);
};
class FormFieldSet : public std::vector<FormField*> {
diff --git a/chrome/browser/autofill/form_structure_unittest.cc b/chrome/browser/autofill/form_structure_unittest.cc
index 7f1fd37..3d7df4c 100644
--- a/chrome/browser/autofill/form_structure_unittest.cc
+++ b/chrome/browser/autofill/form_structure_unittest.cc
@@ -12,6 +12,8 @@
using WebKit::WebInputElement;
+namespace {
+
TEST(FormStructureTest, FieldCount) {
webkit_glue::FormFieldValues values;
values.method = ASCIIToUTF16("post");
@@ -91,3 +93,99 @@ TEST(FormStructureTest, IsAutoFillable) {
form_structure.reset(new FormStructure(values));
EXPECT_TRUE(form_structure->IsAutoFillable());
}
+
+TEST(FormStructureTest, Heuristics) {
+ scoped_ptr<FormStructure> form_structure;
+ webkit_glue::FormFieldValues values;
+
+ values.method = ASCIIToUTF16("post");
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("First Name"),
+ ASCIIToUTF16("firstname"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Last Name"),
+ ASCIIToUTF16("lastname"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("EMail"),
+ ASCIIToUTF16("email"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Phone"),
+ ASCIIToUTF16("phone"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Fax"),
+ ASCIIToUTF16("fax"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Address"),
+ ASCIIToUTF16("address"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("City"),
+ ASCIIToUTF16("city"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Zip code"),
+ ASCIIToUTF16("zipcode"),
+ string16(),
+ ASCIIToUTF16("text"),
+ WebInputElement::Text));
+ values.elements.push_back(webkit_glue::FormField(string16(),
+ ASCIIToUTF16("Submit"),
+ string16(),
+ ASCIIToUTF16("submit"),
+ WebInputElement::Submit));
+ form_structure.reset(new FormStructure(values));
+ EXPECT_TRUE(form_structure->IsAutoFillable());
+
+ // Expect the correct number of fields.
+ ASSERT_EQ(8UL, form_structure->field_count());
+
+ // Check that heuristics are initialized as UNKNOWN_TYPE.
+ std::vector<AutoFillField*>::const_iterator iter;
+ size_t i;
+ for (iter = form_structure->begin(), i = 0;
+ iter != form_structure->end();
+ ++iter, ++i) {
+ // Expect last element to be NULL.
+ if (i == form_structure->field_count()) {
+ ASSERT_EQ(static_cast<AutoFillField*>(NULL), *iter);
+ } else {
+ ASSERT_NE(static_cast<AutoFillField*>(NULL), *iter);
+ EXPECT_EQ(UNKNOWN_TYPE, (*iter)->heuristic_type());
+ }
+ }
+
+ // Compute heuristic types.
+ form_structure->GetHeuristicAutoFillTypes();
+
+ // Check that heuristics are no longer UNKNOWN_TYPE.
+ // First name.
+ EXPECT_EQ(NAME_FIRST, form_structure->field(0)->heuristic_type());
+ // Last name.
+ EXPECT_EQ(NAME_LAST, form_structure->field(1)->heuristic_type());
+ // Email.
+ EXPECT_EQ(EMAIL_ADDRESS, form_structure->field(2)->heuristic_type());
+ // Phone.
+ EXPECT_EQ(PHONE_HOME_WHOLE_NUMBER,
+ form_structure->field(3)->heuristic_type());
+ // Fax. Note, we don't currently match fax.
+ EXPECT_EQ(UNKNOWN_TYPE, form_structure->field(4)->heuristic_type());
+ // Address.
+ EXPECT_EQ(ADDRESS_HOME_LINE1, form_structure->field(5)->heuristic_type());
+ // City.
+ EXPECT_EQ(ADDRESS_HOME_CITY, form_structure->field(6)->heuristic_type());
+ // Zip.
+ EXPECT_EQ(ADDRESS_HOME_ZIP, form_structure->field(7)->heuristic_type());
+}
+
+} // namespace
diff --git a/chrome/browser/autofill/name_field.cc b/chrome/browser/autofill/name_field.cc
index 3510e16..2eef561 100644
--- a/chrome/browser/autofill/name_field.cc
+++ b/chrome/browser/autofill/name_field.cc
@@ -21,7 +21,7 @@ FullNameField* FullNameField::Parse(
// Exclude labels containing the string "username", which typically
// denotes a login ID rather than the user's actual name.
AutoFillField* field = **iter;
- if (Match(field, ASCIIToUTF16("username")))
+ if (Match(field, ASCIIToUTF16("username"), false))
return NULL;
// Searching for any label containing the word "name" is too general;
diff --git a/chrome/browser/autofill/phone_field.cc b/chrome/browser/autofill/phone_field.cc
index 00ff7ba..ea9ec71c 100644
--- a/chrome/browser/autofill/phone_field.cc
+++ b/chrome/browser/autofill/phone_field.cc
@@ -42,11 +42,11 @@ PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter,
// uk/Furniture123-1.html) have several phone numbers in succession and we
// don't want those to be parsed as components of a single phone number.
if (phone2 == NULL)
- ParseText(&q, ASCIIToUTF16("^-|\\)|"), &phone2);
+ ParseText(&q, ASCIIToUTF16("^-$|\\)$"), &phone2);
// Look for a third text box.
if (phone2)
- ParseText(&q, ASCIIToUTF16("^-|"), &phone3);
+ ParseText(&q, ASCIIToUTF16("^-$"), &phone3);
// Now we have one, two, or three phone number text fields. Package them
// up into a PhoneField object.
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 0387d50..234fb1b 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -535,6 +535,7 @@
'browser/autocomplete/history_url_provider_unittest.cc',
'browser/autocomplete/keyword_provider_unittest.cc',
'browser/autocomplete/search_provider_unittest.cc',
+ 'browser/autofill/address_field_unittest.cc',
'browser/autofill/autofill_common_unittest.cc',
'browser/autofill/autofill_common_unittest.h',
'browser/autofill/autofill_address_model_mac_unittest.mm',