diff options
author | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-03-11 22:49:04 +0000 |
---|---|---|
committer | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-03-11 22:49:04 +0000 |
commit | 9913ab8cc18beafd08427e3fd9d4786836e69949 (patch) | |
tree | 218e40aae0b1c721639fc864c67c530a47783a44 /chrome | |
parent | 9b0df87239c63ac3facc5a7937903b4eddcb31e4 (diff) | |
download | chromium_src-9913ab8cc18beafd08427e3fd9d4786836e69949.zip chromium_src-9913ab8cc18beafd08427e3fd9d4786836e69949.tar.gz chromium_src-9913ab8cc18beafd08427e3fd9d4786836e69949.tar.bz2 |
AutoFill field parser fixes.
This fixes a number of form field parsing issues. It fixes ECML field name matching and multi-line address parsing, adds country field parsing, and adds state field parsing. It also fixes an issue where empty field names confused the parser, and an issue with phone field parsing. Adds unit tests in form_structure_unittest.cc to verify parsing order issues. Adds new unit tests in address_field_unittest.cc that cover parsing of each address component as well as the ECML variants.
BUG=37776
TEST=FormStructureTest.Heuristics:AddressFieldTest.*, as well as manual steps described in bug report.
Review URL: http://codereview.chromium.org/867003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@41331 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r-- | chrome/browser/autofill/address_field.cc | 51 | ||||
-rw-r--r-- | chrome/browser/autofill/address_field.h | 3 | ||||
-rw-r--r-- | chrome/browser/autofill/address_field_unittest.cc | 307 | ||||
-rw-r--r-- | chrome/browser/autofill/form_field.cc | 48 | ||||
-rw-r--r-- | chrome/browser/autofill/form_field.h | 17 | ||||
-rw-r--r-- | chrome/browser/autofill/form_structure_unittest.cc | 98 | ||||
-rw-r--r-- | chrome/browser/autofill/name_field.cc | 2 | ||||
-rw-r--r-- | chrome/browser/autofill/phone_field.cc | 4 | ||||
-rw-r--r-- | chrome/chrome_tests.gypi | 1 |
9 files changed, 501 insertions, 30 deletions
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc index 69f9085..5180952 100644 --- a/chrome/browser/autofill/address_field.cc +++ b/chrome/browser/autofill/address_field.cc @@ -66,6 +66,10 @@ bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { AddressField* AddressField::Parse( std::vector<AutoFillField*>::const_iterator* iter, bool is_ecml) { + DCHECK(iter); + if (!iter) + return NULL; + AddressField address_field; std::vector<AutoFillField*>::const_iterator q = *iter; string16 pattern; @@ -92,7 +96,7 @@ AddressField* AddressField::Parse( continue; if ((!address_field.state_ || address_field.state_->IsEmpty()) && - address_field.ParseState(&q)) { + address_field.ParseState(&q, is_ecml, &address_field)) { continue; } @@ -192,13 +196,17 @@ bool AddressField::ParseAddressLines( if (is_ecml) { pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); + if (!ParseText(iter, pattern, &address_field->address1_)) + return false; } else { pattern = - ASCIIToUTF16("@address|street|address line|address1|street_line1"); - } + ASCIIToUTF16("street|address line|address1|street_line1"); + string16 label_pattern = ASCIIToUTF16("address"); - if (!ParseText(iter, pattern, &address_field->address1_)) - return false; + if (!ParseText(iter, pattern, &address_field->address1_)) + if (!ParseLabelText(iter, label_pattern, &address_field->address1_)) + return false; + } // Some pages (e.g. expedia_checkout.html) have an apartment or // suite number at this point. The occasional page (e.g. 
@@ -213,7 +221,7 @@ bool AddressField::ParseAddressLines( pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); } else { - pattern = ASCIIToUTF16("|address|address2|street|street_line2"); + pattern = ASCIIToUTF16("^$|address|address2|street|street_line2"); } ParseText(iter, pattern, &address_field->address2_); @@ -230,8 +238,16 @@ bool AddressField::ParseCountry( if (address_field->country_ && !address_field->country_->IsEmpty()) return false; - // TODO(jhawkins): Parse the country. - return false; + string16 pattern; + if (is_ecml) + pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); + else + pattern = ASCIIToUTF16("country|location"); + + if (!ParseText(iter, pattern, &address_field->country_)) + return false; + + return true; } // static @@ -257,7 +273,7 @@ bool AddressField::ParseZipCode( pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|'); } else { - pattern = ASCIIToUTF16("zip|postal|post code|^1z"); + pattern = ASCIIToUTF16("zip|postal|post code|^1z$"); } AddressType tempType; @@ -281,7 +297,7 @@ bool AddressField::ParseZipCode( if (!is_ecml) { // Look for a zip+4, whose field name will also often contain // the substring "zip". - ParseText(iter, ASCIIToUTF16("zip|^-"), &address_field->zip4_); + ParseText(iter, ASCIIToUTF16("zip|^-$"), &address_field->zip4_); } return true; @@ -309,9 +325,18 @@ bool AddressField::ParseCity( } bool AddressField::ParseState( - std::vector<AutoFillField*>::const_iterator* iter) { - // TODO(jhawkins): Parse the state. 
- return false; + std::vector<AutoFillField*>::const_iterator* iter, + bool is_ecml, AddressField* address_field) { + string16 pattern; + if (is_ecml) + pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); + else + pattern = ASCIIToUTF16("state|county"); + + if (!ParseText(iter, pattern, &address_field->state_)) + return false; + + return true; } AddressType AddressField::AddressTypeFromText(const string16 &text) { diff --git a/chrome/browser/autofill/address_field.h b/chrome/browser/autofill/address_field.h index e95da58..7436e04 100644 --- a/chrome/browser/autofill/address_field.h +++ b/chrome/browser/autofill/address_field.h @@ -43,7 +43,8 @@ class AddressField : public FormField { bool is_ecml, AddressField* address_field); static bool ParseCity(std::vector<AutoFillField*>::const_iterator* iter, bool is_ecml, AddressField* address_field); - bool ParseState(std::vector<AutoFillField*>::const_iterator* iter); + bool ParseState(std::vector<AutoFillField*>::const_iterator* iter, + bool is_ecml, AddressField* address_field); // Looks for an address type in the given text, which the caller must // convert to lowercase. diff --git a/chrome/browser/autofill/address_field_unittest.cc b/chrome/browser/autofill/address_field_unittest.cc new file mode 100644 index 0000000..5f87684 --- /dev/null +++ b/chrome/browser/autofill/address_field_unittest.cc @@ -0,0 +1,307 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/scoped_ptr.h" +#include "base/scoped_vector.h" +#include "chrome/browser/autofill/address_field.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/WebKit/WebKit/chromium/public/WebInputElement.h" +#include "webkit/glue/form_field_values.h" + +namespace { + +class AddressFieldTest : public testing::Test { + public: + AddressFieldTest() {} + + protected: + ScopedVector<AutoFillField> list_; + scoped_ptr<AddressField> field_; + FieldTypeMap field_type_map_; + std::vector<AutoFillField*>::const_iterator iter_; + + private: + DISALLOW_COPY_AND_ASSIGN(AddressFieldTest); +}; + +TEST_F(AddressFieldTest, DeathOnNull) { + ASSERT_DEBUG_DEATH(AddressField::Parse(NULL, false), ""); +} + +TEST_F(AddressFieldTest, Empty) { + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_EQ(static_cast<AddressField*>(NULL), field_.get()); +} + +TEST_F(AddressFieldTest, NonParse) { + list_.push_back(new AutoFillField); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_EQ(static_cast<AddressField*>(NULL), field_.get()); +} + +TEST_F(AddressFieldTest, ParseOneLineAddress) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"), + ASCIIToUTF16("address"), + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("addr1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kShippingAddress, field_->FindType()); + EXPECT_TRUE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]); +} + +TEST_F(AddressFieldTest, ParseOneLineAddressEcml) { + list_.push_back( + new 
AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"), + kEcmlShipToAddress1, + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("addr1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, true)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kShippingAddress, field_->FindType()); + EXPECT_TRUE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]); +} + +TEST_F(AddressFieldTest, ParseTwoLineAddress) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"), + ASCIIToUTF16("address"), + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("addr1"))); + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16(""), + ASCIIToUTF16(""), + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("addr2"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kShippingAddress, field_->FindType()); + EXPECT_TRUE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("addr2")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_LINE2, field_type_map_[ASCIIToUTF16("addr2")]); +} + +TEST_F(AddressFieldTest, ParseTwoLineAddressEcml) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Address"), + kEcmlShipToAddress1, + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + 
ASCIIToUTF16("addr1"))); + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16(""), + kEcmlShipToAddress2, + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("addr2"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, true)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kShippingAddress, field_->FindType()); + EXPECT_TRUE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("addr1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_LINE1, field_type_map_[ASCIIToUTF16("addr1")]); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("addr2")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_LINE2, field_type_map_[ASCIIToUTF16("addr2")]); +} + +TEST_F(AddressFieldTest, ParseCity) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("City"), + ASCIIToUTF16("city"), + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("city1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("city1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_CITY, field_type_map_[ASCIIToUTF16("city1")]); +} + +TEST_F(AddressFieldTest, ParseCityEcml) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("City"), + kEcmlShipToCity, + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("city1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, true)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + 
EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("city1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_CITY, field_type_map_[ASCIIToUTF16("city1")]); +} + +TEST_F(AddressFieldTest, ParseState) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("State"), + ASCIIToUTF16("state"), + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("state1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("state1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_STATE, field_type_map_[ASCIIToUTF16("state1")]); +} + +TEST_F(AddressFieldTest, ParseStateEcml) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("State"), + kEcmlShipToStateProv, + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("state1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, true)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("state1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_STATE, field_type_map_[ASCIIToUTF16("state1")]); +} + +TEST_F(AddressFieldTest, ParseZip) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Zip"), + ASCIIToUTF16("zip"), + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + 
ASCIIToUTF16("zip1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("zip1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_ZIP, field_type_map_[ASCIIToUTF16("zip1")]); +} + +TEST_F(AddressFieldTest, ParseZipEcml) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Zip"), + kEcmlShipToPostalCode, + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("zip1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, true)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("zip1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_ZIP, field_type_map_[ASCIIToUTF16("zip1")]); +} + +TEST_F(AddressFieldTest, ParseCountry) { + list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Country"), + ASCIIToUTF16("country"), + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("country1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, false)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("country1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_COUNTRY, field_type_map_[ASCIIToUTF16("country1")]); +} + +TEST_F(AddressFieldTest, ParseCountryEcml) { 
+ list_.push_back( + new AutoFillField(webkit_glue::FormField(ASCIIToUTF16("Country"), + kEcmlShipToCountry, + string16(), + ASCIIToUTF16("text"), + WebKit::WebInputElement::Text), + ASCIIToUTF16("country1"))); + list_.push_back(NULL); + iter_ = list_.begin(); + field_.reset(AddressField::Parse(&iter_, true)); + ASSERT_NE(static_cast<AddressField*>(NULL), field_.get()); + EXPECT_EQ(kGenericAddress, field_->FindType()); + EXPECT_FALSE(field_->IsFullAddress()); + ASSERT_TRUE(field_->GetFieldInfo(&field_type_map_)); + ASSERT_TRUE( + field_type_map_.find(ASCIIToUTF16("country1")) != field_type_map_.end()); + EXPECT_EQ(ADDRESS_HOME_COUNTRY, field_type_map_[ASCIIToUTF16("country1")]); +} + +} // namespace diff --git a/chrome/browser/autofill/form_field.cc b/chrome/browser/autofill/form_field.cc index b4d20b1..0aa8733 100644 --- a/chrome/browser/autofill/form_field.cc +++ b/chrome/browser/autofill/form_field.cc @@ -43,16 +43,24 @@ class EmailField : public FormField { }; // static -bool FormField::Match(AutoFillField* field, const string16& pattern) { +bool FormField::Match(AutoFillField* field, + const string16& pattern, + bool match_label_only) { WebKit::WebRegularExpression re(WebKit::WebString(pattern), WebKit::WebTextCaseInsensitive); - // For now, we apply the same pattern to the field's label and the field's - // name. Matching the name is a bit of a long shot for many patterns, but - // it generally doesn't hurt to try. - if (re.match(WebKit::WebString(field->label())) != -1 || - re.match(WebKit::WebString(field->name())) != -1) { - return true; + if (match_label_only) { + if (re.match(WebKit::WebString(field->label())) != -1) { + return true; + } + } else { + // For now, we apply the same pattern to the field's label and the field's + // name. Matching the name is a bit of a long shot for many patterns, but + // it generally doesn't hurt to try. 
+ if (re.match(WebKit::WebString(field->label())) != -1 || + re.match(WebKit::WebString(field->name())) != -1) { + return true; + } } return false; @@ -92,11 +100,27 @@ bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, const string16& pattern, AutoFillField** dest) { + return ParseText(iter, pattern, dest, false); +} + +// static +bool FormField::ParseLabelText( + std::vector<AutoFillField*>::const_iterator* iter, + const string16& pattern, + AutoFillField** dest) { + return ParseText(iter, pattern, dest, true); +} + +// static +bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, + const string16& pattern, + AutoFillField** dest, + bool match_label_only) { AutoFillField* field = **iter; if (!field) return false; - if (Match(field, pattern)) { + if (Match(field, pattern, match_label_only)) { *dest = field; (*iter)++; return true; @@ -108,7 +132,7 @@ bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, // static bool FormField::ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter) { // TODO(jhawkins): Handle select fields. 
- return ParseText(iter, ASCIIToUTF16("")); + return ParseText(iter, ASCIIToUTF16("^$")); } // static @@ -122,14 +146,14 @@ bool FormField::Add(FieldTypeMap* field_type_map, AutoFillField* field, } string16 FormField::GetEcmlPattern(const string16& ecml_name) { - return ASCIIToUTF16("&") + ecml_name; + return ASCIIToUTF16("^") + ecml_name; } string16 FormField::GetEcmlPattern(const string16& ecml_name1, const string16& ecml_name2, string16::value_type pattern_operator) { - string16 ampersand = ASCIIToUTF16("&"); - return ampersand + ecml_name1 + pattern_operator + ampersand + ecml_name2; + string16 begins_with = ASCIIToUTF16("^"); + return begins_with + ecml_name1 + pattern_operator + begins_with + ecml_name2; } FormFieldSet::FormFieldSet(FormStructure* fields) { diff --git a/chrome/browser/autofill/form_field.h b/chrome/browser/autofill/form_field.h index 78d2109..d702719 100644 --- a/chrome/browser/autofill/form_field.h +++ b/chrome/browser/autofill/form_field.h @@ -82,7 +82,10 @@ class FormField { virtual FormFieldType GetFormFieldType() const { return kOtherFieldType; } // Returns true if |field| contains the regexp |pattern| in the name or label. - static bool Match(AutoFillField* field, const string16& pattern); + // If |match_label_only| is true, then only the field's label is considered. + static bool Match(AutoFillField* field, + const string16& pattern, + bool match_label_only); // Parses a field using the different field views we know about. |is_ecml| // should be true when the field conforms to the ECML specification. @@ -101,6 +104,12 @@ class FormField { const string16& pattern, AutoFillField** dest); + // Attempts to parse a text field label with the given pattern. Returns true + // on success and fills |dest| with a pointer to the field. + static bool ParseLabelText(std::vector<AutoFillField*>::const_iterator* iter, + const string16& pattern, + AutoFillField** dest); + // Attempts to parse a control with an empty label. 
static bool ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter); @@ -120,6 +129,12 @@ class FormField { static string16 GetEcmlPattern(const string16& ecml_name1, const string16& ecml_name2, string16::value_type pattern_operator); + + private: + static bool ParseText(std::vector<AutoFillField*>::const_iterator* iter, + const string16& pattern, + AutoFillField** dest, + bool match_label_only); }; class FormFieldSet : public std::vector<FormField*> { diff --git a/chrome/browser/autofill/form_structure_unittest.cc b/chrome/browser/autofill/form_structure_unittest.cc index 7f1fd37..3d7df4c 100644 --- a/chrome/browser/autofill/form_structure_unittest.cc +++ b/chrome/browser/autofill/form_structure_unittest.cc @@ -12,6 +12,8 @@ using WebKit::WebInputElement; +namespace { + TEST(FormStructureTest, FieldCount) { webkit_glue::FormFieldValues values; values.method = ASCIIToUTF16("post"); @@ -91,3 +93,99 @@ TEST(FormStructureTest, IsAutoFillable) { form_structure.reset(new FormStructure(values)); EXPECT_TRUE(form_structure->IsAutoFillable()); } + +TEST(FormStructureTest, Heuristics) { + scoped_ptr<FormStructure> form_structure; + webkit_glue::FormFieldValues values; + + values.method = ASCIIToUTF16("post"); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("First Name"), + ASCIIToUTF16("firstname"), + string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Last Name"), + ASCIIToUTF16("lastname"), + string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("EMail"), + ASCIIToUTF16("email"), + string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Phone"), + ASCIIToUTF16("phone"), + string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Fax"), + ASCIIToUTF16("fax"), + 
string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Address"), + ASCIIToUTF16("address"), + string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("City"), + ASCIIToUTF16("city"), + string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(ASCIIToUTF16("Zip code"), + ASCIIToUTF16("zipcode"), + string16(), + ASCIIToUTF16("text"), + WebInputElement::Text)); + values.elements.push_back(webkit_glue::FormField(string16(), + ASCIIToUTF16("Submit"), + string16(), + ASCIIToUTF16("submit"), + WebInputElement::Submit)); + form_structure.reset(new FormStructure(values)); + EXPECT_TRUE(form_structure->IsAutoFillable()); + + // Expect the correct number of fields. + ASSERT_EQ(8UL, form_structure->field_count()); + + // Check that heuristics are initialized as UNKNOWN_TYPE. + std::vector<AutoFillField*>::const_iterator iter; + size_t i; + for (iter = form_structure->begin(), i = 0; + iter != form_structure->end(); + ++iter, ++i) { + // Expect last element to be NULL. + if (i == form_structure->field_count()) { + ASSERT_EQ(static_cast<AutoFillField*>(NULL), *iter); + } else { + ASSERT_NE(static_cast<AutoFillField*>(NULL), *iter); + EXPECT_EQ(UNKNOWN_TYPE, (*iter)->heuristic_type()); + } + } + + // Compute heuristic types. + form_structure->GetHeuristicAutoFillTypes(); + + // Check that heuristics are no longer UNKNOWN_TYPE. + // First name. + EXPECT_EQ(NAME_FIRST, form_structure->field(0)->heuristic_type()); + // Last name. + EXPECT_EQ(NAME_LAST, form_structure->field(1)->heuristic_type()); + // Email. + EXPECT_EQ(EMAIL_ADDRESS, form_structure->field(2)->heuristic_type()); + // Phone. + EXPECT_EQ(PHONE_HOME_WHOLE_NUMBER, + form_structure->field(3)->heuristic_type()); + // Fax. Note, we don't currently match fax. 
+ EXPECT_EQ(UNKNOWN_TYPE, form_structure->field(4)->heuristic_type()); + // Address. + EXPECT_EQ(ADDRESS_HOME_LINE1, form_structure->field(5)->heuristic_type()); + // City. + EXPECT_EQ(ADDRESS_HOME_CITY, form_structure->field(6)->heuristic_type()); + // Zip. + EXPECT_EQ(ADDRESS_HOME_ZIP, form_structure->field(7)->heuristic_type()); +} + +} // namespace diff --git a/chrome/browser/autofill/name_field.cc b/chrome/browser/autofill/name_field.cc index 3510e16..2eef561 100644 --- a/chrome/browser/autofill/name_field.cc +++ b/chrome/browser/autofill/name_field.cc @@ -21,7 +21,7 @@ FullNameField* FullNameField::Parse( // Exclude labels containing the string "username", which typically // denotes a login ID rather than the user's actual name. AutoFillField* field = **iter; - if (Match(field, ASCIIToUTF16("username"))) + if (Match(field, ASCIIToUTF16("username"), false)) return NULL; // Searching for any label containing the word "name" is too general; diff --git a/chrome/browser/autofill/phone_field.cc b/chrome/browser/autofill/phone_field.cc index 00ff7ba..ea9ec71c 100644 --- a/chrome/browser/autofill/phone_field.cc +++ b/chrome/browser/autofill/phone_field.cc @@ -42,11 +42,11 @@ PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter, // uk/Furniture123-1.html) have several phone numbers in succession and we // don't want those to be parsed as components of a single phone number. if (phone2 == NULL) - ParseText(&q, ASCIIToUTF16("^-|\\)|"), &phone2); + ParseText(&q, ASCIIToUTF16("^-$|\\)$"), &phone2); // Look for a third text box. if (phone2) - ParseText(&q, ASCIIToUTF16("^-|"), &phone3); + ParseText(&q, ASCIIToUTF16("^-$"), &phone3); // Now we have one, two, or three phone number text fields. Package them // up into a PhoneField object. 
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index 0387d50..234fb1b 100644 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -535,6 +535,7 @@ 'browser/autocomplete/history_url_provider_unittest.cc', 'browser/autocomplete/keyword_provider_unittest.cc', 'browser/autocomplete/search_provider_unittest.cc', + 'browser/autofill/address_field_unittest.cc', 'browser/autofill/autofill_common_unittest.cc', 'browser/autofill/autofill_common_unittest.h', 'browser/autofill/autofill_address_model_mac_unittest.mm', |