summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhawkins@chromium.org <jhawkins@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-12-15 22:18:37 +0000
committerjhawkins@chromium.org <jhawkins@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-12-15 22:18:37 +0000
commitd7240bfe793d42544e2256eb45367f01c7f2423e (patch)
treef47b56d7f255a5adc643faab9491a027920ccb35
parentf2ece936f1ec85e684fe0a0712256106272190d1 (diff)
downloadchromium_src-d7240bfe793d42544e2256eb45367f01c7f2423e.zip
chromium_src-d7240bfe793d42544e2256eb45367f01c7f2423e.tar.gz
chromium_src-d7240bfe793d42544e2256eb45367f01c7f2423e.tar.bz2
Implement AddressField, a FormField that matches a set of address fields in a form.
BUG=none TEST=none Review URL: http://codereview.chromium.org/492022 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@34614 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/browser/autofill/address_field.cc334
-rw-r--r--chrome/browser/autofill/address_field.h65
-rw-r--r--chrome/browser/autofill/form_field.cc6
-rwxr-xr-xchrome/chrome_browser.gypi2
4 files changed, 405 insertions, 2 deletions
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc
new file mode 100644
index 0000000..f2a89c2
--- /dev/null
+++ b/chrome/browser/autofill/address_field.cc
@@ -0,0 +1,334 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/autofill/address_field.h"
+
+#include "base/logging.h"
+#include "base/string16.h"
+#include "base/string_util.h"
+#include "chrome/browser/autofill/autofill_field.h"
+
+bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const {
+ AutoFillFieldType address_line1;
+ AutoFillFieldType address_line2;
+ AutoFillFieldType address_appt_num;
+ AutoFillFieldType address_city;
+ AutoFillFieldType address_state;
+ AutoFillFieldType address_zip;
+ AutoFillFieldType address_country;
+
+ switch (type_) {
+ case kShippingAddress:
+ // Fallthrough. Autofill no longer supports shipping addresses.
+ case kGenericAddress:
+ address_line1 = ADDRESS_HOME_LINE1;
+ address_line2 = ADDRESS_HOME_LINE2;
+ address_appt_num = ADDRESS_HOME_APPT_NUM;
+ address_city = ADDRESS_HOME_CITY;
+ address_state = ADDRESS_HOME_STATE;
+ address_zip = ADDRESS_HOME_ZIP;
+ address_country = ADDRESS_HOME_COUNTRY;
+ break;
+
+ case kBillingAddress:
+ address_line1 = ADDRESS_BILLING_LINE1;
+ address_line2 = ADDRESS_BILLING_LINE2;
+ address_appt_num = ADDRESS_BILLING_APPT_NUM;
+ address_city = ADDRESS_BILLING_CITY;
+ address_state = ADDRESS_BILLING_STATE;
+ address_zip = ADDRESS_BILLING_ZIP;
+ address_country = ADDRESS_BILLING_COUNTRY;
+ break;
+
+ default:
+ NOTREACHED();
+ return false;
+ }
+
+ bool ok;
+ ok = Add(field_type_map, address1_, AutoFillType(address_line1));
+ DCHECK(ok);
+ ok = ok && Add(field_type_map, address2_, AutoFillType(address_line2));
+ DCHECK(ok);
+ ok = ok && Add(field_type_map, city_, AutoFillType(address_city));
+ DCHECK(ok);
+ ok = ok && Add(field_type_map, state_, AutoFillType(address_state));
+ DCHECK(ok);
+ ok = ok && Add(field_type_map, zip_, AutoFillType(address_zip));
+ DCHECK(ok);
+ ok = ok && Add(field_type_map, country_, AutoFillType(address_country));
+ DCHECK(ok);
+
+ return ok;
+}
+
+AddressField* AddressField::Parse(
+ std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml) {
+ AddressField address_field;
+ std::vector<AutoFillField*>::const_iterator q = *iter;
+ string16 pattern;
+
+ // The ECML standard uses 2 letter country codes. So we will
+ // have to remember that this is an ECML form, for when we fill
+ // it out.
+ address_field.is_ecml_ = is_ecml;
+
+ // Allow address fields to appear in any order.
+ do {
+ if (ParseText(&q, ASCIIToUTF16("company|business name")))
+ continue; // Ignore company name for now.
+
+ // Some pages (e.g. PC Connection.html) have an Attention field inside an
+ // address; we ignore this for now.
+ if (ParseText(&q, ASCIIToUTF16("attention|attn.")))
+ continue;
+
+ if (ParseAddressLines(&q, is_ecml, &address_field))
+ continue;
+
+ if (ParseCity(&q, is_ecml, &address_field))
+ continue;
+
+ if ((!address_field.state_ || address_field.state_->IsEmpty()) &&
+ address_field.ParseState(&q)) {
+ continue;
+ }
+
+ if (ParseZipCode(&q, is_ecml, &address_field))
+ continue;
+
+ if (ParseCountry(&q, is_ecml, &address_field))
+ continue;
+
+ // Some test pages (e.g. SharperImageModifyAccount.html,
+ // Craft Catalog1.html, FAO Schwarz Billing Info Page.html) have a
+ // "province"/"region"/"other" field; we ignore this field for now.
+ if (ParseText(&q, ASCIIToUTF16("province|region|other")))
+ continue;
+
+ // Ignore non-labeled fields within an address; the page
+ // MapQuest Driving Directions North America.html contains such a field.
+ // We only ignore such fields after we've parsed at least one other field;
+ // otherwise we'd effectively parse address fields before other field types
+ // after any non-labeled fields, and we want email address fields to have
+ // precedence since some pages contain fields labeled "Email address".
+ } while (q != *iter && ParseEmpty(&q));
+
+ // If we have identified any address fields in this field then it should be
+ // added to the list of fields.
+ if (address_field.address1_ != NULL || address_field.address2_ != NULL ||
+ address_field.city_ != NULL || address_field.state_ != NULL ||
+ address_field.zip_ != NULL || address_field.zip4_ ||
+ address_field.country_ != NULL) {
+ *iter = q;
+ return new AddressField(address_field);
+ }
+
+ return NULL;
+}
+
+AddressType AddressField::FindType() const {
+ // This is not a full address, so don't even bother trying to figure
+ // out its type.
+ if (address1_ == NULL)
+ return kGenericAddress;
+
+ // First look at the field name, which itself will sometimes contain
+ // "bill" or "ship". We could check for the ECML type prefixes
+ // here, but there's no need to since ECML's prefixes Ecom_BillTo
+ // and Ecom_ShipTo contain "bill" and "ship" anyway.
+ string16 name = StringToLowerASCII(address1_->name());
+ AddressType address_type = AddressTypeFromText(name);
+ if (address_type)
+ return address_type;
+
+ // TODO(jhawkins): Look at table cells above this point.
+ return kGenericAddress;
+}
+
+AddressField::AddressField()
+ : address1_(NULL),
+ address2_(NULL),
+ city_(NULL),
+ state_(NULL),
+ zip_(NULL),
+ zip4_(NULL),
+ country_(NULL),
+ type_(kGenericAddress),
+ is_ecml_(false) {
+}
+
+AddressField::AddressField(const AddressField& field)
+ : address1_(field.address1_),
+ address2_(field.address2_),
+ city_(field.city_),
+ state_(field.state_),
+ zip_(field.zip_),
+ zip4_(field.zip4_),
+ country_(field.country_),
+ type_(field.type_),
+ is_ecml_(field.is_ecml_) {
+}
+
+// static
+bool AddressField::ParseAddressLines(
+ std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field) {
+ // We only match the string "address" in page text, not in element names,
+ // because sometimes every element in a group of address fields will have
+ // a name containing the string "address"; for example, on the page
+ // Kohl's - Register Billing Address.html the text element labeled "city"
+ // has the name "BILL_TO_ADDRESS<>city". We do match address labels
+ // such as "address1", which appear as element names on various pages (eg
+ // AmericanGirl-Registration.html, BloomingdalesBilling.html,
+ // EBay Registration Enter Information.html).
+ if (address_field->address1_)
+ return false;
+
+ string16 pattern;
+ if (is_ecml) {
+ pattern = GetEcmlPattern(kEcmlShipToAddress1,
+ kEcmlBillToAddress1, '|');
+ } else {
+ pattern =
+ ASCIIToUTF16("@address|street|address line|address1|street_line1");
+ }
+
+ if (!ParseText(iter, pattern, &address_field->address1_))
+ return false;
+
+ // Some pages (e.g. expedia_checkout.html) have an apartment or
+ // suite number at this point. The occasional page (e.g.
+ // Ticketmaster3.html) calls this a unit number. We ignore this
+ // field since we can't fill it yet.
+ ParseText(iter, ASCIIToUTF16("suite|unit"));
+
+ // Optionally parse more address lines, which may have empty labels.
+ // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
+ // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
+ if (is_ecml) {
+ pattern = GetEcmlPattern(kEcmlShipToAddress2,
+ kEcmlBillToAddress2, '|');
+ } else {
+ pattern = ASCIIToUTF16("|address|address2|street|street_line2");
+ }
+
+ ParseText(iter, pattern, &address_field->address2_);
+ return true;
+}
+
+// static
+bool AddressField::ParseCountry(
+ std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field) {
+ // Parse a country. The occasional page (e.g.
+ // Travelocity_New Member Information1.html) calls this a "location".
+ // Note: ECML standard uses 2 letter country code (ISO 3166)
+ if (address_field->country_ && !address_field->country_->IsEmpty())
+ return false;
+
+ // TODO(jhawkins): Parse the country.
+ return false;
+}
+
+// static
+bool AddressField::ParseZipCode(
+ std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field) {
+ // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this
+ // is called a "post code".
+ //
+ // HACK: Just for the MapQuest driving directions page we match the
+ // exact name "1z", which MapQuest uses to label its zip code field.
+ // Hopefully before long we'll be smart enough to find the zip code
+ // on that page automatically.
+ if (address_field->zip_)
+ return false;
+
+ string16 pattern;
+ if (is_ecml) {
+ pattern = GetEcmlPattern(kEcmlShipToPostalCode,
+ kEcmlBillToPostalCode, '|');
+ } else {
+ pattern = ASCIIToUTF16("zip|postal|post code|^1z");
+ }
+
+ AddressType tempType;
+ string16 name = (**iter)->name();
+
+ // Note: comparisons using the ecml compliant name as a prefix must be used in
+ // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for
+ // more detail.
+ if (StartsWith(name, kEcmlBillToPostalCode, false)) {
+ tempType = kBillingAddress;
+ } else if (StartsWith(name, kEcmlShipToPostalCode, false)) {
+ tempType = kShippingAddress;
+ } else {
+ tempType = kGenericAddress;
+ }
+
+ if (!ParseText(iter, pattern, &address_field->zip_))
+ return false;
+
+ address_field->type_ = tempType;
+ if (!is_ecml) {
+ // Look for a zip+4, whose field name will also often contain
+ // the substring "zip".
+ ParseText(iter, ASCIIToUTF16("zip|^-"), &address_field->zip4_);
+ }
+
+ return true;
+}
+
+// static
+bool AddressField::ParseCity(
+ std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field) {
+ // Parse a city name. Some UK pages (e.g. The China Shop2.html) use
+ // the term "town".
+ if (address_field->city_)
+ return false;
+
+ string16 pattern;
+ if (is_ecml)
+ pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|');
+ else
+ pattern = ASCIIToUTF16("city|town");
+
+ if (!ParseText(iter, pattern, &address_field->city_))
+ return false;
+
+ return true;
+}
+
+bool AddressField::ParseState(
+ std::vector<AutoFillField*>::const_iterator* iter) {
+ // TODO(jhawkins): Parse the state.
+ return false;
+}
+
+AddressType AddressField::AddressTypeFromText(const string16 &text) {
+ if (text.find(ASCIIToUTF16("same as")) != string16::npos ||
+ text.find(ASCIIToUTF16("use my")) != string16::npos)
+ // This text could be a checkbox label such as "same as my billing
+ // address" or "use my shipping address".
+ // ++ It would help if we generally skipped all text that appears
+ // after a check box.
+ return kGenericAddress;
+
+ // Not all pages say "billing address" and "shipping address" explicitly;
+ // for example, Craft Catalog1.html has "Bill-to Address" and
+ // "Ship-to Address".
+ size_t bill = text.find_last_of(ASCIIToUTF16("bill"));
+ size_t ship = text.find_last_of(ASCIIToUTF16("ship"));
+
+ if (bill != string16::npos && bill > ship)
+ return kBillingAddress;
+
+ if (ship != string16::npos)
+ return kShippingAddress;
+
+ return kGenericAddress;
+}
diff --git a/chrome/browser/autofill/address_field.h b/chrome/browser/autofill/address_field.h
new file mode 100644
index 0000000..58ea0de
--- /dev/null
+++ b/chrome/browser/autofill/address_field.h
@@ -0,0 +1,65 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_AUTOFILL_ADDRESS_FIELD_H_
+#define CHROME_BROWSER_AUTOFILL_ADDRESS_FIELD_H_
+
+#include <vector>
+
+#include "chrome/browser/autofill/autofill_type.h"
+#include "chrome/browser/autofill/form_field.h"
+
+class AutoFillField;
+
+class AddressField : public FormField {
+ public:
+ virtual bool GetFieldInfo(FieldTypeMap* field_type_map) const;
+ virtual FormFieldType GetFormFieldType() { return kAddressType; }
+ virtual int priority() const { return 3; }
+
+ static AddressField* Parse(std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml);
+
+ // Tries to determine the billing/shipping type of this address.
+ AddressType FindType() const;
+
+ void SetType(AddressType address_type) { type_ = address_type; }
+
+ // Returns true if this is a full address as opposed to an address fragment
+ // such as a stand-alone ZIP code.
+ bool IsFullAddress() { return address1_ != NULL; }
+
+ private:
+ AddressField();
+ explicit AddressField(const AddressField& field);
+ void operator=(const AddressField&);
+
+ static bool ParseAddressLines(
+ std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field);
+ static bool ParseCountry(std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field);
+ static bool ParseZipCode(std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field);
+ static bool ParseCity(std::vector<AutoFillField*>::const_iterator* iter,
+ bool is_ecml, AddressField* address_field);
+ bool ParseState(std::vector<AutoFillField*>::const_iterator* iter);
+
+ // Looks for an address type in the given text, which the caller must
+ // convert to lowercase.
+ static AddressType AddressTypeFromText(const string16& text);
+
+ AutoFillField* address1_;
+ AutoFillField* address2_; // optional
+ AutoFillField* city_;
+ AutoFillField* state_; // optional
+ AutoFillField* zip_;
+ AutoFillField* zip4_; // optional ZIP+4; we don't fill this yet
+ AutoFillField* country_; // optional
+
+ AddressType type_;
+ bool is_ecml_;
+};
+
+#endif // CHROME_BROWSER_AUTOFILL_ADDRESS_FIELD_H_
diff --git a/chrome/browser/autofill/form_field.cc b/chrome/browser/autofill/form_field.cc
index da74448..1ab2c88 100644
--- a/chrome/browser/autofill/form_field.cc
+++ b/chrome/browser/autofill/form_field.cc
@@ -4,6 +4,7 @@
#include "chrome/browser/autofill/form_field.h"
+#include "chrome/browser/autofill/address_field.h"
#include "chrome/browser/autofill/autofill_field.h"
#include "chrome/browser/autofill/phone_field.h"
#include "third_party/WebKit/WebKit/chromium/public/WebRegularExpression.h"
@@ -68,12 +69,13 @@ FormField* FormField::ParseFormField(
field = PhoneField::Parse(iter, is_ecml);
if (field != NULL)
return field;
+ field = AddressField::Parse(iter, is_ecml);
+ if (field != NULL)
+ return field;
// TODO(jhawkins):
- // - AddressField
// - CreditCardField
// - NameField
- // - PhoneField
return NULL;
}
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index 957880c..8c98e0c 100755
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -80,6 +80,8 @@
'browser/autocomplete/keyword_provider.h',
'browser/autocomplete/search_provider.cc',
'browser/autocomplete/search_provider.h',
+ 'browser/autofill/address_field.cc',
+ 'browser/autofill/address_field.h',
'browser/autofill/autofill_field.cc',
'browser/autofill/autofill_field.h',
'browser/autofill/autofill_infobar_delegate.cc',