// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/autofill/address_field.h" #include #include "base/logging.h" #include "base/memory/scoped_ptr.h" #include "base/string16.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "chrome/browser/autofill/autofill_field.h" #include "chrome/browser/autofill/autofill_regex_constants.h" #include "chrome/browser/autofill/autofill_scanner.h" #include "chrome/browser/autofill/field_types.h" #include "ui/base/l10n/l10n_util.h" FormField* AddressField::Parse(AutofillScanner* scanner, bool parse_new_field_types) { if (scanner->IsEnd()) return NULL; scoped_ptr address_field(new AddressField); const AutofillField* const initial_field = scanner->Cursor(); size_t saved_cursor = scanner->SaveCursor(); string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); // Allow address fields to appear in any order. size_t begin_trailing_non_labeled_fields = 0; bool has_trailing_non_labeled_fields = false; while (!scanner->IsEnd()) { const size_t cursor = scanner->SaveCursor(); if (ParseAddressLines(scanner, address_field.get()) || ParseCity(scanner, address_field.get()) || ParseState(scanner, address_field.get()) || ParseZipCode(scanner, address_field.get()) || ParseCountry(scanner, address_field.get()) || ParseCompany(scanner, address_field.get())) { has_trailing_non_labeled_fields = false; continue; } else if (ParseField(scanner, attention_ignored, NULL) || ParseField(scanner, region_ignored, NULL)) { // We ignore the following: // * Attention. // * Province/Region/Other. continue; } else if (scanner->Cursor() != initial_field && ParseEmptyLabel(scanner, NULL)) { // Ignore non-labeled fields within an address; the page // MapQuest Driving Directions North America.html contains such a field. // We only ignore such fields after we've parsed at least one other field; // otherwise we'd effectively parse address fields before other field // types after any non-labeled fields, and we want email address fields to // have precedence since some pages contain fields labeled // "Email address". if (!has_trailing_non_labeled_fields) { has_trailing_non_labeled_fields = true; begin_trailing_non_labeled_fields = cursor; } continue; } else { // No field found. break; } } // If we have identified any address fields in this field then it should be // added to the list of fields. if (address_field->company_ != NULL || address_field->address1_ != NULL || address_field->address2_ != NULL || address_field->city_ != NULL || address_field->state_ != NULL || address_field->zip_ != NULL || address_field->zip4_ || address_field->country_ != NULL) { // Don't slurp non-labeled fields at the end into the address. if (has_trailing_non_labeled_fields) scanner->RewindTo(begin_trailing_non_labeled_fields); address_field->type_ = address_field->FindType(); return address_field.release(); } scanner->RewindTo(saved_cursor); return NULL; } AddressField::AddressType AddressField::FindType() const { // First look at the field name, which itself will sometimes contain // "bill" or "ship". if (company_) { string16 name = StringToLowerASCII(company_->name); return AddressTypeFromText(name); } if (address1_) { string16 name = StringToLowerASCII(address1_->name); return AddressTypeFromText(name); } if (address2_) { string16 name = StringToLowerASCII(address2_->name); return AddressTypeFromText(name); } if (city_) { string16 name = StringToLowerASCII(city_->name); return AddressTypeFromText(name); } if (zip_) { string16 name = StringToLowerASCII(zip_->name); return AddressTypeFromText(name); } if (state_) { string16 name = StringToLowerASCII(state_->name); return AddressTypeFromText(name); } if (country_) { string16 name = StringToLowerASCII(country_->name); return AddressTypeFromText(name); } return kGenericAddress; } AddressField::AddressField() : company_(NULL), address1_(NULL), address2_(NULL), city_(NULL), state_(NULL), zip_(NULL), zip4_(NULL), country_(NULL), type_(kGenericAddress) { } bool AddressField::ClassifyField(FieldTypeMap* map) const { AutofillFieldType address_company; AutofillFieldType address_line1; AutofillFieldType address_line2; AutofillFieldType address_city; AutofillFieldType address_state; AutofillFieldType address_zip; AutofillFieldType address_country; switch (type_) { case kShippingAddress: // Fall through. Autofill does not support shipping addresses. case kGenericAddress: address_company = COMPANY_NAME; address_line1 = ADDRESS_HOME_LINE1; address_line2 = ADDRESS_HOME_LINE2; address_city = ADDRESS_HOME_CITY; address_state = ADDRESS_HOME_STATE; address_zip = ADDRESS_HOME_ZIP; address_country = ADDRESS_HOME_COUNTRY; break; case kBillingAddress: address_company = COMPANY_NAME; address_line1 = ADDRESS_BILLING_LINE1; address_line2 = ADDRESS_BILLING_LINE2; address_city = ADDRESS_BILLING_CITY; address_state = ADDRESS_BILLING_STATE; address_zip = ADDRESS_BILLING_ZIP; address_country = ADDRESS_BILLING_COUNTRY; break; default: NOTREACHED(); return false; } bool ok = AddClassification(company_, address_company, map); ok = ok && AddClassification(address1_, address_line1, map); ok = ok && AddClassification(address2_, address_line2, map); ok = ok && AddClassification(city_, address_city, map); ok = ok && AddClassification(state_, address_state, map); ok = ok && AddClassification(zip_, address_zip, map); ok = ok && AddClassification(country_, address_country, map); return ok; } // static bool AddressField::ParseCompany(AutofillScanner* scanner, AddressField* address_field) { if (address_field->company_ && !address_field->company_->IsEmpty()) return false; return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), &address_field->company_); } // static bool AddressField::ParseAddressLines(AutofillScanner* scanner, AddressField* address_field) { // We only match the string "address" in page text, not in element names, // because sometimes every element in a group of address fields will have // a name containing the string "address"; for example, on the page // Kohl's - Register Billing Address.html the text element labeled "city" // has the name "BILL_TO_ADDRESS<>city". We do match address labels // such as "address1", which appear as element names on various pages (eg // AmericanGirl-Registration.html, BloomingdalesBilling.html, // EBay Registration Enter Information.html). if (address_field->address1_) return false; string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); if (!ParseField(scanner, pattern, &address_field->address1_) && !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, &address_field->address1_)) { return false; } // Optionally parse more address lines, which may have empty labels. // Some pages have 3 address lines (eg SharperImageModifyAccount.html) // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! pattern = UTF8ToUTF16(autofill::kAddressLine2Re); label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); if (!ParseEmptyLabel(scanner, &address_field->address2_) && !ParseField(scanner, pattern, &address_field->address2_)) { ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, &address_field->address2_); } // Try for a third line, which we will promptly discard. if (address_field->address2_ != NULL) { pattern = UTF8ToUTF16(autofill::kAddressLine3Re); ParseField(scanner, pattern, NULL); } return true; } // static bool AddressField::ParseCountry(AutofillScanner* scanner, AddressField* address_field) { // Parse a country. The occasional page (e.g. // Travelocity_New Member Information1.html) calls this a "location". if (address_field->country_ && !address_field->country_->IsEmpty()) return false; return ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kCountryRe), MATCH_DEFAULT | MATCH_SELECT, &address_field->country_); } // static bool AddressField::ParseZipCode(AutofillScanner* scanner, AddressField* address_field) { // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this // is called a "post code". // // HACK: Just for the MapQuest driving directions page we match the // exact name "1z", which MapQuest uses to label its zip code field. // Hopefully before long we'll be smart enough to find the zip code // on that page automatically. if (address_field->zip_) return false; string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); if (!ParseField(scanner, pattern, &address_field->zip_)) return false; address_field->type_ = kGenericAddress; // Look for a zip+4, whose field name will also often contain // the substring "zip". ParseField(scanner, UTF8ToUTF16(autofill::kZip4Re), &address_field->zip4_); return true; } // static bool AddressField::ParseCity(AutofillScanner* scanner, AddressField* address_field) { // Parse a city name. Some UK pages (e.g. The China Shop2.html) use // the term "town". if (address_field->city_) return false; // Select fields are allowed here. This occurs on top-100 site rediff.com. return ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kCityRe), MATCH_DEFAULT | MATCH_SELECT, &address_field->city_); } // static bool AddressField::ParseState(AutofillScanner* scanner, AddressField* address_field) { if (address_field->state_) return false; return ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kStateRe), MATCH_DEFAULT | MATCH_SELECT, &address_field->state_); } AddressField::AddressType AddressField::AddressTypeFromText( const string16 &text) { size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)); size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)); if (same_as != string16::npos || use_shipping != string16::npos) // This text could be a checkbox label such as "same as my billing // address" or "use my shipping address". // ++ It would help if we generally skipped all text that appears // after a check box. return kGenericAddress; // Not all pages say "billing address" and "shipping address" explicitly; // for example, Craft Catalog1.html has "Bill-to Address" and // "Ship-to Address". size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe)); size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe)); if (bill == string16::npos && ship == string16::npos) return kGenericAddress; if (bill != string16::npos && ship == string16::npos) return kBillingAddress; if (bill == string16::npos && ship != string16::npos) return kShippingAddress; if (bill > ship) return kBillingAddress; return kShippingAddress; }