// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/autofill/core/browser/address_field.h" #include #include "base/logging.h" #include "base/memory/scoped_ptr.h" #include "base/strings/string16.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "components/autofill/core/browser/autofill_field.h" #include "components/autofill/core/browser/autofill_regex_constants.h" #include "components/autofill/core/browser/autofill_scanner.h" #include "components/autofill/core/browser/field_types.h" #include "ui/base/l10n/l10n_util.h" namespace autofill { FormField* AddressField::Parse(AutofillScanner* scanner) { if (scanner->IsEnd()) return NULL; scoped_ptr address_field(new AddressField); const AutofillField* const initial_field = scanner->Cursor(); size_t saved_cursor = scanner->SaveCursor(); base::string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); base::string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); // Allow address fields to appear in any order. size_t begin_trailing_non_labeled_fields = 0; bool has_trailing_non_labeled_fields = false; while (!scanner->IsEnd()) { const size_t cursor = scanner->SaveCursor(); if (address_field->ParseAddressLines(scanner) || address_field->ParseCity(scanner) || address_field->ParseState(scanner) || address_field->ParseZipCode(scanner) || address_field->ParseCountry(scanner) || address_field->ParseCompany(scanner)) { has_trailing_non_labeled_fields = false; continue; } else if (ParseField(scanner, attention_ignored, NULL) || ParseField(scanner, region_ignored, NULL)) { // We ignore the following: // * Attention. // * Province/Region/Other. continue; } else if (scanner->Cursor() != initial_field && ParseEmptyLabel(scanner, NULL)) { // Ignore non-labeled fields within an address; the page // MapQuest Driving Directions North America.html contains such a field. // We only ignore such fields after we've parsed at least one other field; // otherwise we'd effectively parse address fields before other field // types after any non-labeled fields, and we want email address fields to // have precedence since some pages contain fields labeled // "Email address". if (!has_trailing_non_labeled_fields) { has_trailing_non_labeled_fields = true; begin_trailing_non_labeled_fields = cursor; } continue; } else { // No field found. break; } } // If we have identified any address fields in this field then it should be // added to the list of fields. if (address_field->company_ || address_field->address1_ || address_field->address2_ || address_field->street_address_ || address_field->city_ || address_field->state_ || address_field->zip_ || address_field->zip4_ || address_field->country_) { // Don't slurp non-labeled fields at the end into the address. if (has_trailing_non_labeled_fields) scanner->RewindTo(begin_trailing_non_labeled_fields); return address_field.release(); } scanner->RewindTo(saved_cursor); return NULL; } AddressField::AddressField() : company_(NULL), address1_(NULL), address2_(NULL), street_address_(NULL), city_(NULL), state_(NULL), zip_(NULL), zip4_(NULL), country_(NULL) { } bool AddressField::ClassifyField(ServerFieldTypeMap* map) const { // The page can request the address lines as a single textarea input or as // multiple text fields (or not at all), but it shouldn't be possible to // request both. DCHECK(!(address1_ && street_address_)); DCHECK(!(address2_ && street_address_)); return AddClassification(company_, COMPANY_NAME, map) && AddClassification(address1_, ADDRESS_HOME_LINE1, map) && AddClassification(address2_, ADDRESS_HOME_LINE2, map) && AddClassification(street_address_, ADDRESS_HOME_STREET_ADDRESS, map) && AddClassification(city_, ADDRESS_HOME_CITY, map) && AddClassification(state_, ADDRESS_HOME_STATE, map) && AddClassification(zip_, ADDRESS_HOME_ZIP, map) && AddClassification(country_, ADDRESS_HOME_COUNTRY, map); } bool AddressField::ParseCompany(AutofillScanner* scanner) { if (company_ && !company_->IsEmpty()) return false; return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), &company_); } bool AddressField::ParseAddressLines(AutofillScanner* scanner) { // We only match the string "address" in page text, not in element names, // because sometimes every element in a group of address fields will have // a name containing the string "address"; for example, on the page // Kohl's - Register Billing Address.html the text element labeled "city" // has the name "BILL_TO_ADDRESS<>city". We do match address labels // such as "address1", which appear as element names on various pages (eg // AmericanGirl-Registration.html, BloomingdalesBilling.html, // EBay Registration Enter Information.html). if (address1_ || street_address_) return false; base::string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); base::string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) && !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, &address1_) && !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) && !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT_AREA, &street_address_)) { return false; } // Optionally parse more address lines, which may have empty labels. pattern = UTF8ToUTF16(autofill::kAddressLine2Re); label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); if (!street_address_ && !ParseEmptyLabel(scanner, &address2_) && !ParseField(scanner, pattern, &address2_)) { ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, &address2_); } // Try for surplus lines, which we will promptly discard. // Some pages have 3 address lines (eg SharperImageModifyAccount.html) // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! if (address2_) { pattern = UTF8ToUTF16(autofill::kAddressLinesExtraRe); while (ParseField(scanner, pattern, NULL)) { // Consumed a surplus line, try for another. } } return true; } bool AddressField::ParseCountry(AutofillScanner* scanner) { // Parse a country. The occasional page (e.g. // Travelocity_New Member Information1.html) calls this a "location". if (country_ && !country_->IsEmpty()) return false; return ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kCountryRe), MATCH_DEFAULT | MATCH_SELECT, &country_); } bool AddressField::ParseZipCode(AutofillScanner* scanner) { // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this // is called a "post code". if (zip_) return false; base::string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); if (!ParseField(scanner, pattern, &zip_)) return false; // Look for a zip+4, whose field name will also often contain // the substring "zip". ParseField(scanner, UTF8ToUTF16(autofill::kZip4Re), &zip4_); return true; } bool AddressField::ParseCity(AutofillScanner* scanner) { // Parse a city name. Some UK pages (e.g. The China Shop2.html) use // the term "town". if (city_) return false; // Select fields are allowed here. This occurs on top-100 site rediff.com. return ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kCityRe), MATCH_DEFAULT | MATCH_SELECT, &city_); } bool AddressField::ParseState(AutofillScanner* scanner) { if (state_) return false; return ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kStateRe), MATCH_DEFAULT | MATCH_SELECT, &state_); } } // namespace autofill