diff options
author | isherman@chromium.org <isherman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-02 00:47:48 +0000 |
---|---|---|
committer | isherman@chromium.org <isherman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-02 00:47:48 +0000 |
commit | f638c3d4ff79bc94b4651e7791a362eb4ced2eeb (patch) | |
tree | e1226ec6fbd6b3b021cdf4bd238ee3319105faaa /chrome/browser/autofill/address_field.cc | |
parent | 41569a283de6a9694cd10f9187fd25e6d53066d4 (diff) | |
download | chromium_src-f638c3d4ff79bc94b4651e7791a362eb4ced2eeb.zip chromium_src-f638c3d4ff79bc94b4651e7791a362eb4ced2eeb.tar.gz chromium_src-f638c3d4ff79bc94b4651e7791a362eb4ced2eeb.tar.bz2 |
Improve Autofill heuristics when detecting labels from previous elements.
Support HTML like """Name <span class="required">*</span> <input type="text" name="name">"""
Of course, pull at a thread and...
Other changes also included to avoid regressions:
* When parsing address fields for heuristics, we try to skip over unlabeled fields in the middle of an address. Updated the code not to also skip over unlabeled fields at the *end* of an address, as these might be part of a different section entirely.
* Tighten the credit card number regex to require the word "card".
* Add "csc" to the credit card security code regex.
* When inferring labels based on <div> structure, be willing to scan up the tree past the closest parent that is a <div>.
* Also when inferring labels based on <div> structure, we previously would only stop early if we were about to escape from a <table> element. Also stop early if we are about to escape from a <fieldset> element. If we see either of these elements, we expect the field label to be contained within the element.
* Allow <label> elements to misuse the 'for' attribute to specify the element's name rather than its id.
BUG=87517
TEST=browser_tests --gtest_filter=FormStructureBrowserTest.DataDrivenHeuristics*
Review URL: http://codereview.chromium.org/7531023
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@95019 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autofill/address_field.cc')
-rw-r--r-- | chrome/browser/autofill/address_field.cc | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc index 2c4ce93..025911d 100644 --- a/chrome/browser/autofill/address_field.cc +++ b/chrome/browser/autofill/address_field.cc @@ -24,8 +24,8 @@ FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { return NULL; scoped_ptr<AddressField> address_field(new AddressField); - const AutofillField* initial_field = scanner->Cursor(); - scanner->SaveCursor(); + const AutofillField* const initial_field = scanner->Cursor(); + size_t saved_cursor = scanner->SaveCursor(); string16 attention_ignored = l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); @@ -33,13 +33,17 @@ FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); // Allow address fields to appear in any order. + size_t begin_trailing_non_labeled_fields = 0; + bool has_trailing_non_labeled_fields = false; while (!scanner->IsEnd()) { + const size_t cursor = scanner->SaveCursor(); if (ParseAddressLines(scanner, is_ecml, address_field.get()) || ParseCity(scanner, is_ecml, address_field.get()) || ParseState(scanner, is_ecml, address_field.get()) || ParseZipCode(scanner, is_ecml, address_field.get()) || ParseCountry(scanner, is_ecml, address_field.get()) || ParseCompany(scanner, is_ecml, address_field.get())) { + has_trailing_non_labeled_fields = false; continue; } else if (ParseField(scanner, attention_ignored, NULL) || ParseField(scanner, region_ignored, NULL)) { @@ -56,6 +60,11 @@ FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { // types after any non-labeled fields, and we want email address fields to // have precedence since some pages contain fields labeled // "Email address". + if (!has_trailing_non_labeled_fields) { + has_trailing_non_labeled_fields = true; + begin_trailing_non_labeled_fields = cursor; + } + continue; } else { // No field found. 
@@ -70,11 +79,15 @@ FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { address_field->city_ != NULL || address_field->state_ != NULL || address_field->zip_ != NULL || address_field->zip4_ || address_field->country_ != NULL) { + // Don't slurp non-labeled fields at the end into the address. + if (has_trailing_non_labeled_fields) + scanner->RewindTo(begin_trailing_non_labeled_fields); + address_field->type_ = address_field->FindType(); return address_field.release(); } - scanner->Rewind(); + scanner->RewindTo(saved_cursor); return NULL; } |