diff options
author | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-03-26 01:58:09 +0000 |
---|---|---|
committer | dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-03-26 01:58:09 +0000 |
commit | 6a3d7b998c691aa54b0f024a2a17bcf6ea2e0c57 (patch) | |
tree | ce14eccb00c57730e1c0067e55d49c96e7567dd5 /chrome/renderer | |
parent | 18506ac1d93c3ac122f7ae435b25431c580e9317 (diff) | |
download | chromium_src-6a3d7b998c691aa54b0f024a2a17bcf6ea2e0c57.zip chromium_src-6a3d7b998c691aa54b0f024a2a17bcf6ea2e0c57.tar.gz chromium_src-6a3d7b998c691aa54b0f024a2a17bcf6ea2e0c57.tar.bz2 |
Label scraping for AutoFill.
Adds label scraping to AutoFill. Infers labels from surrounding context of
input fields in the WebKit DOM. Specific cases added are:
- Text element immediately preceding INPUT element.
E.g. First name:<INPUT type="text" id="firstname" value="John"/>
- Paragraph element containing text immediately preceding INPUT element.
E.g. <P>First name:</P><INPUT type="text" id="firstname" value="John"/>
BUG=33031
TEST=FormManagerTest.LabelsFromInferredParagraph,
FormManagerTest.LabelsFromInferredText,
FormStructureTest.HeuristicsLabelsOnly
Review URL: http://codereview.chromium.org/1380002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@42720 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer')
-rw-r--r-- | chrome/renderer/form_manager.cc | 53 | ||||
-rw-r--r-- | chrome/renderer/form_manager.h | 5 | ||||
-rw-r--r-- | chrome/renderer/form_manager_unittest.cc | 165 |
3 files changed, 179 insertions, 44 deletions
diff --git a/chrome/renderer/form_manager.cc b/chrome/renderer/form_manager.cc index 246c448..2752dcb 100644 --- a/chrome/renderer/form_manager.cc +++ b/chrome/renderer/form_manager.cc @@ -202,10 +202,10 @@ void FormManager::FormElementToFormData(WebFrame* frame, string16 form_control_type = input_element.formControlType(); WebInputElement::InputType input_type = input_element.inputType(); FormField field = FormField(label, - name, - value, - form_control_type, - input_type); + name, + value, + form_control_type, + input_type); form->fields.push_back(field); } } @@ -221,5 +221,48 @@ string16 FormManager::LabelForElement(const WebInputElement& element) { return label.innerText(); } } - return string16(); + + // Infer the label from context if not found in label element. + return FormManager::InferLabelForElement(element); +} + +// static +string16 FormManager::InferLabelForElement(const WebInputElement& element) { + string16 inferred_label; + WebNode previous = element.previousSibling(); + if (!previous.isNull()) { + if (previous.isTextNode()) { + inferred_label = previous.nodeValue(); + TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); + } + + // If we didn't find text, check for previous paragraph. + // Eg. <p>Some Text</p><input ...> + // Note the lack of whitespace between <p> and <input> elements. + if (inferred_label.empty()) { + if (previous.isElementNode()) { + WebElement element = previous.toElement<WebElement>(); + if (element.hasTagName("p")) { + inferred_label = element.innerText(); + TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); + } + } + } + + // If we didn't find paragraph, check for previous paragraph to this. + // Eg. <p>Some Text</p> <input ...> + // Note the whitespace between <p> and <input> elements. 
+ if (inferred_label.empty()) { + previous = previous.previousSibling(); + if (!previous.isNull() && previous.isElementNode()) { + WebElement element = previous.toElement<WebElement>(); + if (element.hasTagName("p")) { + inferred_label = element.innerText(); + TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); + } + } + } + } + + return inferred_label; } diff --git a/chrome/renderer/form_manager.h b/chrome/renderer/form_manager.h index a6805b2..4928838 100644 --- a/chrome/renderer/form_manager.h +++ b/chrome/renderer/form_manager.h @@ -80,6 +80,11 @@ class FormManager { // Returns the corresponding label for |element|. static string16 LabelForElement(const WebKit::WebInputElement& element); + // Infers corresponding label for |element| from surrounding context in the + // DOM. Contents of preceeding <p> tag or preceeding text element found in + // the form. + static string16 InferLabelForElement(const WebKit::WebInputElement& element); + // The map of form elements. WebFrameFormElementMap form_elements_map_; diff --git a/chrome/renderer/form_manager_unittest.cc b/chrome/renderer/form_manager_unittest.cc index 418afd8..c288d19 100644 --- a/chrome/renderer/form_manager_unittest.cc +++ b/chrome/renderer/form_manager_unittest.cc @@ -19,20 +19,19 @@ using WebKit::WebString; using webkit_glue::FormData; using webkit_glue::FormField; -class FormManagerTest : public RenderViewTest { - public: - FormManagerTest() {} -}; +namespace { + +typedef RenderViewTest FormManagerTest; TEST_F(FormManagerTest, ExtractForms) { LoadHTML("<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">" - " <INPUT type=\"text\" id=\"firstname\" value=\"John\">" - " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\">" - " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\">" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" "</FORM>"); 
WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -67,16 +66,16 @@ TEST_F(FormManagerTest, ExtractForms) { TEST_F(FormManagerTest, ExtractMultipleForms) { LoadHTML("<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">" - " <INPUT type=\"text\" id=\"firstname\" value=\"John\">" - " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\">" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" "</FORM>" "<FORM name=\"TestForm2\" action=\"http://zoo.com\" method=\"post\">" - " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\">" - " <INPUT type=\"submit\" name=\"second\" value=\"Submit\">" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"second\" value=\"Submit\"/>" "</FORM>"); WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -128,12 +127,12 @@ TEST_F(FormManagerTest, GetFormsAutocomplete) { // Form is not auto-completable due to autocomplete=off. 
LoadHTML("<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\"" " autocomplete=off>" - " <INPUT type=\"text\" id=\"firstname\" value=\"John\">" - " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\">" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" "</FORM>"); WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -152,12 +151,12 @@ TEST_F(FormManagerTest, GetFormsAutocomplete) { LoadHTML("<FORM name=\"TestForm\" action=\"http://abc.com\" method=\"post\">" " <INPUT type=\"text\" id=\"firstname\" value=\"John\"" " autocomplete=off>" - " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\">" - " <INPUT type=\"submit\" name=\"reply\" value=\"Send\">" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"reply\" value=\"Send\"/>" "</FORM>"); web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); form_manager.Reset(); form_manager.ExtractForms(web_frame); @@ -188,13 +187,13 @@ TEST_F(FormManagerTest, GetFormsAutocomplete) { TEST_F(FormManagerTest, GetFormsElementsEnabled) { // The firstname element is not enabled due to disabled being set. 
LoadHTML("<FORM name=\"TestForm\" action=\"http://xyz.com\" method=\"post\">" - " <INPUT disabled type=\"text\" id=\"firstname\" value=\"John\">" - " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\">" - " <INPUT type=\"submit\" name=\"submit\" value=\"Send\">" + " <INPUT disabled type=\"text\" id=\"firstname\" value=\"John\"/>" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"submit\" value=\"Send\"/>" "</FORM>"); WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -224,13 +223,13 @@ TEST_F(FormManagerTest, GetFormsElementsEnabled) { TEST_F(FormManagerTest, FindForm) { LoadHTML("<FORM name=\"TestForm\" action=\"http://buh.com\" method=\"post\">" - " <INPUT type=\"text\" id=\"firstname\" value=\"John\">" - " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\">" - " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\">" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" "</FORM>"); WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -273,13 +272,13 @@ TEST_F(FormManagerTest, FindForm) { TEST_F(FormManagerTest, FillForm) { LoadHTML("<FORM name=\"TestForm\" action=\"http://buh.com\" method=\"post\">" - " <INPUT type=\"text\" id=\"firstname\">" - " <INPUT type=\"text\" id=\"lastname\">" - " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\">" + " <INPUT type=\"text\" id=\"firstname\"/>" + " <INPUT type=\"text\" id=\"lastname\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" "</FORM>"); WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), 
web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -352,13 +351,13 @@ TEST_F(FormManagerTest, FillForm) { TEST_F(FormManagerTest, Reset) { LoadHTML("<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">" - " <INPUT type=\"text\" id=\"firstname\" value=\"John\">" - " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\">" - " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\">" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" "</FORM>"); WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -378,14 +377,57 @@ TEST_F(FormManagerTest, Reset) { TEST_F(FormManagerTest, Labels) { LoadHTML("<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">" " <LABEL for=\"firstname\"> First name: </LABEL>" - " <INPUT type=\"text\" id=\"firstname\" value=\"John\">" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" " <LABEL for=\"lastname\"> Last name: </LABEL>" - " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\">" - " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\">" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" + "</FORM>"); + + WebFrame* web_frame = GetMainFrame(); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); + + FormManager form_manager; + form_manager.ExtractForms(web_frame); + + std::vector<FormData> forms; + form_manager.GetForms(&forms, FormManager::REQUIRE_NONE); + ASSERT_EQ(1U, forms.size()); + + const FormData& form = forms[0]; + EXPECT_EQ(ASCIIToUTF16("TestForm"), form.name); + EXPECT_EQ(GURL(web_frame->url()), form.origin); + EXPECT_EQ(GURL("http://cnn.com"), form.action); + + const std::vector<FormField>& fields = form.fields; + 
ASSERT_EQ(3U, fields.size()); + EXPECT_EQ(FormField(ASCIIToUTF16("First name:"), + ASCIIToUTF16("firstname"), + ASCIIToUTF16("John"), + ASCIIToUTF16("text"), + WebInputElement::Text), fields[0]); + EXPECT_EQ(FormField(ASCIIToUTF16("Last name:"), + ASCIIToUTF16("lastname"), + ASCIIToUTF16("Smith"), + ASCIIToUTF16("text"), + WebInputElement::Text), fields[1]); + EXPECT_EQ(FormField(string16(), + ASCIIToUTF16("reply-send"), + ASCIIToUTF16("Send"), + ASCIIToUTF16("submit"), + WebInputElement::Submit), fields[2]); +} + +TEST_F(FormManagerTest, LabelsFromInferredText) { + LoadHTML("<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">" + " First name:" + " <INPUT type=\"text\" id=\"firstname\" value=\"John\"/>" + " Last name:" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" "</FORM>"); WebFrame* web_frame = GetMainFrame(); - ASSERT_TRUE(web_frame); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); FormManager form_manager; form_manager.ExtractForms(web_frame); @@ -417,3 +459,48 @@ TEST_F(FormManagerTest, Labels) { ASCIIToUTF16("submit"), WebInputElement::Submit), fields[2]); } + +TEST_F(FormManagerTest, LabelsFromInferredParagraph) { + LoadHTML("<FORM name=\"TestForm\" action=\"http://cnn.com\" method=\"post\">" + " <P>First name:</P><INPUT type=\"text\" " + " id=\"firstname\" value=\"John\"/>" + " <P>Last name:</P>" + " <INPUT type=\"text\" id=\"lastname\" value=\"Smith\"/>" + " <INPUT type=\"submit\" name=\"reply-send\" value=\"Send\"/>" + "</FORM>"); + + WebFrame* web_frame = GetMainFrame(); + ASSERT_NE(static_cast<WebFrame*>(NULL), web_frame); + + FormManager form_manager; + form_manager.ExtractForms(web_frame); + + std::vector<FormData> forms; + form_manager.GetForms(&forms, FormManager::REQUIRE_NONE); + ASSERT_EQ(1U, forms.size()); + + const FormData& form = forms[0]; + EXPECT_EQ(ASCIIToUTF16("TestForm"), form.name); + EXPECT_EQ(GURL(web_frame->url()), form.origin); + 
EXPECT_EQ(GURL("http://cnn.com"), form.action); + + const std::vector<FormField>& fields = form.fields; + ASSERT_EQ(3U, fields.size()); + EXPECT_EQ(FormField(ASCIIToUTF16("First name:"), + ASCIIToUTF16("firstname"), + ASCIIToUTF16("John"), + ASCIIToUTF16("text"), + WebInputElement::Text), fields[0]); + EXPECT_EQ(FormField(ASCIIToUTF16("Last name:"), + ASCIIToUTF16("lastname"), + ASCIIToUTF16("Smith"), + ASCIIToUTF16("text"), + WebInputElement::Text), fields[1]); + EXPECT_EQ(FormField(string16(), + ASCIIToUTF16("reply-send"), + ASCIIToUTF16("Send"), + ASCIIToUTF16("submit"), + WebInputElement::Submit), fields[2]); +} + +} // namespace |