summary refs log tree commit diff stats
path: root/chrome/renderer/form_manager.cc
diff options
context:
space:
mode:
author dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-05-13 19:27:44 +0000
committer dhollowa@chromium.org <dhollowa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-05-13 19:27:44 +0000
commit b9214d4348aed078b9c4ee4660183bb7a3f3ba36 (patch)
tree 99dc0ecc106ccbe6973dcb694b8fd4f4eeb329a7 /chrome/renderer/form_manager.cc
parent d91061d4e4bb1ce5c2ff597902daf68b4872cdc8 (diff)
download chromium_src-b9214d4348aed078b9c4ee4660183bb7a3f3ba36.zip
chromium_src-b9214d4348aed078b9c4ee4660183bb7a3f3ba36.tar.gz
chromium_src-b9214d4348aed078b9c4ee4660183bb7a3f3ba36.tar.bz2
AutoFill label scraping nested table contents.
Label scraping in forms is now extended to include text nested within formatting (and other) elements within the table. For example: <TR> <TD> <FONT> Last name: </FONT> </TD> <TD> <FONT> <INPUT type="text" id="lastname" value="Smith"/> </FONT> </TD> </TR> The "Last name:" text is now correctly scraped. BUG=38269 TEST=Manual testing of forms: \ http://www.mycontactform.com/samples/rental.php \ http://www.mycontactform.com/samples/real_estate.php \ http://www.mycontactform.com/samples/jobapp.php \ http://www.mycontactform.com/samples/employee_eval.php \ http://www.mycontactform.com/samples/customer_complaint.php \ Unit test: FormManagerTest.LabelsInferredFromTableCellNested. Review URL: http://codereview.chromium.org/2061008 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@47176 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/form_manager.cc')
-rw-r--r-- chrome/renderer/form_manager.cc 63
1 files changed, 43 insertions, 20 deletions
diff --git a/chrome/renderer/form_manager.cc b/chrome/renderer/form_manager.cc
index f81d6df..ebdccbc 100644
--- a/chrome/renderer/form_manager.cc
+++ b/chrome/renderer/form_manager.cc
@@ -45,22 +45,43 @@ namespace {
// it's not necessary.
const size_t kRequiredAutoFillFields = 3;
-// Returns the node value of the first offspring of |element| that is a text
-// node. This is a faster alternative to |innerText()| for performance
-// critical operations when the child structure of |element| is known.
-string16 GetChildText(const WebElement& element) {
+// This is a helper function for the FindChildText() function.
+// Returns the node value of the descendant or sibling of |node| that is a
+// non-empty text node. This is a faster alternative to |innerText()| for
+// performance critical operations. It does a full depth-first search, so it
+// can be used when the structure is not directly known. It does not aggregate
+// the text of multiple nodes; it just returns the value of the first found.
+// "Non-empty" in this case means non-empty after the whitespace has been
+// stripped.
+string16 FindChildTextInner(const WebNode& node) {
string16 element_text;
- WebNode child = element.firstChild();
- // Find the text node.
- while (!child.isNull() && !child.isTextNode())
- child = child.firstChild();
- if (!child.isNull()) {
- element_text = child.nodeValue();
- TrimWhitespace(element_text, TRIM_ALL, &element_text);
- }
+ if (node.isNull())
+ return element_text;
+
+ element_text = node.nodeValue();
+ TrimWhitespace(element_text, TRIM_ALL, &element_text);
+ if (!element_text.empty())
+ return element_text;
+
+ element_text = FindChildTextInner(node.firstChild());
+ if (!element_text.empty())
+ return element_text;
+
+ element_text = FindChildTextInner(node.nextSibling());
+ if (!element_text.empty())
+ return element_text;
+
return element_text;
}
+// Returns the node value of the first descendant of |element| that is a
+// non-empty text node. "Non-empty" in this case means non-empty after the
+// whitespace has been stripped.
+string16 FindChildText(const WebElement& element) {
+ WebNode child = element.firstChild();
+ return FindChildTextInner(child);
+}
+
} // namespace
FormManager::FormManager() {
@@ -109,7 +130,7 @@ string16 FormManager::LabelForElement(const WebFormControlElement& element) {
if (e.hasTagName("label")) {
WebLabelElement label = e.to<WebLabelElement>();
if (label.correspondingControl() == element)
- return GetChildText(label);
+ return FindChildText(label);
}
}
@@ -200,7 +221,7 @@ bool FormManager::WebFormElementToFormData(const WebFormElement& element,
std::map<string16, FormField*>::iterator iter =
name_map.find(field_element.nameForAutofill());
if (iter != name_map.end())
- iter->second->set_label(GetChildText(label));
+ iter->second->set_label(FindChildText(label));
}
// Loop through the form control elements, extracting the label text from the
@@ -548,8 +569,7 @@ string16 FormManager::InferLabelForElement(
if (previous.isElementNode()) {
WebElement element = previous.to<WebElement>();
if (element.hasTagName("p")) {
- inferred_label = GetChildText(element);
- TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
+ inferred_label = FindChildText(element);
}
}
}
@@ -562,8 +582,7 @@ string16 FormManager::InferLabelForElement(
if (!previous.isNull() && previous.isElementNode()) {
WebElement element = previous.to<WebElement>();
if (element.hasTagName("p")) {
- inferred_label = GetChildText(element);
- TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
+ inferred_label = FindChildText(element);
}
}
}
@@ -571,8 +590,13 @@ string16 FormManager::InferLabelForElement(
// If we didn't find paragraph, check for table cell case.
// Eg. <tr><td>Some Text</td><td><input ...></td></tr>
+ // Eg. <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
if (inferred_label.empty()) {
WebNode parent = element.parentNode();
+ while (!parent.isNull() && parent.isElementNode() &&
+ !parent.to<WebElement>().hasTagName("td"))
+ parent = parent.parentNode();
+
if (!parent.isNull() && parent.isElementNode()) {
WebElement element = parent.to<WebElement>();
if (element.hasTagName("td")) {
@@ -585,8 +609,7 @@ string16 FormManager::InferLabelForElement(
if (!previous.isNull() && previous.isElementNode()) {
element = previous.to<WebElement>();
if (element.hasTagName("td")) {
- inferred_label = GetChildText(element);
- TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
+ inferred_label = FindChildText(element);
}
}
}