// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/renderer/autofill/form_manager.h" #include "base/logging.h" #include "base/memory/scoped_vector.h" #include "base/stl_util.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "grit/generated_resources.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFormControlElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebInputElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebLabelElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebOptionElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebSelectElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebVector.h" #include "ui/base/l10n/l10n_util.h" #include "webkit/glue/form_data.h" #include "webkit/glue/form_data_predictions.h" #include "webkit/glue/form_field.h" #include "webkit/glue/form_field_predictions.h" #include "webkit/glue/web_io_operators.h" using WebKit::WebDocument; using WebKit::WebElement; using WebKit::WebFormControlElement; using WebKit::WebFormElement; using WebKit::WebFrame; using WebKit::WebInputElement; using WebKit::WebLabelElement; using WebKit::WebNode; using WebKit::WebNodeList; using WebKit::WebOptionElement; using WebKit::WebSelectElement; using WebKit::WebString; using WebKit::WebVector; using webkit_glue::FormData; using 
webkit_glue::FormDataPredictions; using webkit_glue::FormField; using webkit_glue::FormFieldPredictions; namespace { // The number of fields required by Autofill. Ideally we could send the forms // to Autofill no matter how many fields are in the forms; however, finding the // label for each field is a costly operation and we can't spare the cycles if // it's not necessary. const size_t kRequiredAutofillFields = 3; // The maximum number of form fields we are willing to parse, due to // computational costs. This is a very conservative upper bound. const size_t kMaxParseableFields = 1000; // The maximum length allowed for form data. const size_t kMaxDataLength = 1024; // In HTML5, all text fields except password are text input fields to // autocomplete. bool IsTextInput(const WebInputElement* element) { if (!element) return false; return element->isTextField() && !element->isPasswordField(); } bool IsSelectElement(const WebFormControlElement& element) { return element.formControlType() == ASCIIToUTF16("select-one"); } bool IsTextContainerElement(const WebElement& element) { return element.hasTagName("p") || element.hasTagName("b") || element.hasTagName("span") || element.hasTagName("font"); } bool IsOptionElement(const WebElement& element) { return element.hasTagName("option"); } bool IsScriptElement(const WebElement& element) { return element.hasTagName("script"); } bool IsNoScriptElement(const WebElement& element) { return element.hasTagName("noscript"); } bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { return node.isElementNode() && node.toConst().hasTagName(tag); } bool IsAutofillableElement(const WebFormControlElement& element) { const WebInputElement* input_element = toWebInputElement(&element); return IsTextInput(input_element) || IsSelectElement(element); } // This is a helper function for the FindChildText() function (see below). // Search depth is limited with the |depth| parameter. 
// Returns the text found at |node| followed by the text gathered from its
// first child and from its next sibling.  |depth| is decremented for both the
// child and the sibling recursion, so it bounds how many siblings are visited
// as well as how deeply the tree is descended.  Returns an empty string when
// |depth| is exhausted, |node| is null, or |node| is neither an element nor a
// text node.
string16 FindChildTextInner(const WebNode& node, int depth) {
  if (depth <= 0 || node.isNull())
    return string16();

  // Only element and text nodes are considered.
  if (node.nodeType() != WebNode::ElementNode &&
      node.nodeType() != WebNode::TextNode)
    return string16();

  // Ignore elements known not to contain inferable labels.  Note that
  // returning here also ends the walk over this node's following siblings,
  // because siblings are only visited further down in this same call.
  if (node.isElementNode()) {
    // WebNode::toConst() is a template whose target type cannot be deduced, so
    // the element type has to be spelled out explicitly.
    const WebElement element = node.toConst<WebElement>();
    if (IsOptionElement(element) ||
        IsScriptElement(element) ||
        IsNoScriptElement(element)) {
      return string16();
    }
  }

  // Extract the text exactly at this node.  The value returned by
  // TrimWhitespace() is used below as a "something was trimmed" flag.
  string16 node_text = node.nodeValue();
  TrimPositions node_trailing_whitespace =
      TrimWhitespace(node_text, TRIM_TRAILING, &node_text);

  // Recursively compute the children's text.
  // Preserve inter-element whitespace separation.
  string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
  TrimPositions child_leading_whitespace =
      TrimWhitespace(child_text, TRIM_LEADING, &child_text);
  if (node_trailing_whitespace || child_leading_whitespace ||
      (node.nodeType() == WebNode::TextNode && node_text.empty())) {
    node_text += ASCIIToUTF16(" ");
  }
  node_text += child_text;
  node_trailing_whitespace =
      TrimWhitespace(node_text, TRIM_TRAILING, &node_text);

  // Recursively compute the siblings' text.
  // Again, preserve inter-element whitespace separation.
  string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1);
  TrimPositions sibling_leading_whitespace =
      TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text);
  if (node_trailing_whitespace || sibling_leading_whitespace ||
      (node.nodeType() == WebNode::TextNode && node_text.empty())) {
    node_text += ASCIIToUTF16(" ");
  }
  node_text += sibling_text;

  return node_text;
}

// Returns the aggregated values of the descendants of |element| that are
// non-empty text nodes.  This is a faster alternative to |innerText()| for
// performance critical operations.  It does a full depth-first search so can be
// used when the structure is not directly known.
However, unlike with // |innerText()|, the search depth and breadth are limited to a fixed threshold. // Whitespace is trimmed from text accumulated at descendant nodes. string16 FindChildText(const WebElement& element) { WebNode child = element.firstChild(); const int kChildSearchDepth = 10; string16 element_text = FindChildTextInner(child, kChildSearchDepth); TrimWhitespace(element_text, TRIM_ALL, &element_text); return element_text; } // Helper for |InferLabelForElement()| that infers a label, if possible, from // a previous sibling of |element|. string16 InferLabelFromPrevious(const WebFormControlElement& element) { string16 inferred_label; WebNode previous = element.previousSibling(); if (previous.isNull()) return string16(); // Check for text immediately before the |element|. if (previous.isTextNode()) { inferred_label = previous.nodeValue(); TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); } // If we didn't find text, check for an immediately preceding text container, // e.g.

Some Text

// Note the lack of whitespace between

and elements. if (inferred_label.empty() && previous.isElementNode()) { WebElement previous_element = previous.to(); if (IsTextContainerElement(previous_element)) inferred_label = FindChildText(previous_element); } // If we didn't find one immediately preceding, check for a text container // separated from this node only by whitespace, // e.g.

Some Text

// Note the whitespace between

and elements. if (inferred_label.empty() && previous.isTextNode()) { WebNode sibling = previous.previousSibling(); if (!sibling.isNull() && sibling.isElementNode()) { WebElement previous_element = sibling.to(); if (IsTextContainerElement(previous_element)) inferred_label = FindChildText(previous_element); } } // Look for a text node prior to or
tags, // e.g. Some Text or Some Text
while (inferred_label.empty() && !previous.isNull()) { if (previous.isTextNode()) { inferred_label = previous.nodeValue(); TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); } else if (previous.isElementNode()) { WebElement previous_element = previous.to(); if (IsTextContainerElement(previous_element)) inferred_label = FindChildText(previous_element); else if (!HasTagName(previous, "img") && !HasTagName(previous, "br")) break; } else { break; } previous = previous.previousSibling(); } // Look for a label node prior to the tag, // e.g. while (inferred_label.empty() && !previous.isNull()) { if (previous.isTextNode()) { inferred_label = previous.nodeValue(); TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); } else if (HasTagName(previous, "label")) { inferred_label = FindChildText(previous.to()); } else { break; } previous = previous.previousSibling(); } return inferred_label; } // Helper for |InferLabelForElement()| that infers a label, if possible, from // enclosing list item, // e.g.

  // <li>Some Text<input ...><input ...><input ...></li>
// Returns an empty string if |element| has no enclosing <li>.
string16 InferLabelFromListItem(const WebFormControlElement& element) {
  // Climb through element ancestors until an <li> is reached.
  WebNode parent = element.parentNode();
  while (!parent.isNull() && parent.isElementNode() &&
         // WebNode::to() is a template whose target type cannot be deduced,
         // so the element type has to be spelled out explicitly.
         !parent.to<WebElement>().hasTagName("li")) {
    parent = parent.parentNode();
  }

  // The climb can also stop at a non-element ancestor, so re-check the tag.
  if (!parent.isNull() && HasTagName(parent, "li"))
    return FindChildText(parent.to<WebElement>());

  return string16();
}

// Helper for |InferLabelForElement()| that infers a label, if possible, from
// surrounding table structure,
// e.g. <tr><td>Some Text</td><td><input ...></td></tr>
// or   <tr><th>Some Text</th><td><input ...></td></tr>
// or   <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
// or   <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
// Returns an empty string if |element| has no enclosing <td> or no preceding
// cell yields text.
string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
  // Climb through element ancestors until a <td> is reached.
  WebNode parent = element.parentNode();
  while (!parent.isNull() && parent.isElementNode() &&
         !parent.to<WebElement>().hasTagName("td")) {
    parent = parent.parentNode();
  }

  // The climb can also stop at a non-element ancestor; only proceed when an
  // enclosing <td> was actually found, matching the other helpers.
  if (parent.isNull() || !HasTagName(parent, "td"))
    return string16();

  // Check all previous siblings, skipping non-element nodes, until we find a
  // non-empty text block.
  string16 inferred_label;
  WebNode previous = parent.previousSibling();
  while (inferred_label.empty() && !previous.isNull()) {
    if (HasTagName(previous, "td") || HasTagName(previous, "th"))
      inferred_label = FindChildText(previous.to<WebElement>());

    previous = previous.previousSibling();
  }

  return inferred_label;
}

// Helper for |InferLabelForElement()| that infers a label, if possible, from
// surrounding table structure,
// e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
// Returns an empty string if |element| has no enclosing <tr> or no preceding
// row yields text.
string16 InferLabelFromTableRow(const WebFormControlElement& element) {
  // Climb through element ancestors until a <tr> is reached.
  WebNode parent = element.parentNode();
  while (!parent.isNull() && parent.isElementNode() &&
         !parent.to<WebElement>().hasTagName("tr")) {
    parent = parent.parentNode();
  }

  // The climb can also stop at a non-element ancestor; only proceed when an
  // enclosing <tr> was actually found, matching the other helpers.
  if (parent.isNull() || !HasTagName(parent, "tr"))
    return string16();

  // Check all previous siblings, skipping non-element nodes, until we find a
  // non-empty text block.
  string16 inferred_label;
  WebNode previous = parent.previousSibling();
  while (inferred_label.empty() && !previous.isNull()) {
    if (HasTagName(previous, "tr"))
      inferred_label = FindChildText(previous.to<WebElement>());

    previous = previous.previousSibling();
  }

  return inferred_label;
}

// Helper for |InferLabelForElement()| that infers a label, if possible, from
// a surrounding div table,
// e.g.
    Some Text
    // e.g.
    Some Text
    string16 InferLabelFromDivTable(const WebFormControlElement& element) { WebNode node = element.parentNode(); while (!node.isNull() && node.isElementNode() && !node.to().hasTagName("div")) { node = node.parentNode(); } if (node.isNull() || !HasTagName(node, "div")) return string16(); // Search the siblings while we cannot find label. string16 inferred_label; while (inferred_label.empty() && !node.isNull()) { if (HasTagName(node, "div")) inferred_label = FindChildText(node.to()); node = node.previousSibling(); } return inferred_label; } // Helper for |InferLabelForElement()| that infers a label, if possible, from // a surrounding definition list, // e.g.
    Some Text
    // e.g.
    Some Text
    string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { WebNode parent = element.parentNode(); while (!parent.isNull() && parent.isElementNode() && !parent.to().hasTagName("dd")) parent = parent.parentNode(); if (parent.isNull() || !HasTagName(parent, "dd")) return string16(); // Skip by any intervening text nodes. WebNode previous = parent.previousSibling(); while (!previous.isNull() && previous.isTextNode()) previous = previous.previousSibling(); if (previous.isNull() || !HasTagName(previous, "dt")) return string16(); return FindChildText(previous.to()); } // Infers corresponding label for |element| from surrounding context in the DOM, // e.g. the contents of the preceding

    tag or text element. string16 InferLabelForElement(const WebFormControlElement& element) { string16 inferred_label = InferLabelFromPrevious(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for list item case. inferred_label = InferLabelFromListItem(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for table cell case. inferred_label = InferLabelFromTableColumn(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for table row case. inferred_label = InferLabelFromTableRow(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for definition list case. inferred_label = InferLabelFromDefinitionList(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for div table case. return InferLabelFromDivTable(element); } // Fills |option_strings| with the values of the