// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/renderer/autofill/form_autofill_util.h" #include #include "base/callback_old.h" #include "base/logging.h" #include "base/memory/scoped_vector.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFormElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFormControlElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebInputElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebLabelElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebOptionElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebSelectElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebVector.h" #include "webkit/glue/form_data.h" #include "webkit/glue/form_field.h" using WebKit::WebElement; using WebKit::WebFormControlElement; using WebKit::WebFormElement; using WebKit::WebFrame; using WebKit::WebInputElement; using WebKit::WebLabelElement; using WebKit::WebNode; using WebKit::WebNodeList; using WebKit::WebOptionElement; using WebKit::WebSelectElement; using WebKit::WebString; using WebKit::WebVector; using webkit_glue::FormData; using webkit_glue::FormField; namespace { using autofill::ExtractAutofillableElements; using autofill::IsTextInput; using autofill::IsSelectElement; // The maximum length 
allowed for form data. const size_t kMaxDataLength = 1024; bool IsOptionElement(const WebElement& element) { return element.hasTagName("option"); } bool IsScriptElement(const WebElement& element) { return element.hasTagName("script"); } bool IsNoScriptElement(const WebElement& element) { return element.hasTagName("noscript"); } bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { return node.isElementNode() && node.toConst().hasTagName(tag); } bool IsAutofillableElement(const WebFormControlElement& element) { const WebInputElement* input_element = toWebInputElement(&element); return IsTextInput(input_element) || IsSelectElement(element); } // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed // to a single space. If |force_whitespace| is true, then the resulting string // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the // result includes a space only if |prefix| has trailing whitespace or |suffix| // has leading whitespace. // A few examples: // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " const string16 CombineAndCollapseWhitespace(const string16& prefix, const string16& suffix, bool force_whitespace) { string16 prefix_trimmed; TrimPositions prefix_trailing_whitespace = TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); // Recursively compute the children's text. 
string16 suffix_trimmed; TrimPositions suffix_leading_whitespace = TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); if (prefix_trailing_whitespace || suffix_leading_whitespace || force_whitespace) { return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; } else { return prefix_trimmed + suffix_trimmed; } } // This is a helper function for the FindChildText() function (see below). // Search depth is limited with the |depth| parameter. string16 FindChildTextInner(const WebNode& node, int depth) { if (depth <= 0 || node.isNull()) return string16(); // Skip over comments. if (node.nodeType() == WebNode::CommentNode) return FindChildTextInner(node.nextSibling(), depth - 1); if (node.nodeType() != WebNode::ElementNode && node.nodeType() != WebNode::TextNode) return string16(); // Ignore elements known not to contain inferable labels. if (node.isElementNode()) { const WebElement element = node.toConst(); if (IsOptionElement(element) || IsScriptElement(element) || IsNoScriptElement(element) || (element.isFormControlElement() && IsAutofillableElement(element.toConst()))) { return string16(); } } // Extract the text exactly at this node. string16 node_text = node.nodeValue(); // Recursively compute the children's text. // Preserve inter-element whitespace separation. string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); // Recursively compute the siblings' text. // Again, preserve inter-element whitespace separation. string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); return node_text; } // Returns the aggregated values of the descendants of |element| that are // non-empty text nodes. 
This is a faster alternative to |innerText()| for // performance critical operations. It does a full depth-first search so can be // used when the structure is not directly known. However, unlike with // |innerText()|, the search depth and breadth are limited to a fixed threshold. // Whitespace is trimmed from text accumulated at descendant nodes. string16 FindChildText(const WebNode& node) { if (node.isTextNode()) return node.nodeValue(); WebNode child = node.firstChild(); const int kChildSearchDepth = 10; string16 node_text = FindChildTextInner(child, kChildSearchDepth); TrimWhitespace(node_text, TRIM_ALL, &node_text); return node_text; } // Helper for |InferLabelForElement()| that infers a label, if possible, from // a previous sibling of |element|, // e.g. Some Text // or Some Text // or

Some Text

// or // or Some Text // or Some Text
. string16 InferLabelFromPrevious(const WebFormControlElement& element) { string16 inferred_label; WebNode previous = element; while (true) { previous = previous.previousSibling(); if (previous.isNull()) break; // Skip over comments. WebNode::NodeType node_type = previous.nodeType(); if (node_type == WebNode::CommentNode) continue; // Otherwise, only consider normal HTML elements and their contents. if (node_type != WebNode::TextNode && node_type != WebNode::ElementNode) break; // A label might be split across multiple "lightweight" nodes. // Coalesce any text contained in multiple consecutive // (a) plain text nodes or // (b) inline HTML elements that are essentially equivalent to text nodes. if (previous.isTextNode() || HasTagName(previous, "b") || HasTagName(previous, "strong") || HasTagName(previous, "span") || HasTagName(previous, "font")) { string16 value = FindChildText(previous); // A text node's value will be empty if it is for a line break. bool add_space = previous.isTextNode() && value.empty(); inferred_label = CombineAndCollapseWhitespace(value, inferred_label, add_space); continue; } // If we have identified a partial label and have reached a non-lightweight // element, consider the label to be complete. string16 trimmed_label; TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); if (!trimmed_label.empty()) break; // and
tags often appear between the input element and its // label text, so skip over them. if (HasTagName(previous, "img") || HasTagName(previous, "br")) continue; // We only expect

and

  // Helper for |InferLabelForElement()| that infers a label, if possible, from
// an enclosing list item, e.g. an <li> rendered as "• Some Text" that also
// contains the form control.  Returns the text of the closest <li> ancestor,
// or an empty string when |element| is not inside an <li>.
string16 InferLabelFromListItem(const WebFormControlElement& element) {
  // Walk up through element ancestors until an <li> is reached.
  WebNode parent = element.parentNode();
  while (!parent.isNull() && parent.isElementNode() &&
         !HasTagName(parent, "li")) {
    parent = parent.parentNode();
  }

  // The walk can also stop on a non-element ancestor, so re-check the tag.
  if (!parent.isNull() && HasTagName(parent, "li"))
    return FindChildText(parent);

  return string16();
}

// Helper for |InferLabelForElement()| that infers a label, if possible, from
// surrounding table structure: the label lives in a cell that precedes the
// cell holding the control,
// e.g. <tr><td>Some Text</td><td><input ...></td></tr>
// or   <tr><th>Some Text</th><td><input ...></td></tr>
// Returns an empty string when |element| is not inside a <td> or no earlier
// <td>/<th> sibling yields any text.
string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
  // Walk up through element ancestors until a <td> is reached.
  WebNode parent = element.parentNode();
  while (!parent.isNull() && parent.isElementNode() &&
         !HasTagName(parent, "td")) {
    parent = parent.parentNode();
  }

  // The walk can also stop on a non-element ancestor; only a real <td> has
  // sibling cells worth inspecting (mirrors the <li> and <dd> helpers).
  if (parent.isNull() || !HasTagName(parent, "td"))
    return string16();

  // Check all previous siblings, skipping non-element nodes, until we find a
  // non-empty text block.
  string16 inferred_label;
  WebNode previous = parent.previousSibling();
  while (inferred_label.empty() && !previous.isNull()) {
    if (HasTagName(previous, "td") || HasTagName(previous, "th"))
      inferred_label = FindChildText(previous);

    previous = previous.previousSibling();
  }

  return inferred_label;
}

// Helper for |InferLabelForElement()| that infers a label, if possible, from
// surrounding table structure: the label lives in a row that precedes the row
// holding the control,
// e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
// Returns an empty string when |element| is not inside a <tr> or no earlier
// <tr> sibling yields any text.
string16 InferLabelFromTableRow(const WebFormControlElement& element) {
  // Walk up through element ancestors until a <tr> is reached.
  WebNode parent = element.parentNode();
  while (!parent.isNull() && parent.isElementNode() &&
         !HasTagName(parent, "tr")) {
    parent = parent.parentNode();
  }

  // The walk can also stop on a non-element ancestor; only a real <tr> has
  // sibling rows worth inspecting (mirrors the <li> and <dd> helpers).
  if (parent.isNull() || !HasTagName(parent, "tr"))
    return string16();

  // Check all previous siblings, skipping non-element nodes, until we find a
  // non-empty text block.
  string16 inferred_label;
  WebNode previous = parent.previousSibling();
  while (inferred_label.empty() && !previous.isNull()) {
    if (HasTagName(previous, "tr"))
      inferred_label = FindChildText(previous);

    previous = previous.previousSibling();
  }

  return inferred_label;
}

// Helper for |InferLabelForElement()| that infers a label, if possible, from
// a surrounding div table,
// e.g.
    Some Text
    // e.g.
    Some Text
    // Starting at the parent of |element|, visits ancestors and their previous
// siblings and returns the text of the first <div> that yields any; returns an
// empty string if the walk runs out of nodes or is abandoned.
string16 InferLabelFromDivTable(const WebFormControlElement& element) {
  string16 label;

  // |ascend| selects the direction of the next step: true moves to the parent,
  // false moves to the previous sibling.
  bool ascend = true;

  // Search the sibling and parent <div>s until we find a candidate label.
  for (WebNode current = element.parentNode();
       label.empty() && !current.isNull();
       current = ascend ? current.parentNode() : current.previousSibling()) {
    if (HasTagName(current, "div")) {
      // Once a <div> has been seen, scan backwards through its siblings.
      ascend = false;
      label = FindChildText(current);
    } else if (ascend && (HasTagName(current, "table") ||
                          HasTagName(current, "fieldset"))) {
      // If the element is in a table or fieldset, its label most likely is
      // too, so give up on the div heuristic.
      break;
    }

    // If there are no more siblings, continue walking up the tree.
    if (current.previousSibling().isNull())
      ascend = true;
  }

  return label;
}

// Helper for |InferLabelForElement()| that infers a label, if possible, from
// a surrounding definition list,
// e.g.
    Some Text
    // e.g.
    Some Text
    // Looks for the closest enclosing <dd> of |element| and returns the text of
// the <dt> that immediately precedes it (ignoring text nodes in between),
// e.g. <dt>Some Text</dt><dd><input ...></dd>
// Returns an empty string when there is no enclosing <dd> or the nearest
// preceding non-text sibling is not a <dt>.
string16 InferLabelFromDefinitionList(const WebFormControlElement& element) {
  // Walk up through element ancestors until a <dd> is reached.
  WebNode parent = element.parentNode();
  while (!parent.isNull() && parent.isElementNode() &&
         !HasTagName(parent, "dd")) {
    parent = parent.parentNode();
  }

  // The walk can also stop on a non-element ancestor, so re-check the tag.
  if (parent.isNull() || !HasTagName(parent, "dd"))
    return string16();

  // Skip by any intervening text nodes.
  WebNode previous = parent.previousSibling();
  while (!previous.isNull() && previous.isTextNode())
    previous = previous.previousSibling();

  if (previous.isNull() || !HasTagName(previous, "dt"))
    return string16();

  return FindChildText(previous);
}

// Infers corresponding label for |element| from surrounding context in the DOM,
// e.g. the contents of the preceding

    tag or text element. string16 InferLabelForElement(const WebFormControlElement& element) { string16 inferred_label = InferLabelFromPrevious(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for list item case. inferred_label = InferLabelFromListItem(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for table cell case. inferred_label = InferLabelFromTableColumn(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for table row case. inferred_label = InferLabelFromTableRow(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for definition list case. inferred_label = InferLabelFromDefinitionList(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for div table case. return InferLabelFromDivTable(element); } // Fills |option_strings| with the values of the