Merge Chromium src@r53293

Change-Id: Ia79acf8670f385cee48c45b0a75371d8e950af34
author: Ben Murdoch <benm@google.com> 2010-07-29 17:14:53 +0100
committer: Ben Murdoch <benm@google.com> 2010-08-04 14:29:45 +0100
commit: c407dc5cd9bdc5668497f21b26b09d988ab439de (patch)
tree: 7eaf8707c0309516bdb042ad976feedaf72b0bb1 /webkit/glue/dom_serializer_unittest.cc
parent: 0998b1cdac5733f299c12d88bc31ef9c8035b8fa (diff)
download: external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.zip
external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.gz
external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.bz2
1 files changed, 850 insertions, 0 deletions
diff --git a/webkit/glue/dom_serializer_unittest.cc b/webkit/glue/dom_serializer_unittest.cc
new file mode 100644
index 0000000..a1846f3
--- /dev/null
+++ b/webkit/glue/dom_serializer_unittest.cc
@@ -0,0 +1,850 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/compiler_specific.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/hash_tables.h"
+#include "base/utf_string_conversions.h"
+#include "net/base/net_util.h"
+#include "net/url_request/url_request_context.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebCString.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebData.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebDocument.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebElement.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNode.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNodeCollection.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNodeList.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebPageSerializer.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebPageSerializerClient.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebString.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebURL.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebVector.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebView.h"
+#include "webkit/glue/dom_operations.h"
+#include "webkit/glue/webkit_glue.h"
+#include "webkit/tools/test_shell/simple_resource_loader_bridge.h"
+#include "webkit/tools/test_shell/test_shell_test.h"
+
+using WebKit::WebCString;
+using WebKit::WebData;
+using WebKit::WebDocument;
+using WebKit::WebElement;
+using WebKit::WebFrame;
+using WebKit::WebNode;
+using WebKit::WebNodeCollection;
+using WebKit::WebNodeList;
+using WebKit::WebPageSerializer;
+using WebKit::WebPageSerializerClient;
+using WebKit::WebNode;
+using WebKit::WebString;
+using WebKit::WebURL;
+using WebKit::WebView;
+using WebKit::WebVector;
+
+namespace {
+
+// Walks the frame tree of |web_view|, starting at its main frame, looking for
+// a frame whose URL equals |url|. The walk uses an explicit stack instead of
+// recursion. Returns NULL when there is no main frame or no frame matches.
+WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) {
+  WebFrame* const root_frame = web_view->mainFrame();
+  if (!root_frame)
+    return NULL;
+
+  std::vector<WebFrame*> pending;
+  pending.push_back(root_frame);
+
+  do {
+    WebFrame* const frame = pending.back();
+    pending.pop_back();
+    if (GURL(frame->url()) == url)
+      return frame;
+
+    // Queue every child frame that is hosted by a frame or iframe element.
+    WebNodeCollection nodes = frame->document().all();
+    WebNode item = nodes.firstItem();
+    while (!item.isNull()) {
+      if (item.isElementNode()) {
+        WebElement owner = item.to<WebElement>();
+        if (owner.hasTagName("frame") || owner.hasTagName("iframe")) {
+          WebFrame* const child = WebFrame::fromFrameOwnerElement(owner);
+          if (child)
+            pending.push_back(child);
+        }
+      }
+      item = nodes.nextItem();
+    }
+  } while (!pending.empty());
+  return NULL;
+}
+
+// Test fixture for WebPageSerializer. It loads pages into the test shell,
+// serializes them, and -- acting as the WebPageSerializerClient itself --
+// collects the serialized output of every frame keyed by the frame's URL.
+class DomSerializerTests : public TestShellTest,
+                           public WebPageSerializerClient {
+ public:
+  DomSerializerTests()
+    : serialized_(false),
+      local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { }
+
+  // WebPageSerializerClient implementation. Receives one chunk of serialized
+  // output for the frame identified by |frame_web_url|. |status| tells
+  // whether that frame, or the whole serialization, has finished.
+  void didSerializeDataForFrame(const WebURL& frame_web_url,
+                                const WebCString& data,
+                                PageSerializationStatus status) {
+    // Once all frames are finished saving, every frame seen so far must
+    // already have reported its own completion.
+    if (status == WebPageSerializerClient::AllFramesAreFinished) {
+      SerializationFinishStatusMap::iterator it =
+          serialization_finish_status_.begin();
+      for (; it != serialization_finish_status_.end(); ++it)
+        ASSERT_TRUE(it->second);
+      serialized_ = true;
+      return;
+    }
+
+    GURL frame_url(frame_web_url);
+    const std::string& frame_key = frame_url.spec();
+
+    // Look up the finish status of the current frame. A frame that is seen
+    // for the first time starts out as not finished.
+    SerializationFinishStatusMap::iterator it =
+        serialization_finish_status_.find(frame_key);
+    if (it == serialization_finish_status_.end()) {
+      it = serialization_finish_status_.insert(
+          SerializationFinishStatusMap::value_type(frame_key, false)).first;
+    }
+    // Data must never arrive for a frame that already reported completion.
+    ASSERT_FALSE(it->second);
+
+    // Add data to the corresponding frame's content. The explicit length is
+    // used so that a chunk is not cut short at an embedded NUL byte.
+    std::string& frame_content = serialized_frame_map_[frame_key];
+    if (data.length())
+      frame_content.append(data.data(), data.length());
+
+    // Current frame has completed saving, change the finish status.
+    if (status == WebPageSerializerClient::CurrentFrameIsFinished)
+      it->second = true;
+  }
+
+  // Returns true if any serialized output was recorded for |frame_url|.
+  bool HasSerializedFrame(const GURL& frame_url) {
+    return serialized_frame_map_.find(frame_url.spec()) !=
+           serialized_frame_map_.end();
+  }
+
+  // Returns the serialized output recorded for |frame_url|. Note that this
+  // creates an empty entry when none exists, so callers check
+  // HasSerializedFrame() first.
+  const std::string& GetSerializedContentForFrame(
+      const GURL& frame_url) {
+    return serialized_frame_map_[frame_url.spec()];
+  }
+
+  // Loads the page at |page_url| and waits until the load has finished.
+  void LoadPageFromURL(const GURL& page_url) {
+    // Load the test file.
+    test_shell_->ResetTestController();
+    test_shell_->LoadURL(page_url);
+    test_shell_->WaitTestFinished();
+  }
+
+  // Loads |contents| into the main frame, using |base_url| to resolve
+  // relative URLs within the document. An empty |encoding_info| means the
+  // contents are loaded as an HTML string; otherwise the raw bytes are loaded
+  // as text/html with the given encoding.
+  void LoadContents(const std::string& contents,
+                    const GURL& base_url,
+                    const WebString encoding_info) {
+    test_shell_->ResetTestController();
+    // Both branches below need the main frame, so check it once up front.
+    WebFrame* web_frame = test_shell_->webView()->mainFrame();
+    ASSERT_TRUE(web_frame != NULL);
+
+    // If input encoding is empty, use UTF-8 as default encoding.
+    if (encoding_info.isEmpty()) {
+      web_frame->loadHTMLString(contents, base_url);
+    } else {
+      // Do not use WebFrame::loadHTMLString here because it assumes that the
+      // input html contents use UTF-8 encoding.
+      WebData data(contents.data(), contents.length());
+      web_frame->loadData(data, "text/html", encoding_info, base_url);
+    }
+
+    test_shell_->WaitTestFinished();
+  }
+
+  // Serializes the DOM of the frame whose URL is |page_url|. The parameter
+  // |recursive_serialization| indicates whether all sub-frames are
+  // serialized as well. The output is delivered to
+  // didSerializeDataForFrame().
+  void SerializeDomForURL(const GURL& page_url,
+                          bool recursive_serialization) {
+    // Find corresponding WebFrame according to page_url.
+    WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(),
+                                            page_url);
+    ASSERT_TRUE(web_frame != NULL);
+    // Add input file URL to links_.
+    links_.assign(&page_url, 1);
+    // Add dummy file path to local_paths_.
+    WebString file_path = webkit_glue::FilePathStringToWebString(
+        FILE_PATH_LITERAL("c:\\dummy.htm"));
+    local_paths_.assign(&file_path, 1);
+    // Start serializing DOM.
+    bool result = WebPageSerializer::serialize(web_frame,
+       recursive_serialization,
+       static_cast<WebPageSerializerClient*>(this),
+       links_,
+       local_paths_,
+       webkit_glue::FilePathToWebString(local_directory_name_));
+    ASSERT_TRUE(result);
+    ASSERT_TRUE(serialized_);
+  }
+
+ private:
+  // Map frame_url to corresponding serialized_content.
+  typedef base::hash_map<std::string, std::string> SerializedFrameContentMap;
+  SerializedFrameContentMap serialized_frame_map_;
+  // Map frame_url to corresponding status of serialization finish.
+  typedef base::hash_map<std::string, bool> SerializationFinishStatusMap;
+  SerializationFinishStatusMap serialization_finish_status_;
+  // Flag indicates whether the process of serializing DOM is finished or not.
+  bool serialized_;
+  // The links_ contain dummy original URLs of all saved links.
+  WebVector<WebURL> links_;
+  // The local_paths_ contain dummy corresponding local file paths of all saved
+  // links, which match links_ one by one.
+  WebVector<WebString> local_paths_;
+  // The local_directory_name_ is the dummy relative path of the directory
+  // which contains all saved auxiliary files, including all sub frames and
+  // resources.
+  const FilePath local_directory_name_;
+
+ protected:
+  // testing::Test
+  virtual void SetUp() {
+    TestShellTest::SetUp();
+    serialized_ = false;
+  }
+
+  virtual void TearDown() {
+    TestShellTest::TearDown();
+  }
+};
+
+// Returns true when |doc| has a first child and that child is a document type
+// node. Returns false otherwise, including for a document with no children.
+bool HasDocType(const WebDocument& doc) {
+  WebNode first_child = doc.firstChild();
+  return !first_child.isNull() &&
+         first_child.nodeType() == WebNode::DocumentTypeNode;
+}
+
+// Helper function for checking whether the input node is a META tag. Returns
+// true if |node| is a META element, otherwise returns false and leaves
+// |charset_info| untouched.
+//
+// For a META element, |charset_info| is set as follows:
+//   - empty, if the element is not an http-equiv="content-type" META or its
+//     content attribute does not mention "charset";
+//   - the placeholder "has-charset-declaration", if "charset" is mentioned
+//     but no value could be extracted (no "=" follows, or nothing but
+//     whitespace follows the "=");
+//   - the actual charset value otherwise.
+bool IsMetaElement(const WebNode& node, std::string& charset_info) {
+  if (!node.isElementNode())
+    return false;
+  const WebElement meta = node.toConst<WebElement>();
+  if (!meta.hasTagName("meta"))
+    return false;
+  charset_info.clear();
+  // Check the META charset declaration.
+  WebString httpEquiv = meta.getAttribute("http-equiv");
+  if (!LowerCaseEqualsASCII(httpEquiv, "content-type"))
+    return true;
+
+  const std::string content = meta.getAttribute("content").utf8();
+  static const char kCharset[] = "charset";
+  const std::string::size_type charset_pos = content.find(kCharset);
+  if (charset_pos == std::string::npos)
+    return true;
+
+  // Add a dummy charset declaration to charset_info, which indicates this
+  // META tag has a charset declaration although we do not have the actual
+  // value yet.
+  charset_info.append("has-charset-declaration");
+
+  // Find the "=" symbol that follows the keyword. Without one there is no
+  // value to extract; stop here instead of scanning past the end of the
+  // string.
+  const std::string::size_type equals_pos =
+      content.find('=', charset_pos + sizeof(kCharset) - 1);
+  if (equals_pos == std::string::npos)
+    return true;
+
+  // Skip leading whitespace and control characters. The comparison is done on
+  // unsigned char so that bytes >= 0x80 are not mistaken for whitespace on
+  // platforms where char is signed.
+  std::string::size_type start_pos = equals_pos + 1;
+  while (start_pos < content.length() &&
+         static_cast<unsigned char>(content[start_pos]) <= 0x20) {
+    ++start_pos;
+  }
+  if (start_pos == content.length())
+    return true;
+
+  // Now we have the start point of the charset info. The value ends at the
+  // next whitespace/control character, at a ';', or at the end of the string.
+  std::string::size_type end_pos = start_pos;
+  while (end_pos < content.length() &&
+         static_cast<unsigned char>(content[end_pos]) > 0x20 &&
+         content[end_pos] != ';') {
+    ++end_pos;
+  }
+  // Get actual charset info.
+  charset_info = content.substr(start_pos, end_pos - start_pos);
+  return true;
+}
+
+// A page that has a document type must still have a document type after it
+// has been serialized and the serialized output has been loaded again.
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) {
+  const FilePath test_page =
+      data_dir_.AppendASCII("dom_serializer").AppendASCII("youtube_1.htm");
+  const GURL page_url = net::FilePathToFileURL(test_page);
+  ASSERT_TRUE(page_url.SchemeIsFile());
+
+  // Load the page and confirm that the source document has a document type.
+  LoadPageFromURL(page_url);
+  WebFrame* frame = FindSubFrameByURL(test_shell_->webView(), page_url);
+  ASSERT_TRUE(frame != NULL);
+  WebDocument document = frame->document();
+  ASSERT_TRUE(HasDocType(document));
+
+  // Serialize the frame, then feed the output back into the main frame.
+  SerializeDomForURL(page_url, false);
+  ASSERT_TRUE(HasSerializedFrame(page_url));
+  const std::string& output = GetSerializedContentForFrame(page_url);
+  LoadContents(output, page_url, frame->encoding());
+
+  // The reloaded document must still have a document type.
+  frame = test_shell_->webView()->mainFrame();
+  document = frame->document();
+  ASSERT_TRUE(HasDocType(document));
+}
+
+// A page that has no document type must not gain one by being serialized and
+// loaded again.
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) {
+  const FilePath test_page =
+      data_dir_.AppendASCII("dom_serializer").AppendASCII("youtube_2.htm");
+  const GURL page_url = net::FilePathToFileURL(test_page);
+  ASSERT_TRUE(page_url.SchemeIsFile());
+
+  // Load the page and confirm that the source document has no document type.
+  LoadPageFromURL(page_url);
+  WebFrame* frame = FindSubFrameByURL(test_shell_->webView(), page_url);
+  ASSERT_TRUE(frame != NULL);
+  WebDocument document = frame->document();
+  ASSERT_TRUE(!HasDocType(document));
+
+  // Serialize the frame, then feed the output back into the main frame.
+  SerializeDomForURL(page_url, false);
+  ASSERT_TRUE(HasSerializedFrame(page_url));
+  const std::string& output = GetSerializedContentForFrame(page_url);
+  LoadContents(output, page_url, frame->encoding());
+
+  // The reloaded document must still have no document type.
+  frame = test_shell_->webView()->mainFrame();
+  document = frame->document();
+  ASSERT_TRUE(!HasDocType(document));
+}
+
+// Serializing an XML document that uses all 5 built-in entities must produce
+// output identical to the original file contents.
+TEST_F(DomSerializerTests, SerializeXMLDocWithBuiltInEntities) {
+  const FilePath xml_path =
+      data_dir_.AppendASCII("dom_serializer").AppendASCII("note.xml");
+  // Keep the raw file contents around as the expected serializer output.
+  std::string expected_xml;
+  ASSERT_TRUE(file_util::ReadFileToString(xml_path, &expected_xml));
+  const GURL xml_url = net::FilePathToFileURL(xml_path);
+  ASSERT_TRUE(xml_url.SchemeIsFile());
+
+  // Load the document and serialize it.
+  LoadPageFromURL(xml_url);
+  SerializeDomForURL(xml_url, false);
+
+  // The serialized output must match the file byte for byte.
+  ASSERT_TRUE(HasSerializedFrame(xml_url));
+  const std::string& actual_xml = GetSerializedContentForFrame(xml_url);
+  ASSERT_EQ(expected_xml, actual_xml);
+}
+
+// Serializing an HTML DOM adds the MOTW (Mark of the Web) declaration: it is
+// absent from the original file but present in the serialized output.
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) {
+  const FilePath html_path =
+      data_dir_.AppendASCII("dom_serializer").AppendASCII("youtube_2.htm");
+  // Read the raw file so it can be searched for the declaration.
+  std::string raw_html;
+  ASSERT_TRUE(file_util::ReadFileToString(html_path, &raw_html));
+  const GURL html_url = net::FilePathToFileURL(html_path);
+  ASSERT_TRUE(html_url.SchemeIsFile());
+
+  // The declaration generated for this URL must be non-empty and must not
+  // already appear in the raw file.
+  const std::string motw =
+     WebPageSerializer::generateMarkOfTheWebDeclaration(html_url).utf8();
+  ASSERT_FALSE(motw.empty());
+  ASSERT_TRUE(raw_html.find(motw) == std::string::npos);
+
+  // Load the page and serialize it.
+  LoadPageFromURL(html_url);
+  SerializeDomForURL(html_url, false);
+
+  // The serialized output must contain the declaration.
+  ASSERT_TRUE(HasSerializedFrame(html_url));
+  const std::string& output = GetSerializedContentForFrame(html_url);
+  ASSERT_FALSE(output.find(motw) == std::string::npos);
+}
+
+// Even when the original document has no META charset declaration, the
+// serializer adds a META with the correct charset as the first child of the
+// HEAD element (to work around WebKit bug
+// http://bugs.webkit.org/show_bug.cgi?id=16621).
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
+  const FilePath html_path =
+      data_dir_.AppendASCII("dom_serializer").AppendASCII("youtube_1.htm");
+  const GURL html_url = net::FilePathToFileURL(html_path);
+  ASSERT_TRUE(html_url.SchemeIsFile());
+  LoadPageFromURL(html_url);
+
+  // Precondition: no META child of HEAD in the original document declares a
+  // charset.
+  WebFrame* frame = FindSubFrameByURL(test_shell_->webView(), html_url);
+  ASSERT_TRUE(frame != NULL);
+  WebDocument document = frame->document();
+  ASSERT_TRUE(document.isHTMLDocument());
+  WebElement head = document.head();
+  ASSERT_TRUE(!head.isNull());
+  WebNode head_child = head.firstChild();
+  while (!head_child.isNull()) {
+    std::string declared_charset;
+    if (IsMetaElement(head_child, declared_charset))
+      ASSERT_TRUE(declared_charset.empty());
+    head_child = head_child.nextSibling();
+  }
+
+  // Serialize, then load the serialized output back into the main frame.
+  SerializeDomForURL(html_url, false);
+  ASSERT_TRUE(HasSerializedFrame(html_url));
+  const std::string& output = GetSerializedContentForFrame(html_url);
+  LoadContents(output, html_url, frame->encoding());
+
+  // The first child of HEAD must now be a META whose charset equals the
+  // frame's encoding.
+  frame = test_shell_->webView()->mainFrame();
+  ASSERT_TRUE(frame != NULL);
+  document = frame->document();
+  ASSERT_TRUE(document.isHTMLDocument());
+  head = document.head();
+  ASSERT_TRUE(!head.isNull());
+  WebNode first_meta = head.firstChild();
+  ASSERT_TRUE(!first_meta.isNull());
+  std::string first_charset;
+  ASSERT_TRUE(IsMetaElement(first_meta, first_charset));
+  ASSERT_TRUE(!first_charset.empty());
+  ASSERT_TRUE(first_charset == std::string(frame->encoding().utf8()));
+
+  // None of the remaining HEAD children may be a META with a charset
+  // declaration.
+  WebNode later_child = first_meta.nextSibling();
+  while (!later_child.isNull()) {
+    std::string declared_charset;
+    if (IsMetaElement(later_child, declared_charset))
+      ASSERT_TRUE(declared_charset.empty());
+    later_child = later_child.nextSibling();
+  }
+}
+
+// When the original document has several META charset declarations, the
+// serializer adds a META with the correct charset as the first child of the
+// HEAD element and removes all of the original META charset declarations.
+TEST_F(DomSerializerTests,
+       SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) {
+  const FilePath html_path =
+      data_dir_.AppendASCII("dom_serializer").AppendASCII("youtube_2.htm");
+  const GURL html_url = net::FilePathToFileURL(html_path);
+  ASSERT_TRUE(html_url.SchemeIsFile());
+  LoadPageFromURL(html_url);
+
+  // Precondition: more than one META child of HEAD in the original document
+  // declares a charset.
+  WebFrame* frame = FindSubFrameByURL(test_shell_->webView(), html_url);
+  ASSERT_TRUE(frame != NULL);
+  WebDocument document = frame->document();
+  ASSERT_TRUE(document.isHTMLDocument());
+  WebElement head = document.head();
+  ASSERT_TRUE(!head.isNull());
+  int metas_with_charset = 0;
+  WebNode head_child = head.firstChild();
+  while (!head_child.isNull()) {
+    std::string declared_charset;
+    if (IsMetaElement(head_child, declared_charset) &&
+        !declared_charset.empty()) {
+      metas_with_charset++;
+    }
+    head_child = head_child.nextSibling();
+  }
+  ASSERT_TRUE(metas_with_charset > 1);
+
+  // Serialize, then load the serialized output back into the main frame.
+  SerializeDomForURL(html_url, false);
+  ASSERT_TRUE(HasSerializedFrame(html_url));
+  const std::string& output = GetSerializedContentForFrame(html_url);
+  LoadContents(output, html_url, frame->encoding());
+
+  // Only the first child of HEAD may be a META with a charset declaration,
+  // and its charset must equal the frame's encoding.
+  frame = test_shell_->webView()->mainFrame();
+  ASSERT_TRUE(frame != NULL);
+  document = frame->document();
+  ASSERT_TRUE(document.isHTMLDocument());
+  head = document.head();
+  ASSERT_TRUE(!head.isNull());
+  WebNode first_meta = head.firstChild();
+  ASSERT_TRUE(!first_meta.isNull());
+  std::string first_charset;
+  ASSERT_TRUE(IsMetaElement(first_meta, first_charset));
+  ASSERT_TRUE(!first_charset.empty());
+  ASSERT_TRUE(first_charset == std::string(frame->encoding().utf8()));
+
+  // None of the remaining HEAD children may be a META with a charset
+  // declaration.
+  WebNode later_child = first_meta.nextSibling();
+  while (!later_child.isNull()) {
+    std::string declared_charset;
+    if (IsMetaElement(later_child, declared_charset))
+      ASSERT_TRUE(declared_charset.empty());
+    later_child = later_child.nextSibling();
+  }
+}
+
+// HTML entities that appear in text content must survive serialization
+// unchanged.
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) {
+  // The URL is a dummy that only identifies the load below; the markup under
+  // test comes from kSourceHtml.
+  const FilePath dummy_path = data_dir_.AppendASCII(
+      "dom_serializer/htmlentities_in_text.htm");
+  const GURL dummy_url = net::FilePathToFileURL(dummy_path);
+  ASSERT_TRUE(dummy_url.SchemeIsFile());
+  static const char kSourceHtml[] =
+      "<html><body>&amp;&lt;&gt;\"\'</body></html>";
+  LoadContents(kSourceHtml, dummy_url, WebString());
+
+  // Check the markup of BODY's text node in the loaded DOM.
+  WebFrame* frame = FindSubFrameByURL(test_shell_->webView(), dummy_url);
+  ASSERT_TRUE(frame != NULL);
+  WebDocument document = frame->document();
+  ASSERT_TRUE(document.isHTMLDocument());
+  WebElement body = document.body();
+  ASSERT_TRUE(!body.isNull());
+  WebNode body_text = body.firstChild();
+  ASSERT_TRUE(body_text.isTextNode());
+  ASSERT_TRUE(std::string(body_text.createMarkup().utf8()) ==
+              "&amp;&lt;&gt;\"\'");
+
+  // Serialize and fetch the output.
+  SerializeDomForURL(dummy_url, false);
+  ASSERT_TRUE(HasSerializedFrame(dummy_url));
+  const std::string& actual = GetSerializedContentForFrame(dummy_url);
+
+  // Build the expected output. The serializer adds the MOTW declaration, so
+  // the expectation starts with it, followed by the source markup.
+  std::string expected =
+    WebPageSerializer::generateMarkOfTheWebDeclaration(dummy_url).utf8();
+  expected += kSourceHtml;
+  // WebCore inserts a HEAD element when it creates BODY and none exists (see
+  // HTMLParser::bodyCreateErrorCheck). If such a HEAD is present, the
+  // expectation also needs the HEAD with its META charset declaration right
+  // after the opening html tag.
+  if (!document.head().isNull()) {
+    WebString frame_encoding = frame->encoding();
+    const std::string html_open("<html>");
+    const std::string::size_type html_pos = expected.find(html_open);
+    ASSERT_NE(std::string::npos, html_pos);
+    std::string head_markup("<head>");
+    head_markup += WebPageSerializer::generateMetaCharsetDeclaration(
+        frame_encoding).utf8();
+    head_markup += "</head>";
+    expected.insert(html_pos + html_open.length(), head_markup);
+  }
+  ASSERT_EQ(expected, actual);
+}
+
+// Test situation of html entities in attribute value when serializing
+// HTML DOM: the entities must be written back out in escaped form.
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInAttributeValue) {
+  FilePath page_file_path = data_dir_;
+  page_file_path = page_file_path.AppendASCII(
+      "dom_serializer/htmlentities_in_attribute_value.htm");
+  // Get file URL. The URL is a dummy URL to identify the following loading
+  // actions. The test content is in the constant original_contents.
+  GURL file_url = net::FilePathToFileURL(page_file_path);
+  ASSERT_TRUE(file_url.SchemeIsFile());
+  // Test contents.
+  static const char* const original_contents =
+      "<html><body title=\"&amp;&lt;&gt;&quot;&#39;\"></body></html>";
+  // Load the test contents.
+  LoadContents(original_contents, file_url, WebString());
+  // Get value of BODY's title attribute in DOM; it must be unescaped there.
+  WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
+  ASSERT_TRUE(web_frame != NULL);
+  WebDocument doc = web_frame->document();
+  ASSERT_TRUE(doc.isHTMLDocument());
+  WebElement body_ele = doc.body();
+  ASSERT_TRUE(!body_ele.isNull());
+  WebString value = body_ele.getAttribute("title");
+  ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'");
+  // Do serialization.
+  SerializeDomForURL(file_url, false);
+  // Fetch the serialized contents.
+  ASSERT_TRUE(HasSerializedFrame(file_url));
+  const std::string& serialized_contents =
+      GetSerializedContentForFrame(file_url);
+  // Build the expected output. The serializer adds the MOTW declaration, so
+  // the expectation starts with it, followed by the original contents.
+  std::string original_str =
+      WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
+  original_str += original_contents;
+  // The test contents have no HEAD element. The HEAD part (with its META
+  // charset declaration) is only expected in the output if the loaded
+  // document actually ended up with a HEAD element, so test for the HEAD
+  // itself rather than for the document, which is never null at this point.
+  if (!doc.head().isNull()) {
+    WebString encoding = web_frame->encoding();
+    std::string htmlTag("<html>");
+    std::string::size_type pos = original_str.find(htmlTag);
+    ASSERT_NE(std::string::npos, pos);
+    pos += htmlTag.length();
+    std::string head_part("<head>");
+    head_part +=
+        WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
+    head_part += "</head>";
+    original_str.insert(pos, head_part);
+  }
+  ASSERT_EQ(original_str, serialized_contents);
+}
+
+// Test situation of non-standard HTML entities when serializing HTML DOM:
+// none of them may appear in the serialized output.
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithNonStandardEntities) {
+  // Make a test file URL and load it.
+  FilePath page_file_path = data_dir_;
+  page_file_path = page_file_path.AppendASCII("dom_serializer");
+  page_file_path = page_file_path.AppendASCII("nonstandard_htmlentities.htm");
+  GURL file_url = net::FilePathToFileURL(page_file_path);
+  ASSERT_TRUE(file_url.SchemeIsFile());
+  LoadPageFromURL(file_url);
+
+  // Get value of BODY's title attribute in DOM. Fail cleanly, instead of
+  // dereferencing a null frame or element, if the page did not load as
+  // expected.
+  WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
+  ASSERT_TRUE(web_frame != NULL);
+  WebDocument doc = web_frame->document();
+  ASSERT_TRUE(doc.isHTMLDocument());
+  WebElement body_element = doc.body();
+  ASSERT_TRUE(!body_element.isNull());
+  // Unescaped string for "&percnt;&nsup;&supl;&apos;".
+  static const wchar_t parsed_value[] = {
+    '%', 0x2285, 0x00b9, '\'', 0
+  };
+  WebString value = body_element.getAttribute("title");
+  ASSERT_TRUE(UTF16ToWide(value) == parsed_value);
+  ASSERT_TRUE(UTF16ToWide(body_element.innerText()) == parsed_value);
+
+  // Do serialization.
+  SerializeDomForURL(file_url, false);
+  // Check the serialized string.
+  ASSERT_TRUE(HasSerializedFrame(file_url));
+  const std::string& serialized_contents =
+      GetSerializedContentForFrame(file_url);
+  // Confirm that the serialized string has no non-standard HTML entities.
+  ASSERT_EQ(std::string::npos, serialized_contents.find("&percnt;"));
+  ASSERT_EQ(std::string::npos, serialized_contents.find("&nsup;"));
+  ASSERT_EQ(std::string::npos, serialized_contents.find("&supl;"));
+  ASSERT_EQ(std::string::npos, serialized_contents.find("&apos;"));
+}
+
+// Test situation of BASE tag in original document when serializing HTML DOM.
+// When serializing, we should comment out the BASE tag, append a new BASE
+// tag, rewrite all the savable URLs to relative local paths, and change other
+// URLs to absolute URLs.
+//
+// The test makes two passes over all elements: one over the original document
+// (every link must be relative) and one over the reloaded serialized document
+// (every link must be absolute, and there must be one more BASE tag).
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithBaseTag) {
+  // There are total 2 available base tags in this test file.
+  const int kTotalBaseTagCountInTestFile = 2;
+
+  FilePath page_file_path = data_dir_.AppendASCII("dom_serializer");
+  file_util::EnsureEndsWithSeparator(&page_file_path);
+
+  // Get page dir URL which is base URL of this file.
+  GURL path_dir_url = net::FilePathToFileURL(page_file_path);
+  // Get file path.
+  page_file_path =
+      page_file_path.AppendASCII("html_doc_has_base_tag.htm");
+  // Get file URL.
+  GURL file_url = net::FilePathToFileURL(page_file_path);
+  ASSERT_TRUE(file_url.SchemeIsFile());
+  // Load the test file.
+  LoadPageFromURL(file_url);
+  // For this test we assume the test file has no savable sub-resource links
+  // and that all of its links are relative URLs, so we need to check those
+  // relative URLs and make sure the document has BASE tags.
+  WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
+  ASSERT_TRUE(web_frame != NULL);
+  WebDocument doc = web_frame->document();
+  ASSERT_TRUE(doc.isHTMLDocument());
+  // Go through all descendant nodes.
+  WebNodeCollection all = doc.all();
+  int original_base_tag_count = 0;
+  for (WebNode node = all.firstItem(); !node.isNull();
+       node = all.nextItem()) {
+    if (!node.isElementNode())
+      continue;
+    WebElement element = node.to<WebElement>();
+    if (element.hasTagName("base")) {
+      original_base_tag_count++;
+    } else {
+      // Get link: the element's sub-resource link if it has one, otherwise
+      // the href of an anchor element. An empty href is treated as no link.
+      WebString value =
+          webkit_glue::GetSubResourceLinkFromElement(element);
+      if (value.isNull() && element.hasTagName("a")) {
+        value = element.getAttribute("href");
+        if (value.isEmpty())
+          value = WebString();
+      }
+      // Each link is a relative link, i.e. it has no scheme.
+      if (!value.isNull()) {
+        GURL link(value.utf8());
+        ASSERT_TRUE(link.scheme().empty());
+      }
+    }
+  }
+  ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile);
+  // Make sure in original document, the base URL is not equal with the
+  // |path_dir_url|.
+  GURL original_base_url(doc.baseURL());
+  ASSERT_NE(original_base_url, path_dir_url);
+
+  // Do serialization.
+  SerializeDomForURL(file_url, false);
+
+  // Load the serialized contents.
+  ASSERT_TRUE(HasSerializedFrame(file_url));
+  const std::string& serialized_contents =
+      GetSerializedContentForFrame(file_url);
+  LoadContents(serialized_contents, file_url,
+               web_frame->encoding());
+
+  // Make sure all links are absolute URLs and that the serialized HTML data
+  // contains the expected number of BASE tags. The resulting base URL of the
+  // document must be the URL of the directory of the current test file.
+  web_frame = test_shell_->webView()->mainFrame();
+  ASSERT_TRUE(web_frame != NULL);
+  doc = web_frame->document();
+  ASSERT_TRUE(doc.isHTMLDocument());
+  // Go through all descendant nodes.
+  all = doc.all();
+  int new_base_tag_count = 0;
+  for (WebNode node = all.firstItem(); !node.isNull();
+       node = all.nextItem()) {
+    if (!node.isElementNode())
+      continue;
+    WebElement element = node.to<WebElement>();
+    if (element.hasTagName("base")) {
+      new_base_tag_count++;
+    } else {
+      // Get link, using the same rules as in the first pass.
+      WebString value =
+          webkit_glue::GetSubResourceLinkFromElement(element);
+      if (value.isNull() && element.hasTagName("a")) {
+        value = element.getAttribute("href");
+        if (value.isEmpty())
+          value = WebString();
+      }
+      // Each link is an absolute link, i.e. it has a scheme.
+      if (!value.isNull()) {
+        GURL link(std::string(value.utf8()));
+        ASSERT_FALSE(link.scheme().empty());
+      }
+    }
+  }
+  // We have one more added BASE tag which is generated by JavaScript.
+  ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1);
+  // Make sure in new document, the base URL is equal with the |path_dir_url|.
+  GURL new_base_url(doc.baseURL());
+  ASSERT_EQ(new_base_url, path_dir_url);
+}
+
+// Verifies that a page whose HEAD element has no children survives a
+// serialize/reload round trip: after reloading, the HEAD holds exactly one
+// child (a META element carrying the charset) and the BODY text is unchanged.
+TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) {
+  const FilePath html_path =
+      data_dir_.AppendASCII("dom_serializer").AppendASCII("empty_head.htm");
+  GURL page_url = net::FilePathToFileURL(html_path);
+  ASSERT_TRUE(page_url.SchemeIsFile());
+
+  // Feed the frame a minimal document whose HEAD has no children.
+  static const char* const kEmptyHeadHtml =
+    "<html><head></head><body>hello world</body></html>";
+  LoadContents(kEmptyHeadHtml, page_url, WebString());
+
+  // Before serialization the HEAD element must exist and be childless.
+  WebFrame* main_frame = test_shell_->webView()->mainFrame();
+  ASSERT_TRUE(main_frame != NULL);
+  WebDocument document = main_frame->document();
+  ASSERT_TRUE(document.isHTMLDocument());
+  WebElement head = document.head();
+  ASSERT_FALSE(head.isNull());
+  ASSERT_FALSE(head.hasChildNodes());
+  ASSERT_TRUE(head.childNodes().length() == 0);
+
+  // Serialize the page and fetch the output recorded for this frame.
+  SerializeDomForURL(page_url, false);
+  ASSERT_TRUE(HasSerializedFrame(page_url));
+  const std::string& serialized_html = GetSerializedContentForFrame(page_url);
+
+  // Load the serialized output back into the frame; the HEAD must now hold
+  // exactly one child.
+  LoadContents(serialized_html, page_url, main_frame->encoding());
+  main_frame = test_shell_->webView()->mainFrame();
+  ASSERT_TRUE(main_frame != NULL);
+  document = main_frame->document();
+  ASSERT_TRUE(document.isHTMLDocument());
+  head = document.head();
+  ASSERT_FALSE(head.isNull());
+  ASSERT_TRUE(head.hasChildNodes());
+  ASSERT_TRUE(head.childNodes().length() == 1);
+  WebNode head_child = head.firstChild();
+  ASSERT_FALSE(head_child.isNull());
+
+  // That child has to be a META element whose charset is non-empty and
+  // matches the frame's encoding.
+  std::string meta_charset;
+  ASSERT_TRUE(IsMetaElement(head_child, meta_charset));
+  ASSERT_FALSE(meta_charset.empty());
+  const std::string frame_encoding(main_frame->encoding().utf8());
+  ASSERT_TRUE(meta_charset == frame_encoding);
+
+  // The BODY's first child must be a text node reading "hello world".
+  WebElement body = document.body();
+  ASSERT_FALSE(body.isNull());
+  WebNode body_child = body.firstChild();
+  ASSERT_TRUE(body_child.isTextNode());
+  const std::string body_text(body_child.nodeValue().utf8());
+  ASSERT_TRUE(body_text == "hello world");
+}
+
+// Regression test for http://crbug.com/42212: serializing a page that
+// contains an iframe which was handled as a download must not crash.
+TEST_F(DomSerializerTests, SerializeDocumentWithDownloadedIFrame) {
+  const FilePath html_path = data_dir_.AppendASCII("dom_serializer")
+                                 .AppendASCII("iframe-src-is-exe.htm");
+  GURL page_url = net::FilePathToFileURL(html_path);
+  ASSERT_TRUE(page_url.SchemeIsFile());
+  // Load the test page from its file URL.
+  LoadPageFromURL(page_url);
+  // Serialize recursively. There is nothing further to verify: getting
+  // through the call without crashing is the pass condition.
+  SerializeDomForURL(page_url, true);
+}
+
+}  // namespace
author	Ben Murdoch <benm@google.com>	2010-07-29 17:14:53 +0100
committer	Ben Murdoch <benm@google.com>	2010-08-04 14:29:45 +0100
commit	c407dc5cd9bdc5668497f21b26b09d988ab439de (patch)
tree	7eaf8707c0309516bdb042ad976feedaf72b0bb1 /webkit/glue/dom_serializer_unittest.cc
parent	0998b1cdac5733f299c12d88bc31ef9c8035b8fa (diff)
download	external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.zip external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.gz external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.bz2