// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "config.h" #include "base/compiler_specific.h" MSVC_PUSH_WARNING_LEVEL(0); #include "Document.h" #include "DocumentType.h" #include "Element.h" #include "FrameLoader.h" #include "FrameView.h" #include "HTMLCollection.h" #include "HTMLHeadElement.h" #include "HTMLMetaElement.h" #include "HTMLNames.h" #include "KURL.h" #include "markup.h" #include "SharedBuffer.h" #include "SubstituteData.h" MSVC_POP_WARNING(); #undef LOG #include "base/file_path.h" #include "base/file_util.h" #include "base/hash_tables.h" #include "base/string_util.h" #include "net/base/net_util.h" #include "net/url_request/url_request_context.h" #include "webkit/api/public/WebData.h" #include "webkit/api/public/WebURL.h" #include "webkit/glue/dom_operations.h" #include "webkit/glue/dom_operations_private.h" #include "webkit/glue/dom_serializer.h" #include "webkit/glue/dom_serializer_delegate.h" #include "webkit/glue/glue_util.h" #include "webkit/glue/webframe_impl.h" #include "webkit/glue/webview.h" #include "webkit/tools/test_shell/simple_resource_loader_bridge.h" #include "webkit/tools/test_shell/test_shell_test.h" namespace { class DomSerializerTests : public TestShellTest, public webkit_glue::DomSerializerDelegate { public: DomSerializerTests() : local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { } // DomSerializerDelegate. void DidSerializeDataForFrame(const GURL& frame_url, const std::string& data, PageSavingSerializationStatus status) { // If the all frames are finished saving, check all finish status if (status == ALL_FRAMES_ARE_FINISHED) { SerializationFinishStatusMap::iterator it = serialization_finish_status_.begin(); for (; it != serialization_finish_status_.end(); ++it) ASSERT_TRUE(it->second); serialized_ = true; return; } // Check finish status of current frame. SerializationFinishStatusMap::iterator it = serialization_finish_status_.find(frame_url.spec()); // New frame, set initial status as false. if (it == serialization_finish_status_.end()) serialization_finish_status_[frame_url.spec()] = false; it = serialization_finish_status_.find(frame_url.spec()); ASSERT_TRUE(it != serialization_finish_status_.end()); // In process frame, finish status should be false. ASSERT_FALSE(it->second); // Add data to corresponding frame's content. serialized_frame_map_[frame_url.spec()] += data; // Current frame is completed saving, change the finish status. if (status == CURRENT_FRAME_IS_FINISHED) it->second = true; } bool HasSerializedFrame(const GURL& frame_url) { return serialized_frame_map_.find(frame_url.spec()) != serialized_frame_map_.end(); } const std::string& GetSerializedContentForFrame( const GURL& frame_url) { return serialized_frame_map_[frame_url.spec()]; } // Load web page according to specific URL. void LoadPageFromURL(const GURL& page_url) { // Load the test file. test_shell_->ResetTestController(); test_shell_->LoadURL(UTF8ToWide(page_url.spec()).c_str()); test_shell_->WaitTestFinished(); } // Load web page according to input content and relative URLs within // the document. void LoadContents(const std::string& contents, const GURL& base_url, const WebCore::String encoding_info) { test_shell_->ResetTestController(); // If input encoding is empty, use UTF-8 as default encoding. if (encoding_info.isEmpty()) { test_shell_->webView()->GetMainFrame()->loadHTMLString(contents, base_url); } else { // Do not use WebFrame.LoadHTMLString because it assumes that input // html contents use UTF-8 encoding. // TODO(darin): This should use WebFrame::loadData. WebFrameImpl* web_frame = static_cast(test_shell_->webView()->GetMainFrame()); ASSERT_TRUE(web_frame != NULL); int len = static_cast(contents.size()); RefPtr buf( WebCore::SharedBuffer::create(contents.data(), len)); WebCore::SubstituteData subst_data( buf, WebCore::String("text/html"), encoding_info, WebCore::KURL()); WebCore::ResourceRequest request(webkit_glue::GURLToKURL(base_url), WebCore::CString()); web_frame->frame()->loader()->load(request, subst_data, false); } test_shell_->WaitTestFinished(); } // Serialize page DOM according to specific page URL. The parameter // recursive_serialization indicates whether we will serialize all // sub-frames. void SerializeDomForURL(const GURL& page_url, bool recursive_serialization) { // Find corresponding WebFrameImpl according to page_url. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), page_url); ASSERT_TRUE(web_frame != NULL); // Add input file URl to links_. links_.push_back(page_url); // Add dummy file path to local_path_. local_paths_.push_back(FilePath(FILE_PATH_LITERAL("c:\\dummy.htm"))); // Start serializing DOM. webkit_glue::DomSerializer dom_serializer(web_frame, recursive_serialization, this, links_, local_paths_, local_directory_name_); ASSERT_TRUE(dom_serializer.SerializeDom()); ASSERT_TRUE(serialized_); } private: // Map frame_url to corresponding serialized_content. typedef base::hash_map SerializedFrameContentMap; SerializedFrameContentMap serialized_frame_map_; // Map frame_url to corresponding status of serialization finish. typedef base::hash_map SerializationFinishStatusMap; SerializationFinishStatusMap serialization_finish_status_; // Flag indicates whether the process of serializing DOM is finished or not. bool serialized_; // The links_ contain dummy original URLs of all saved links. std::vector links_; // The local_paths_ contain dummy corresponding local file paths of all saved // links, which matched links_ one by one. std::vector local_paths_; // The local_directory_name_ is dummy relative path of directory which // contain all saved auxiliary files included all sub frames and resources. const FilePath local_directory_name_; protected: // testing::Test virtual void SetUp() { TestShellTest::SetUp(); serialized_ = false; } virtual void TearDown() { TestShellTest::TearDown(); } }; // Helper function for checking whether input node is META tag. Return true // means it is META element, otherwise return false. The parameter charset_info // return actual charset info if the META tag has charset declaration. bool IsMetaElement(const WebCore::Node* node, WebCore::String* charset_info) { if (!node->isHTMLElement()) return false; if (!(static_cast(node))->hasTagName( WebCore::HTMLNames::metaTag)) return false; charset_info->remove(0, charset_info->length()); const WebCore::HTMLMetaElement* meta = static_cast(node); // Check the META charset declaration. WebCore::String equiv = meta->httpEquiv(); if (equalIgnoringCase(equiv, "content-type")) { WebCore::String content = meta->content(); int pos = content.find("charset", 0, false); if (pos > -1) { // Add a dummy charset declaration to charset_info, which indicates this // META tag has charset declaration although we do not get correct value // yet. charset_info->append("has-charset-declaration"); int remaining_length = content.length() - pos - 7; if (!remaining_length) return true; const UChar* start_pos = content.characters() + pos + 7; // Find "=" symbol. while (remaining_length--) if (*start_pos++ == L'=') break; // Skip beginning space. while (remaining_length) { if (*start_pos > 0x0020) break; ++start_pos; --remaining_length; } if (!remaining_length) return true; const UChar* end_pos = start_pos; // Now we find out the start point of charset info. Search the end point. while (remaining_length--) { if (*end_pos <= 0x0020 || *end_pos == L';') break; ++end_pos; } // Get actual charset info. *charset_info = WebCore::String(start_pos, static_cast(end_pos - start_pos)); return true; } } return true; } // If original contents have document type, the serialized contents also have // document type. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithDocType) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("youtube_1.htm"); GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. LoadPageFromURL(file_url); // Make sure original contents have document type. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->doctype() != NULL); // Do serialization. SerializeDomForURL(file_url, false); // Load the serialized contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, web_frame->frame()->loader()->encoding()); // Make sure serialized contents still have document type. web_frame = static_cast(test_shell_->webView()->GetMainFrame()); doc = web_frame->frame()->document(); ASSERT_TRUE(doc->doctype() != NULL); } // If original contents do not have document type, the serialized contents // also do not have document type. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithoutDocType) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("youtube_2.htm"); GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. LoadPageFromURL(file_url); // Make sure original contents do not have document type. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->doctype() == NULL); // Do serialization. SerializeDomForURL(file_url, false); // Load the serialized contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, web_frame->frame()->loader()->encoding()); // Make sure serialized contents do not have document type. web_frame = static_cast(test_shell_->webView()->GetMainFrame()); doc = web_frame->frame()->document(); ASSERT_TRUE(doc->doctype() == NULL); } // Serialize XML document which has all 5 built-in entities. After // finishing serialization, the serialized contents should be same // with original XML document. TEST_F(DomSerializerTests, SerialzeXMLDocWithBuiltInEntities) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("note.xml"); // Read original contents for later comparison. std::string original_contents; ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); // Get file URL. GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. LoadPageFromURL(file_url); // Do serialization. SerializeDomForURL(file_url, false); // Compare the serialized contents with original contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); ASSERT_EQ(original_contents, serialized_contents); } // When serializing DOM, we add MOTW declaration before html tag. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithAddingMOTW) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("youtube_2.htm"); // Read original contents for later comparison . std::string original_contents; ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); // Get file URL. GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Make sure original contents does not have MOTW; std::string motw_declaration = webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url); ASSERT_FALSE(motw_declaration.empty()); // The encoding of original contents is ISO-8859-1, so we convert the MOTW // declaration to ASCII and search whether original contents has it or not. ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration)); // Load the test file. LoadPageFromURL(file_url); // Do serialization. SerializeDomForURL(file_url, false); // Make sure the serialized contents have MOTW ; ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); ASSERT_FALSE(std::string::npos == serialized_contents.find(motw_declaration)); } // When serializing DOM, we will add the META which have correct charset // declaration as first child of HEAD element for resolving WebKit bug: // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document // does not have META charset declaration. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNoMetaCharsetInOriginalDoc) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("youtube_1.htm"); // Get file URL. GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. LoadPageFromURL(file_url); // Make sure there is no META charset declaration in original document. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); WebCore::HTMLHeadElement* head_ele = doc->head(); ASSERT_TRUE(head_ele != NULL); // Go through all children of HEAD element. WebCore::String charset_info; for (const WebCore::Node *child = head_ele->firstChild(); child != NULL; child = child->nextSibling()) if (IsMetaElement(child, &charset_info)) ASSERT_TRUE(charset_info.isEmpty()); // Do serialization. SerializeDomForURL(file_url, false); // Load the serialized contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, web_frame->frame()->loader()->encoding()); // Make sure the first child of HEAD element is META which has charset // declaration in serialized contents. web_frame = static_cast(test_shell_->webView()->GetMainFrame()); ASSERT_TRUE(web_frame != NULL); doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); head_ele = doc->head(); ASSERT_TRUE(head_ele != NULL); WebCore::Node* meta_node = head_ele->firstChild(); ASSERT_TRUE(meta_node != NULL); // Get meta charset info. ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); ASSERT_TRUE(!charset_info.isEmpty()); ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); // Make sure no more additional META tags which have charset declaration. for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL; child = child->nextSibling()) if (IsMetaElement(child, &charset_info)) ASSERT_TRUE(charset_info.isEmpty()); } // When serializing DOM, if the original document has multiple META charset // declaration, we will add the META which have correct charset declaration // as first child of HEAD element and remove all original META charset // declarations. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("youtube_2.htm"); // Get file URL. GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. LoadPageFromURL(file_url); // Make sure there are multiple META charset declarations in original // document. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); WebCore::HTMLHeadElement* head_ele = doc->head(); ASSERT_TRUE(head_ele != NULL); // Go through all children of HEAD element. int charset_declaration_count = 0; WebCore::String charset_info; for (const WebCore::Node *child = head_ele->firstChild(); child != NULL; child = child->nextSibling()) { if (IsMetaElement(child, &charset_info) && !charset_info.isEmpty()) charset_declaration_count++; } // The original doc has more than META tags which have charset declaration. ASSERT(charset_declaration_count > 1); // Do serialization. SerializeDomForURL(file_url, false); // Load the serialized contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, web_frame->frame()->loader()->encoding()); // Make sure only first child of HEAD element is META which has charset // declaration in serialized contents. web_frame = static_cast(test_shell_->webView()->GetMainFrame()); ASSERT_TRUE(web_frame != NULL); doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); head_ele = doc->head(); ASSERT_TRUE(head_ele != NULL); WebCore::Node* meta_node = head_ele->firstChild(); ASSERT_TRUE(meta_node != NULL); // Get meta charset info. ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); ASSERT_TRUE(!charset_info.isEmpty()); ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); // Make sure no more additional META tags which have charset declaration. for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL; child = child->nextSibling()) if (IsMetaElement(child, &charset_info)) ASSERT_TRUE(charset_info.isEmpty()); } // Test situation of html entities in text when serializing HTML DOM. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInText) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII( "dom_serializer/htmlentities_in_text.htm"); // Get file URL. The URL is dummy URL to identify the following loading // actions. The test content is in constant:original_contents. GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Test contents. static const char* const original_contents = "&<>\"\'"; // Load the test contents. LoadContents(original_contents, file_url, ""); // Get BODY's text content in DOM. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); WebCore::HTMLElement* body_ele = doc->body(); ASSERT_TRUE(body_ele != NULL); WebCore::Node* text_node = body_ele->firstChild(); ASSERT_TRUE(text_node->isTextNode()); ASSERT_TRUE(createMarkup(text_node) == "&<>\"\'"); // Do serialization. SerializeDomForURL(file_url, false); // Compare the serialized contents with original contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); // Compare the serialized contents with original contents to make sure // they are same. // Because we add MOTW when serializing DOM, so before comparison, we also // need to add MOTW to original_contents. std::string original_str = webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url); original_str += original_contents; // Since WebCore now inserts a new HEAD element if there is no HEAD element // when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.) // We need to append the HEAD content and corresponding META content if we // find WebCore-generated HEAD element. if (doc->head()) { WebCore::String encoding = web_frame->frame()->loader()->encoding(); std::string htmlTag(""); std::string::size_type pos = original_str.find(htmlTag); ASSERT_NE(std::string::npos, pos); pos += htmlTag.length(); std::string head_part(""); head_part += WideToASCII( webkit_glue::DomSerializer::GenerateMetaCharsetDeclaration( webkit_glue::StringToStdWString(encoding))); head_part += ""; original_str.insert(pos, head_part); } ASSERT_EQ(original_str, serialized_contents); } // Test situation of html entities in attribute value when serializing // HTML DOM. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInAttributeValue) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII( "dom_serializer/htmlentities_in_attribute_value.htm"); // Get file URL. The URL is dummy URL to identify the following loading // actions. The test content is in constant:original_contents. GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Test contents. static const char* const original_contents = ""; // Load the test contents. LoadContents(original_contents, file_url, ""); // Get value of BODY's title attribute in DOM. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); WebCore::HTMLElement* body_ele = doc->body(); ASSERT_TRUE(body_ele != NULL); const WebCore::String& value = body_ele->getAttribute( WebCore::HTMLNames::titleAttr); ASSERT_TRUE(value == WebCore::String("&<>\"\'")); // Do serialization. SerializeDomForURL(file_url, false); // Compare the serialized contents with original contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); // Compare the serialized contents with original contents to make sure // they are same. std::string original_str = webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url); original_str += original_contents; if (doc->head()) { WebCore::String encoding = web_frame->frame()->loader()->encoding(); std::string htmlTag(""); std::string::size_type pos = original_str.find(htmlTag); ASSERT_NE(std::string::npos, pos); pos += htmlTag.length(); std::string head_part(""); head_part += WideToASCII( webkit_glue::DomSerializer::GenerateMetaCharsetDeclaration( webkit_glue::StringToStdWString(encoding))); head_part += ""; original_str.insert(pos, head_part); } ASSERT_EQ(original_str, serialized_contents); } // Test situation of non-standard HTML entities when serializing HTML DOM. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNonStandardEntities) { // Make a test file URL and load it. FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("nonstandard_htmlentities.htm"); GURL file_url = net::FilePathToFileURL(page_file_path); LoadPageFromURL(file_url); // Get value of BODY's title attribute in DOM. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); WebCore::HTMLElement* body_ele = doc->body(); // Unescaped string for "%⊅&supl;'". static const UChar parsed_value[] = { '%', 0x2285, 0x00b9, '\'', 0 }; const WebCore::String& value = body_ele->getAttribute( WebCore::HTMLNames::titleAttr); ASSERT_TRUE(value == WebCore::String(parsed_value)); // Check the BODY content. WebCore::Node* text_node = body_ele->firstChild(); ASSERT_TRUE(text_node->isTextNode()); ASSERT_TRUE(text_node->nodeValue() == WebCore::String(parsed_value)); // Do serialization. SerializeDomForURL(file_url, false); // Check the serialized string. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); // Confirm that the serialized string has no non-standard HTML entities. ASSERT_EQ(std::string::npos, serialized_contents.find("%")); ASSERT_EQ(std::string::npos, serialized_contents.find("⊅")); ASSERT_EQ(std::string::npos, serialized_contents.find("&supl;")); ASSERT_EQ(std::string::npos, serialized_contents.find("'")); } // Test situation of BASE tag in original document when serializing HTML DOM. // When serializing, we should comment the BASE tag, append a new BASE tag. // rewrite all the savable URLs to relative local path, and change other URLs // to absolute URLs. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) { // There are total 2 available base tags in this test file. const int kTotalBaseTagCountInTestFile = 2; FilePath page_file_path = data_dir_.AppendASCII("dom_serializer"); file_util::EnsureEndsWithSeparator(&page_file_path); // Get page dir URL which is base URL of this file. GURL path_dir_url = net::FilePathToFileURL(page_file_path); // Get file path. page_file_path = page_file_path.AppendASCII("html_doc_has_base_tag.htm"); // Get file URL. GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. LoadPageFromURL(file_url); // Since for this test, we assume there is no savable sub-resource links for // this test file, also all links are relative URLs in this test file, so we // need to check those relative URLs and make sure document has BASE tag. WebFrameImpl* web_frame = webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); // Go through all descent nodes. RefPtr all = doc->all(); int original_base_tag_count = 0; for (WebCore::Node* node = all->firstItem(); node != NULL; node = all->nextItem()) { if (!node->isHTMLElement()) continue; WebCore::Element* element = static_cast(node); if (element->hasTagName(WebCore::HTMLNames::baseTag)) { original_base_tag_count++; } else { // Get link. const WebCore::AtomicString* value = webkit_glue::GetSubResourceLinkFromElement(element); if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) { value = &element->getAttribute(WebCore::HTMLNames::hrefAttr); if (value->isEmpty()) value = NULL; } // Each link is relative link. if (value) { GURL link(WideToUTF8(webkit_glue::StringToStdWString(value->string()))); ASSERT_TRUE(link.scheme().empty()); } } } ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); // Make sure in original document, the base URL is not equal with the // |path_dir_url|. GURL original_base_url( WideToUTF8(webkit_glue::StringToStdWString(doc->baseURL()))); ASSERT_NE(original_base_url, path_dir_url); // Do serialization. SerializeDomForURL(file_url, false); // Load the serialized contents. ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, web_frame->frame()->loader()->encoding()); // Make sure all links are absolute URLs and doc there are some number of // BASE tags in serialized HTML data. Each of those BASE tags have same base // URL which is as same as URL of current test file. web_frame = static_cast(test_shell_->webView()->GetMainFrame()); ASSERT_TRUE(web_frame != NULL); doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); // Go through all descent nodes. all = doc->all(); int new_base_tag_count = 0; for (WebCore::Node* node = all->firstItem(); node != NULL; node = all->nextItem()) { if (!node->isHTMLElement()) continue; WebCore::Element* element = static_cast(node); if (element->hasTagName(WebCore::HTMLNames::baseTag)) { new_base_tag_count++; } else { // Get link. const WebCore::AtomicString* value = webkit_glue::GetSubResourceLinkFromElement(element); if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) { value = &element->getAttribute(WebCore::HTMLNames::hrefAttr); if (value->isEmpty()) value = NULL; } // Each link is absolute link. if (value) { GURL link(WideToUTF8(webkit_glue::StringToStdWString(value->string()))); ASSERT_FALSE(link.scheme().empty()); } } } // We have one more added BASE tag which is generated by JavaScript. ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); // Make sure in new document, the base URL is equal with the |path_dir_url|. GURL new_base_url( webkit_glue::StringToStdString(doc->baseURL())); ASSERT_EQ(new_base_url, path_dir_url); } // Serializing page which has an empty HEAD tag. TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEmptyHead) { FilePath page_file_path = data_dir_; page_file_path = page_file_path.AppendASCII("dom_serializer"); page_file_path = page_file_path.AppendASCII("empty_head.htm"); GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test html content. static const char* const empty_head_contents = "hello world"; LoadContents(empty_head_contents, file_url, ""); // Make sure the head tag is empty. WebFrameImpl* web_frame = static_cast(test_shell_->webView()->GetMainFrame()); ASSERT_TRUE(web_frame != NULL); WebCore::Document* doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); WebCore::HTMLHeadElement* head_ele = doc->head(); ASSERT_TRUE(head_ele != NULL); WTF::PassRefPtr children = head_ele->children(); ASSERT_TRUE(0 == children->length()); // Do serialization. SerializeDomForURL(file_url, false); // Make sure the serialized contents have META ; ASSERT_TRUE(HasSerializedFrame(file_url)); const std::string& serialized_contents = GetSerializedContentForFrame(file_url); // Reload serialized contents and make sure there is only one META tag. LoadContents(serialized_contents, file_url, web_frame->frame()->loader()->encoding()); web_frame = static_cast(test_shell_->webView()->GetMainFrame()); ASSERT_TRUE(web_frame != NULL); doc = web_frame->frame()->document(); ASSERT_TRUE(doc->isHTMLDocument()); head_ele = doc->head(); ASSERT_TRUE(head_ele != NULL); children = head_ele->children(); ASSERT_TRUE(1 == children->length()); WebCore::Node* meta_node = head_ele->firstChild(); ASSERT_TRUE(meta_node != NULL); // Get meta charset info. WebCore::String charset_info; ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); ASSERT_TRUE(!charset_info.isEmpty()); ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); // Check the body's first node is text node and its contents are // "hello world" WebCore::HTMLElement* body_ele = doc->body(); ASSERT_TRUE(body_ele != NULL); WebCore::Node* text_node = body_ele->firstChild(); ASSERT_TRUE(text_node->isTextNode()); const WebCore::String& text_node_contents = text_node->nodeValue(); ASSERT_TRUE(text_node_contents == WebCore::String("hello world")); } } // namespace