diff options
author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:20:51 +0000 |
---|---|---|
committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-27 00:20:51 +0000 |
commit | f5b16fed647e941aa66933178da85db2860d639b (patch) | |
tree | f00e9856c04aad3b558a140955e7674add33f051 /webkit/glue/dom_serializer_unittest.cc | |
parent | 920c091ac3ee15079194c82ae8a7a18215f3f23c (diff) | |
download | chromium_src-f5b16fed647e941aa66933178da85db2860d639b.zip chromium_src-f5b16fed647e941aa66933178da85db2860d639b.tar.gz chromium_src-f5b16fed647e941aa66933178da85db2860d639b.tar.bz2 |
Add webkit to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@18 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit/glue/dom_serializer_unittest.cc')
-rw-r--r-- | webkit/glue/dom_serializer_unittest.cc | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/webkit/glue/dom_serializer_unittest.cc b/webkit/glue/dom_serializer_unittest.cc new file mode 100644 index 0000000..afc4db0 --- /dev/null +++ b/webkit/glue/dom_serializer_unittest.cc @@ -0,0 +1,702 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <hash_map> + +#include "config.h" + +#pragma warning(push, 0) +#include "Document.h" +#include "DocumentType.h" +#include "Element.h" +#include "FrameLoader.h" +#include "FrameView.h" +#include "HTMLHeadElement.h" +#include "HTMLMetaElement.h" +#include "HTMLNames.h" +#include "KURL.h" +#include "SharedBuffer.h" +#include "SubstituteData.h" +#pragma warning(pop) +#undef LOG + +#include "base/file_util.h" +#include "base/string_util.h" +#include "net/base/net_util.h" +#include "net/url_request/url_request_context.h" +#include "webkit/glue/dom_operations.h" +#include "webkit/glue/dom_serializer.h" +#include "webkit/glue/dom_serializer_delegate.h" +#include "webkit/glue/webview.h" +#include "webkit/glue/webframe.h" +#include "webkit/glue/webframe_impl.h" +#include "webkit/tools/test_shell/simple_resource_loader_bridge.h" +#include "webkit/tools/test_shell/test_shell_test.h" + +namespace { + +class DomSerializerTests : public TestShellTest, + public webkit_glue::DomSerializerDelegate { + public: + DomSerializerTests() + : local_directory_name_(L"./dummy_files/") { } + + // DomSerializerDelegate. + void DidSerializeDataForFrame(const GURL& frame_url, + const std::string& data, PageSavingSerializationStatus status) { + // If the all frames are finished saving, check all finish status + if (status == ALL_FRAMES_ARE_FINISHED) { + SerializationFinishStatusMap::iterator it = + serialization_finish_status_.begin(); + for (; it != serialization_finish_status_.end(); ++it) + ASSERT_TRUE(it->second); + serialized_ = true; + return; + } + + std::wstring current_frame_url = UTF8ToWide(frame_url.spec()); + // Check finish status of current frame. + SerializationFinishStatusMap::iterator it = + serialization_finish_status_.find(current_frame_url); + // New frame, set initial status as false. + if (it == serialization_finish_status_.end()) + serialization_finish_status_[current_frame_url] = false; + + it = serialization_finish_status_.find(current_frame_url); + ASSERT_TRUE(it != serialization_finish_status_.end()); + // In process frame, finish status should be false. + ASSERT_FALSE(it->second); + + // Add data to corresponding frame's content. + serialized_frame_map_[current_frame_url] += data; + + // Current frame is completed saving, change the finish status. + if (status == CURRENT_FRAME_IS_FINISHED) + it->second = true; + } + + bool HasSerializedFrame(const std::wstring& frame_url) { + return serialized_frame_map_.find(frame_url) != + serialized_frame_map_.end(); + } + + const std::string& GetSerializedContentForFrame( + const std::wstring& frame_url) { + return serialized_frame_map_[frame_url]; + } + + // Load web page according to specific URL. + void LoadPageFromURL(const std::wstring& page_url) { + // Load the test file. + test_shell_->ResetTestController(); + test_shell_->LoadURL(page_url.c_str()); + test_shell_->WaitTestFinished(); + } + + // Load web page according to input content and relative URLs within + // the document. + void LoadContents(const std::string& contents, + const GURL& base_url, + const WebCore::String encoding_info) { + test_shell_->ResetTestController(); + // If input encoding is empty, use UTF-8 as default encoding. + if (encoding_info.isEmpty()) { + test_shell_->webView()->GetMainFrame()->LoadHTMLString(contents, + base_url); + } else { + // Do not use WebFrame.LoadHTMLString because it assumes that input + // html contents use UTF-8 encoding. + WebFrameImpl* web_frame = + static_cast<WebFrameImpl*>(test_shell_->webView()->GetMainFrame()); + ASSERT_TRUE(web_frame != NULL); + int len = static_cast<int>(contents.size()); + RefPtr<WebCore::SharedBuffer> buf( + new WebCore::SharedBuffer(contents.data(), len)); + + WebCore::SubstituteData subst_data( + buf, WebCore::String("text/html"), encoding_info, WebCore::KURL()); + WebCore::ResourceRequest request(webkit_glue::GURLToKURL(base_url), + WebCore::CString()); + web_frame->frame()->loader()->load(request, subst_data); + } + + test_shell_->WaitTestFinished(); + } + + // Serialize page DOM according to specific page URL. The parameter + // recursive_serialization indicates whether we will serialize all + // sub-frames. + void SerializeDomForURL(const std::wstring& page_url, + bool recursive_serialization) { + // Find corresponding WebFrameImpl according to page_url. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), GURL(page_url)); + ASSERT_TRUE(web_frame != NULL); + // Add input file URl to links_. + links_.push_back(page_url); + // Add dummy file path to local_path_. + local_paths_.push_back(std::wstring(L"c:\\dummy.htm")); + // Start serializing DOM. + webkit_glue::DomSerializer dom_serializer(web_frame, + recursive_serialization, this, links_, local_paths_, + local_directory_name_); + ASSERT_TRUE(dom_serializer.SerializeDom()); + ASSERT_TRUE(serialized_); + } + + private: + // Map frame_url to corresponding serialized_content. + typedef stdext::hash_map<std::wstring, std::string> SerializedFrameContentMap; + SerializedFrameContentMap serialized_frame_map_; + // Map frame_url to corresponding status of serialization finish. + typedef stdext::hash_map<std::wstring, bool> SerializationFinishStatusMap; + SerializationFinishStatusMap serialization_finish_status_; + // Flag indicates whether the process of serializing DOM is finished or not. + bool serialized_; + // The links_ contain dummy original URLs of all saved links. + std::vector<std::wstring> links_; + // The local_paths_ contain dummy corresponding local file paths of all saved + // links, which matched links_ one by one. + std::vector<std::wstring> local_paths_; + // The local_directory_name_ is dummy relative path of directory which + // contain all saved auxiliary files included all sub frames and resources. + const std::wstring local_directory_name_; + + protected: + // testing::Test + virtual void SetUp() { + TestShellTest::SetUp(); + serialized_ = false; + } + + virtual void TearDown() { + TestShellTest::TearDown(); + } +}; + +// Helper function for checking whether input node is META tag. Return true +// means it is META element, otherwise return false. The parameter charset_info +// return actual charset info if the META tag has charset declaration. +bool IsMetaElement(const WebCore::Node* node, WebCore::String* charset_info) { + if (!node->isHTMLElement()) + return false; + if (!(static_cast<const WebCore::HTMLElement*>(node))->hasTagName( + WebCore::HTMLNames::metaTag)) + return false; + charset_info->remove(0, charset_info->length()); + const WebCore::HTMLMetaElement* meta = + static_cast<const WebCore::HTMLMetaElement*>(node); + // Check the META charset declaration. + WebCore::String equiv = meta->httpEquiv(); + if (equalIgnoringCase(equiv, "content-type")) { + WebCore::String content = meta->content(); + int pos = content.find("charset", 0, false); + if (pos > -1) { + // Add a dummy charset declaration to charset_info, which indicates this + // META tag has charset declaration although we do not get correct value + // yet. + charset_info->append("has-charset-declaration"); + int remaining_length = content.length() - pos - 7; + if (!remaining_length) + return true; + const UChar* start_pos = content.characters() + pos + 7; + // Find "=" symbol. + while (remaining_length--) + if (*start_pos++ == L'=') + break; + // Skip beginning space. + while (remaining_length) { + if (*start_pos > 0x0020) + break; + ++start_pos; + --remaining_length; + } + if (!remaining_length) + return true; + const UChar* end_pos = start_pos; + // Now we find out the start point of charset info. Search the end point. + while (remaining_length--) { + if (*end_pos <= 0x0020 || *end_pos == L';') + break; + ++end_pos; + } + // Get actual charset info. + *charset_info = WebCore::String(start_pos, + static_cast<unsigned>(end_pos - start_pos)); + return true; + } + } + return true; +} + +} // namespace + +// If original contents have document type, the serialized contents also have +// document type. +TEST_F(DomSerializerTests, SerialzeHTMLDOMWithDocType) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, L"dom_serializer/youtube_1.htm"); + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + // Make sure original contents have document type. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), file_url); + ASSERT_TRUE(web_frame != NULL); + WebCore::Document* doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->doctype() != NULL); + // Do serialization. + SerializeDomForURL(page_url, false); + // Load the serialized contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + LoadContents(serialized_contents, GURL(page_url), + web_frame->frame()->loader()->encoding()); + // Make sure serialized contents still have document type. + web_frame = + static_cast<WebFrameImpl*>(test_shell_->webView()->GetMainFrame()); + doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->doctype() != NULL); +} + +// If original contents do not have document type, the serialized contents +// also do not have document type. +TEST_F(DomSerializerTests, SerialzeHTMLDOMWithoutDocType) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, L"dom_serializer/youtube_2.htm"); + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + // Make sure original contents do not have document type. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), file_url); + ASSERT_TRUE(web_frame != NULL); + WebCore::Document* doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->doctype() == NULL); + // Do serialization. + SerializeDomForURL(page_url, false); + // Load the serialized contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + LoadContents(serialized_contents, GURL(page_url), + web_frame->frame()->loader()->encoding()); + // Make sure serialized contents do not have document type. + web_frame = + static_cast<WebFrameImpl*>(test_shell_->webView()->GetMainFrame()); + doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->doctype() == NULL); +} + +// Serialize XML document which has all 5 built-in entities. After +// finishing serialization, the serialized contents should be same +// with original XML document. +TEST_F(DomSerializerTests, SerialzeXMLDocWithBuiltInEntities) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, L"dom_serializer/note.xml"); + // Read original contents for later comparison. + std::string orginal_contents; + ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &orginal_contents)); + // Get file URL. + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + // Do serialization. + SerializeDomForURL(page_url, false); + // Compare the serialized contents with original contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + ASSERT_EQ(serialized_contents, orginal_contents); +} + +// When serializing DOM, we add MOTW declaration before html tag. +TEST_F(DomSerializerTests, SerialzeHTMLDOMWithAddingMOTW) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, L"dom_serializer/youtube_2.htm"); + // Read original contents for later comparison . + std::string orginal_contents; + ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &orginal_contents)); + // Get file URL. + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Make sure original contents does not have MOTW; + std::wstring motw_declaration = + webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(page_url); + ASSERT_FALSE(motw_declaration.empty()); + // The encoding of original contents is ISO-8859-1, so we convert the MOTW + // declaration to ASCII and search whether original contents has it or not. + ASSERT_TRUE(std::wstring::npos == + orginal_contents.find(WideToASCII(motw_declaration))); + // Load the test file. + LoadPageFromURL(page_url); + // Do serialization. + SerializeDomForURL(page_url, false); + // Make sure the serialized contents have MOTW ; + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + ASSERT_TRUE(std::wstring::npos != + serialized_contents.find(WideToUTF8(motw_declaration))); +} + +// When serializing DOM, we will add the META which have correct charset +// declaration as first child of HEAD element for resolving WebKit bug: +// http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document +// does not have META charset declaration. +TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNoMetaCharsetInOriginalDoc) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, L"dom_serializer/youtube_1.htm"); + // Get file URL. + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + + // Make sure there is no META charset declaration in original document. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), file_url); + ASSERT_TRUE(web_frame != NULL); + WebCore::Document* doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + WebCore::HTMLHeadElement* head_ele = doc->head(); + ASSERT_TRUE(head_ele != NULL); + // Go through all children of HEAD element. + WebCore::String charset_info; + for (const WebCore::Node *child = head_ele->firstChild(); child != NULL; + child = child->nextSibling()) + if (IsMetaElement(child, &charset_info)) + ASSERT_TRUE(charset_info.isEmpty()); + + // Do serialization. + SerializeDomForURL(page_url, false); + + // Load the serialized contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + LoadContents(serialized_contents, GURL(page_url), + web_frame->frame()->loader()->encoding()); + // Make sure the first child of HEAD element is META which has charset + // declaration in serialized contents. + web_frame = + static_cast<WebFrameImpl*>(test_shell_->webView()->GetMainFrame()); + ASSERT_TRUE(web_frame != NULL); + doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + head_ele = doc->head(); + ASSERT_TRUE(head_ele != NULL); + WebCore::Node* meta_node = head_ele->firstChild(); + ASSERT_TRUE(meta_node != NULL); + // Get meta charset info. + ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); + ASSERT_TRUE(!charset_info.isEmpty()); + ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); + + // Make sure no more additional META tags which have charset declaration. + for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL; + child = child->nextSibling()) + if (IsMetaElement(child, &charset_info)) + ASSERT_TRUE(charset_info.isEmpty()); +} + +// When serializing DOM, if the original document has multiple META charset +// declaration, we will add the META which have correct charset declaration +// as first child of HEAD element and remove all original META charset +// declarations. +TEST_F(DomSerializerTests, + SerialzeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, L"dom_serializer/youtube_2.htm"); + // Get file URL. + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + + // Make sure there are multiple META charset declarations in original + // document. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), file_url); + ASSERT_TRUE(web_frame != NULL); + WebCore::Document* doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + WebCore::HTMLHeadElement* head_ele = doc->head(); + ASSERT_TRUE(head_ele != NULL); + // Go through all children of HEAD element. + int charset_declaration_count = 0; + WebCore::String charset_info; + for (const WebCore::Node *child = head_ele->firstChild(); child != NULL; + child = child->nextSibling()) { + if (IsMetaElement(child, &charset_info) && !charset_info.isEmpty()) + charset_declaration_count++; + } + // The original doc has more than META tags which have charset declaration. + ASSERT(charset_declaration_count > 1); + + // Do serialization. + SerializeDomForURL(page_url, false); + + // Load the serialized contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + LoadContents(serialized_contents, GURL(page_url), + web_frame->frame()->loader()->encoding()); + // Make sure only first child of HEAD element is META which has charset + // declaration in serialized contents. + web_frame = + static_cast<WebFrameImpl*>(test_shell_->webView()->GetMainFrame()); + ASSERT_TRUE(web_frame != NULL); + doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + head_ele = doc->head(); + ASSERT_TRUE(head_ele != NULL); + WebCore::Node* meta_node = head_ele->firstChild(); + ASSERT_TRUE(meta_node != NULL); + // Get meta charset info. + ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); + ASSERT_TRUE(!charset_info.isEmpty()); + ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); + + // Make sure no more additional META tags which have charset declaration. + for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL; + child = child->nextSibling()) + if (IsMetaElement(child, &charset_info)) + ASSERT_TRUE(charset_info.isEmpty()); +} + +// Test situation of html entities in text when serializing HTML DOM. +TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInText) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, + L"dom_serializer/htmlentities_in_text.htm"); + // Read original contents for later comparison . + std::string orginal_contents; + ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &orginal_contents)); + // Get file URL. + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + // Get BODY's text content in DOM. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), file_url); + ASSERT_TRUE(web_frame != NULL); + WebCore::Document* doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + WebCore::HTMLElement* body_ele = doc->body(); + ASSERT_TRUE(body_ele != NULL); + WebCore::Node* text_node = body_ele->firstChild(); + ASSERT_TRUE(text_node->isTextNode()); + ASSERT_TRUE(text_node->toString() == WebCore::String("&<>\"\'")); + // Do serialization. + SerializeDomForURL(page_url, false); + // Compare the serialized contents with original contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + // Because we add MOTW when serializing DOM, so before comparison, we also + // need to add MOTW to original_contents. + std::wstring motw_declaration = + webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(page_url); + orginal_contents = WideToASCII(motw_declaration) + orginal_contents; + ASSERT_EQ(serialized_contents, orginal_contents); +} + +// Test situation of html entities in attribute value when serializing +// HTML DOM. +TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInAttributeValue) { + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, + L"dom_serializer/htmlentities_in_attribute_value.htm"); + // Read original contents for later comparison. + std::string orginal_contents; + ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &orginal_contents)); + // Get file URL. + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + // Get value of BODY's title attribute in DOM. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), file_url); + ASSERT_TRUE(web_frame != NULL); + WebCore::Document* doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + WebCore::HTMLElement* body_ele = doc->body(); + ASSERT_TRUE(body_ele != NULL); + const WebCore::String& value = body_ele->getAttribute( + WebCore::HTMLNames::titleAttr); + ASSERT_TRUE(value == WebCore::String("&<>\"\'")); + // Do serialization. + SerializeDomForURL(page_url, false); + // Compare the serialized contents with original contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + // Because we add MOTW when serializing DOM, so before comparison, we also + // need to add MOTW to original_contents. + std::wstring motw_declaration = + webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(page_url); + orginal_contents = WideToASCII(motw_declaration) + orginal_contents; + ASSERT_EQ(serialized_contents, orginal_contents); +} + +// Test situation of BASE tag in original document when serializing HTML DOM. +// When serializing, we should comment the BASE tag, append a new BASE tag. +// rewrite all the savable URLs to relative local path, and change other URLs +// to absolute URLs. +TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) { + // There are total 2 available base tags in this test file. + const int kTotalBaseTagCountInTestFile = 2; + std::wstring page_file_path = data_dir_; + file_util::AppendToPath(&page_file_path, L"dom_serializer\\"); + // Get page dir URL which is base URL of this file. + GURL path_dir_url = net_util::FilePathToFileURL(page_file_path); + // Get file path. + file_util::AppendToPath(&page_file_path, L"html_doc_has_base_tag.htm"); + // Get file URL. + GURL file_url = net_util::FilePathToFileURL(page_file_path); + ASSERT_TRUE(file_url.SchemeIsFile()); + std::wstring page_url = ASCIIToWide(file_url.spec()); + // Load the test file. + LoadPageFromURL(page_url); + // Since for this test, we assume there is no savable sub-resource links for + // this test file, also all links are relative URLs in this test file, so we + // need to check those relative URLs and make sure document has BASE tag. + WebFrameImpl* web_frame = + webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( + test_shell_->webView(), file_url); + ASSERT_TRUE(web_frame != NULL); + WebCore::Document* doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + // Go through all descent nodes. + RefPtr<WebCore::HTMLCollection> all = doc->all(); + int original_base_tag_count = 0; + for (WebCore::Node* node = all->firstItem(); node != NULL; + node = all->nextItem()) { + if (!node->isHTMLElement()) + continue; + WebCore::Element* element = static_cast<WebCore::Element*>(node); + if (element->hasTagName(WebCore::HTMLNames::baseTag)) { + original_base_tag_count++; + } else { + // Get link. + const WebCore::AtomicString* value = + webkit_glue::GetSubResourceLinkFromElement(element); + if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) { + value = &element->getAttribute(WebCore::HTMLNames::hrefAttr); + if (value->domString().isEmpty()) + value = NULL; + } + // Each link is relative link. + if (value) { + GURL link(webkit_glue::StringToStdWString(value->domString()).c_str()); + ASSERT_TRUE(link.scheme().empty()); + } + } + } + ASSERT_TRUE(original_base_tag_count == kTotalBaseTagCountInTestFile); + // Make sure in original document, the base URL is not equal with the + // |path_dir_url|. + GURL original_base_url( + webkit_glue::DeprecatedStringToStdWString(doc->baseURL()).c_str()); + ASSERT_TRUE(original_base_url != path_dir_url); + + // Do serialization. + SerializeDomForURL(page_url, false); + + // Load the serialized contents. + ASSERT_TRUE(HasSerializedFrame(page_url)); + const std::string& serialized_contents = + GetSerializedContentForFrame(page_url); + LoadContents(serialized_contents, GURL(page_url), + web_frame->frame()->loader()->encoding()); + + // Make sure all links are absolute URLs and doc there are some number of + // BASE tags in serialized HTML data. Each of those BASE tags have same base + // URL which is as same as URL of current test file. + web_frame = + static_cast<WebFrameImpl*>(test_shell_->webView()->GetMainFrame()); + ASSERT_TRUE(web_frame != NULL); + doc = web_frame->frame()->document(); + ASSERT_TRUE(doc->isHTMLDocument()); + // Go through all descent nodes. + all = doc->all(); + int new_base_tag_count = 0; + for (WebCore::Node* node = all->firstItem(); node != NULL; + node = all->nextItem()) { + if (!node->isHTMLElement()) + continue; + WebCore::Element* element = static_cast<WebCore::Element*>(node); + if (element->hasTagName(WebCore::HTMLNames::baseTag)) { + new_base_tag_count++; + } else { + // Get link. + const WebCore::AtomicString* value = + webkit_glue::GetSubResourceLinkFromElement(element); + if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) { + value = &element->getAttribute(WebCore::HTMLNames::hrefAttr); + if (value->domString().isEmpty()) + value = NULL; + } + // Each link is absolute link. + if (value) { + GURL link(webkit_glue::StringToStdWString(value->domString()).c_str()); + ASSERT_FALSE(link.scheme().empty()); + } + } + } + // We have one more added BASE tag which is generated by JavaScript. + ASSERT_TRUE(new_base_tag_count == original_base_tag_count + 1); + // Make sure in new document, the base URL is equal with the |path_dir_url|. + GURL new_base_url( + webkit_glue::DeprecatedStringToStdWString(doc->baseURL()).c_str()); + ASSERT_TRUE(new_base_url == path_dir_url); +} |