diff options
author | yaar@chromium.org <yaar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-23 11:55:07 +0000 |
---|---|---|
committer | yaar@chromium.org <yaar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-23 11:55:07 +0000 |
commit | d9ec5c0f2549db5a717834da1c30a9bf98dd86b7 (patch) | |
tree | 38d0def5964caf2d98cbfa168d045d8d0c02ec1d /webkit/glue | |
parent | b1e69a58480cb3644dcb1fbdae3e8b8fcf487491 (diff) | |
download | chromium_src-d9ec5c0f2549db5a717834da1c30a9bf98dd86b7.zip chromium_src-d9ec5c0f2549db5a717834da1c30a9bf98dd86b7.tar.gz chromium_src-d9ec5c0f2549db5a717834da1c30a9bf98dd86b7.tar.bz2 |
Chromium to use upstream WebPageSerializer instead of glue/DomSerializer.
See corresponding changes in webkit here: https://bugs.webkit.org/show_bug.cgi?id=31737
Review URL: http://codereview.chromium.org/434087
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@35216 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit/glue')
-rw-r--r-- | webkit/glue/dom_operations.cc | 248 | ||||
-rw-r--r-- | webkit/glue/dom_operations.h | 8 | ||||
-rw-r--r-- | webkit/glue/dom_operations_private.h | 58 | ||||
-rw-r--r-- | webkit/glue/dom_serializer.cc | 627 | ||||
-rw-r--r-- | webkit/glue/dom_serializer.h | 180 | ||||
-rw-r--r-- | webkit/glue/dom_serializer_delegate.h | 52 | ||||
-rw-r--r-- | webkit/glue/dom_serializer_unittest.cc | 557 | ||||
-rw-r--r-- | webkit/glue/entity_map.cc | 113 | ||||
-rw-r--r-- | webkit/glue/entity_map.h | 29 |
9 files changed, 364 insertions, 1508 deletions
diff --git a/webkit/glue/dom_operations.cc b/webkit/glue/dom_operations.cc index 06c6519..8d0d0f7 100644 --- a/webkit/glue/dom_operations.cc +++ b/webkit/glue/dom_operations.cc @@ -19,7 +19,6 @@ MSVC_PUSH_WARNING_LEVEL(0); #include "HTMLAllCollection.h" #include "HTMLElement.h" #include "HTMLFormElement.h" -#include "HTMLFrameOwnerElement.h" #include "HTMLHeadElement.h" #include "HTMLInputElement.h" #include "HTMLLinkElement.h" @@ -31,23 +30,33 @@ MSVC_POP_WARNING(); #undef LOG #include "base/string_util.h" +#include "third_party/WebKit/WebKit/chromium/public/WebDocument.h" +#include "third_party/WebKit/WebKit/chromium/public/WebElement.h" +#include "third_party/WebKit/WebKit/chromium/public/WebFormElement.h" +#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h" +#include "third_party/WebKit/WebKit/chromium/public/WebInputElement.h" +#include "third_party/WebKit/WebKit/chromium/public/WebNode.h" +#include "third_party/WebKit/WebKit/chromium/public/WebNodeCollection.h" #include "third_party/WebKit/WebKit/chromium/public/WebVector.h" #include "third_party/WebKit/WebKit/chromium/public/WebView.h" // TODO(yaar) Eventually should not depend on api/src. #include "third_party/WebKit/WebKit/chromium/src/DOMUtilitiesPrivate.h" #include "third_party/WebKit/WebKit/chromium/src/WebFrameImpl.h" #include "webkit/glue/dom_operations.h" -#include "webkit/glue/dom_operations_private.h" #include "webkit/glue/form_data.h" #include "webkit/glue/glue_util.h" #include "webkit/glue/webpasswordautocompletelistener_impl.h" using WebCore::String; using WebKit::FrameLoaderClientImpl; +using WebKit::WebDocument; +using WebKit::WebElement; using WebKit::WebFormElement; using WebKit::WebFrame; using WebKit::WebFrameImpl; +using WebKit::WebInputElement; using WebKit::WebNode; +using WebKit::WebNodeCollection; using WebKit::WebVector; using WebKit::WebView; @@ -64,7 +73,7 @@ struct SavableResourcesUniqueCheck { std::set<GURL>* frames_set; // Collection of all frames we go through when getting all savable resource // links. - std::vector<WebFrameImpl*>* frames; + std::vector<WebFrame*>* frames; SavableResourcesUniqueCheck() : resources_set(NULL), @@ -72,7 +81,7 @@ struct SavableResourcesUniqueCheck { frames(NULL) {} SavableResourcesUniqueCheck(std::set<GURL>* resources_set, - std::set<GURL>* frames_set, std::vector<WebFrameImpl*>* frames) + std::set<GURL>* frames_set, std::vector<WebFrame*>* frames) : resources_set(resources_set), frames_set(frames_set), frames(frames) {} @@ -81,25 +90,28 @@ struct SavableResourcesUniqueCheck { // Get all savable resource links from current element. One element might // have more than one resource link. It is possible to have some links // in one CSS stylesheet. -void GetSavableResourceLinkForElement(WebCore::Element* element, - WebCore::Document* current_doc, SavableResourcesUniqueCheck* unique_check, +void GetSavableResourceLinkForElement( + const WebElement& element, + const WebDocument& current_doc, + SavableResourcesUniqueCheck* unique_check, webkit_glue::SavableResourcesResult* result) { + // Handle frame and iframe tag. - bool is_frame_element; - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromElement(element, &is_frame_element); - if (is_frame_element) { - if (web_frame) - unique_check->frames->push_back(web_frame); + if (element.hasTagName("iframe") || + element.hasTagName("frame")) { + WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element); + if (sub_frame) + unique_check->frames->push_back(sub_frame); return; } + // Check whether the node has sub resource URL or not. - const WebCore::AtomicString* value = + WebString value = webkit_glue::GetSubResourceLinkFromElement(element); - if (!value) + if (value.isNull()) return; // Get absolute URL. - GURL u(webkit_glue::KURLToGURL(current_doc->completeURL((*value).string()))); + GURL u = current_doc.completeURL(value); // ignore invalid URL if (!u.is_valid()) return; @@ -113,35 +125,25 @@ void GetSavableResourceLinkForElement(WebCore::Element* element, return; result->resources_list->push_back(u); // Insert referrer for above new resource link. - if (current_doc->frame()) { - GURL u(webkit_glue::KURLToGURL( - WebCore::KURL(WebCore::ParsedURLString, - current_doc->frame()->loader()->outgoingReferrer()))); - result->referrers_list->push_back(u); - } else { - // Insert blank referrer. - result->referrers_list->push_back(GURL()); - } + result->referrers_list->push_back(GURL()); } // Get all savable resource links from current WebFrameImpl object pointer. -void GetAllSavableResourceLinksForFrame(WebFrameImpl* current_frame, +void GetAllSavableResourceLinksForFrame(WebFrame* current_frame, SavableResourcesUniqueCheck* unique_check, webkit_glue::SavableResourcesResult* result, const char** savable_schemes) { // Get current frame's URL. - const WebCore::KURL& current_frame_kurl = - current_frame->frame()->loader()->url(); - GURL current_frame_gurl(webkit_glue::KURLToGURL(current_frame_kurl)); + GURL current_frame_url = current_frame->url(); // If url of current frame is invalid, ignore it. - if (!current_frame_gurl.is_valid()) + if (!current_frame_url.is_valid()) return; // If url of current frame is not a savable protocol, ignore it. bool is_valid_protocol = false; for (int i = 0; savable_schemes[i] != NULL; ++i) { - if (current_frame_gurl.SchemeIs(savable_schemes[i])) { + if (current_frame_url.SchemeIs(savable_schemes[i])) { is_valid_protocol = true; break; } @@ -150,20 +152,20 @@ void GetAllSavableResourceLinksForFrame(WebFrameImpl* current_frame, return; // If find same frame we have recorded, ignore it. - if (!unique_check->frames_set->insert(current_frame_gurl).second) + if (!unique_check->frames_set->insert(current_frame_url).second) return; // Get current using document. - WebCore::Document* current_doc = current_frame->frame()->document(); + WebDocument current_doc = current_frame->document(); // Go through all descent nodes. - PassRefPtr<WebCore::HTMLCollection> all = current_doc->all(); + WebNodeCollection all = current_doc.all(); // Go through all node in this frame. - for (WebCore::Node* node = all->firstItem(); node != NULL; - node = all->nextItem()) { + for (WebNode node = all.firstItem(); !node.isNull(); + node = all.nextItem()) { // We only save HTML resources. - if (!node->isHTMLElement()) + if (!node.isElementNode()) continue; - WebCore::Element* element = static_cast<WebCore::Element*>(node); + WebElement element = node.toElement<WebElement>(); GetSavableResourceLinkForElement(element, current_doc, unique_check, @@ -360,159 +362,45 @@ void FillPasswordForm(WebView* view, } } -WebFrameImpl* GetWebFrameImplFromElement(WebCore::Element* element, - bool* is_frame_element) { - *is_frame_element = false; - if (element->hasTagName(WebCore::HTMLNames::iframeTag) || - element->hasTagName(WebCore::HTMLNames::frameTag)) { - *is_frame_element = true; - if (element->isFrameOwnerElement()) { - // Check whether this frame has content. - WebCore::HTMLFrameOwnerElement* frame_element = - static_cast<WebCore::HTMLFrameOwnerElement*>(element); - WebCore::Frame* content_frame = frame_element->contentFrame(); - return WebFrameImpl::fromFrame(content_frame); +WebString GetSubResourceLinkFromElement(const WebElement& element) { + const char* attribute_name = NULL; + if (element.hasTagName("img") || + element.hasTagName("script")) { + attribute_name = "src"; + } else if (element.hasTagName("input")) { + const WebInputElement input = element.toConstElement<WebInputElement>(); + if (input.inputType() == WebInputElement::Image) { + attribute_name = "src"; } - } - return NULL; -} - -const WebCore::AtomicString* GetSubResourceLinkFromElement( - const WebCore::Element* element) { - const WebCore::QualifiedName* attribute_name = NULL; - if (element->hasTagName(WebCore::HTMLNames::imgTag) || - element->hasTagName(WebCore::HTMLNames::scriptTag) || - element->hasTagName(WebCore::HTMLNames::linkTag)) { - // Get value. - if (element->hasTagName(WebCore::HTMLNames::linkTag)) { + } else if (element.hasTagName("body") || + element.hasTagName("table") || + element.hasTagName("tr") || + element.hasTagName("td")) { + attribute_name = "background"; + } else if (element.hasTagName("blockquote") || + element.hasTagName("q") || + element.hasTagName("del") || + element.hasTagName("ins")) { + attribute_name = "cite"; + } else if (element.hasTagName("link")) { // If the link element is not linked to css, ignore it. - const WebCore::HTMLLinkElement* link = - static_cast<const WebCore::HTMLLinkElement*>(element); - if (!link->sheet()) - return NULL; + if (LowerCaseEqualsASCII(element.getAttribute("type"), "text/css")) { // TODO(jnd). Add support for extracting links of sub-resources which // are inside style-sheet such as @import, url(), etc. // See bug: http://b/issue?id=1111667. - attribute_name = &WebCore::HTMLNames::hrefAttr; - } else { - attribute_name = &WebCore::HTMLNames::srcAttr; - } - } else if (element->hasTagName(WebCore::HTMLNames::inputTag)) { - const WebCore::HTMLInputElement* input = - static_cast<const WebCore::HTMLInputElement*>(element); - if (input->inputType() == WebCore::HTMLInputElement::IMAGE) { - attribute_name = &WebCore::HTMLNames::srcAttr; + attribute_name = "href"; } - } else if (element->hasTagName(WebCore::HTMLNames::bodyTag) || - element->hasTagName(WebCore::HTMLNames::tableTag) || - element->hasTagName(WebCore::HTMLNames::trTag) || - element->hasTagName(WebCore::HTMLNames::tdTag)) { - attribute_name = &WebCore::HTMLNames::backgroundAttr; - } else if (element->hasTagName(WebCore::HTMLNames::blockquoteTag) || - element->hasTagName(WebCore::HTMLNames::qTag) || - element->hasTagName(WebCore::HTMLNames::delTag) || - element->hasTagName(WebCore::HTMLNames::insTag)) { - attribute_name = &WebCore::HTMLNames::citeAttr; } if (!attribute_name) - return NULL; - const WebCore::AtomicString* value = - &element->getAttribute(*attribute_name); + return WebString(); + WebString value = element.getAttribute(WebString::fromUTF8(attribute_name)); // If value has content and not start with "javascript:" then return it, // otherwise return NULL. - if (value && !value->isEmpty() && - !value->startsWith("javascript:", false)) + if (!value.isNull() && !value.isEmpty() && + !StartsWithASCII(value.utf8(),"javascript:", false)) return value; - return NULL; -} - -bool ElementHasLegalLinkAttribute(const WebCore::Element* element, - const WebCore::QualifiedName& attr_name) { - if (attr_name == WebCore::HTMLNames::srcAttr) { - // Check src attribute. - if (element->hasTagName(WebCore::HTMLNames::imgTag) || - element->hasTagName(WebCore::HTMLNames::scriptTag) || - element->hasTagName(WebCore::HTMLNames::iframeTag) || - element->hasTagName(WebCore::HTMLNames::frameTag)) - return true; - if (element->hasTagName(WebCore::HTMLNames::inputTag)) { - const WebCore::HTMLInputElement* input = - static_cast<const WebCore::HTMLInputElement*>(element); - if (input->inputType() == WebCore::HTMLInputElement::IMAGE) - return true; - } - } else if (attr_name == WebCore::HTMLNames::hrefAttr) { - // Check href attribute. - if (element->hasTagName(WebCore::HTMLNames::linkTag) || - element->hasTagName(WebCore::HTMLNames::aTag) || - element->hasTagName(WebCore::HTMLNames::areaTag)) - return true; - } else if (attr_name == WebCore::HTMLNames::actionAttr) { - if (element->hasTagName(WebCore::HTMLNames::formTag)) - return true; - } else if (attr_name == WebCore::HTMLNames::backgroundAttr) { - if (element->hasTagName(WebCore::HTMLNames::bodyTag) || - element->hasTagName(WebCore::HTMLNames::tableTag) || - element->hasTagName(WebCore::HTMLNames::trTag) || - element->hasTagName(WebCore::HTMLNames::tdTag)) - return true; - } else if (attr_name == WebCore::HTMLNames::citeAttr) { - if (element->hasTagName(WebCore::HTMLNames::blockquoteTag) || - element->hasTagName(WebCore::HTMLNames::qTag) || - element->hasTagName(WebCore::HTMLNames::delTag) || - element->hasTagName(WebCore::HTMLNames::insTag)) - return true; - } else if (attr_name == WebCore::HTMLNames::classidAttr || - attr_name == WebCore::HTMLNames::dataAttr) { - if (element->hasTagName(WebCore::HTMLNames::objectTag)) - return true; - } else if (attr_name == WebCore::HTMLNames::codebaseAttr) { - if (element->hasTagName(WebCore::HTMLNames::objectTag) || - element->hasTagName(WebCore::HTMLNames::appletTag)) - return true; - } - return false; -} - -WebFrameImpl* GetWebFrameImplFromWebViewForSpecificURL(WebView* view, - const GURL& page_url) { - WebFrame* main_frame = view->mainFrame(); - if (!main_frame) - return NULL; - WebFrameImpl* main_frame_impl = static_cast<WebFrameImpl*>(main_frame); - - std::vector<WebFrameImpl*> frames; - // First, process main frame. - frames.push_back(main_frame_impl); - // Collect all frames inside the specified frame. - for (int i = 0; i < static_cast<int>(frames.size()); ++i) { - WebFrameImpl* current_frame = frames[i]; - // Get current using document. - WebCore::Document* current_doc = current_frame->frame()->document(); - // Check whether current frame is target or not. - const WebCore::KURL& current_frame_kurl = - current_frame->frame()->loader()->url(); - GURL current_frame_gurl(KURLToGURL(current_frame_kurl)); - if (page_url == current_frame_gurl) - return current_frame; - // Go through sub-frames. - RefPtr<WebCore::HTMLCollection> all = current_doc->all(); - for (WebCore::Node* node = all->firstItem(); node != NULL; - node = all->nextItem()) { - if (!node->isHTMLElement()) - continue; - WebCore::Element* element = static_cast<WebCore::Element*>(node); - // Check frame tag and iframe tag. - bool is_frame_element; - WebFrameImpl* web_frame = GetWebFrameImplFromElement( - element, &is_frame_element); - if (is_frame_element && web_frame) - frames.push_back(web_frame); - } - } - - return NULL; + return WebString(); } // Get all savable resource links from current webview, include main @@ -527,12 +415,12 @@ bool GetAllSavableResourceLinksForCurrentPage(WebView* view, std::set<GURL> resources_set; std::set<GURL> frames_set; - std::vector<WebFrameImpl*> frames; + std::vector<WebFrame*> frames; SavableResourcesUniqueCheck unique_check(&resources_set, &frames_set, &frames); - GURL main_page_gurl(KURLToGURL(main_frame_impl->frame()->loader()->url())); + GURL main_page_gurl(main_frame_impl->url()); // Make sure we are saving same page between embedder and webkit. // If page has being navigated, embedder will get three empty vector, diff --git a/webkit/glue/dom_operations.h b/webkit/glue/dom_operations.h index e0d7142..551666b 100644 --- a/webkit/glue/dom_operations.h +++ b/webkit/glue/dom_operations.h @@ -125,6 +125,14 @@ bool ElementDoesAutoCompleteForElementWithId(WebKit::WebView* view, // Returns the number of animations currently running. int NumberOfActiveAnimations(WebKit::WebView* view); +// Returns the value in an elements resource url attribute. For IMG, SCRIPT or +// INPUT TYPE=image, returns the value in "src". For LINK TYPE=text/css, returns +// the value in "href". For BODY, TABLE, TR, TD, returns the value in +// "background". For BLOCKQUOTE, Q, DEL, INS, returns the value in "cite" +// attribute. Otherwise returns a null WebString. +WebKit::WebString GetSubResourceLinkFromElement( + const WebKit::WebElement& element); + } // namespace webkit_glue #endif // WEBKIT_GLUE_DOM_OPERATIONS_H__ diff --git a/webkit/glue/dom_operations_private.h b/webkit/glue/dom_operations_private.h deleted file mode 100644 index 403ca16..0000000 --- a/webkit/glue/dom_operations_private.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef WEBKIT_GLUE_DOM_OPERATIONS_PRIVATE_H_ -#define WEBKIT_GLUE_DOM_OPERATIONS_PRIVATE_H_ - -namespace WebCore { -class AtomicString; -class Document; -class Element; -class Node; -class QualifiedName; -class String; -} - -namespace WebKit { -class WebFrameImpl; -class WebView; -} - -class GURL; - -namespace webkit_glue { - -// If element is HTML:IFrame or HTML:Frame, then return the WebFrameImpl -// object corresponding to the content frame, otherwise return NULL. -// The parameter is_frame_element indicates whether the input element -// is frame/iframe element or not. -WebKit::WebFrameImpl* GetWebFrameImplFromElement(WebCore::Element* element, - bool* is_frame_element); - -// If element is img, script or input type=image, then return its link refer -// to the "src" attribute. If element is link, then return its link refer to -// the "href" attribute. If element is body, table, tr, td, then return its -// link refer to the "background" attribute. If element is blockquote, q, del, -// ins, then return its link refer to the "cite" attribute. Otherwise return -// NULL. -const WebCore::AtomicString* GetSubResourceLinkFromElement( - const WebCore::Element* element); - -// For img, script, iframe, frame element, when attribute name is src, -// for link, a, area element, when attribute name is href, -// for form element, when attribute name is action, -// for input, type=image, when attribute name is src, -// for body, table, tr, td, when attribute name is background, -// for blockquote, q, del, ins, when attribute name is cite, -// we can consider the attribute value has legal link. -bool ElementHasLegalLinkAttribute(const WebCore::Element* element, - const WebCore::QualifiedName& attr_name); - -// Get pointer of WebFrameImpl from webview according to specific URL. -WebKit::WebFrameImpl* GetWebFrameImplFromWebViewForSpecificURL( - WebKit::WebView* view, const GURL& page_url); - -} // namespace webkit_glue - -#endif // WEBKIT_GLUE_DOM_OPERATIONS_PRIVATE_H_ diff --git a/webkit/glue/dom_serializer.cc b/webkit/glue/dom_serializer.cc deleted file mode 100644 index d8cf3a4..0000000 --- a/webkit/glue/dom_serializer.cc +++ /dev/null @@ -1,627 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// How we handle the base tag better. -// Current status: -// At now the normal way we use to handling base tag is -// a) For those links which have corresponding local saved files, such as -// savable CSS, JavaScript files, they will be written to relative URLs which -// point to local saved file. Why those links can not be resolved as absolute -// file URLs, because if they are resolved as absolute URLs, after moving the -// file location from one directory to another directory, the file URLs will -// be dead links. -// b) For those links which have not corresponding local saved files, such as -// links in A, AREA tags, they will be resolved as absolute URLs. -// c) We comment all base tags when serialzing DOM for the page. -// FireFox also uses above way to handle base tag. -// -// Problem: -// This way can not handle the following situation: -// the base tag is written by JavaScript. -// For example. The page "www.yahoo.com" use -// "document.write('<base href="http://www.yahoo.com/"...');" to setup base URL -// of page when loading page. So when saving page as completed-HTML, we assume -// that we save "www.yahoo.com" to "c:\yahoo.htm". After then we load the saved -// completed-HTML page, then the JavaScript will insert a base tag -// <base href="http://www.yahoo.com/"...> to DOM, so all URLs which point to -// local saved resource files will be resolved as -// "http://www.yahoo.com/yahoo_files/...", which will cause all saved resource -// files can not be loaded correctly. Also the page will be rendered ugly since -// all saved sub-resource files (such as CSS, JavaScript files) and sub-frame -// files can not be fetched. -// Now FireFox, IE and WebKit based Browser all have this problem. -// -// Solution: -// My solution is that we comment old base tag and write new base tag: -// <base href="." ...> after the previous commented base tag. In WebKit, it -// always uses the latest "href" attribute of base tag to set document's base -// URL. Based on this behavior, when we encounter a base tag, we comment it and -// write a new base tag <base href="."> after the previous commented base tag. -// The new added base tag can help engine to locate correct base URL for -// correctly loading local saved resource files. Also I think we need to inherit -// the base target value from document object when appending new base tag. -// If there are multiple base tags in original document, we will comment all old -// base tags and append new base tag after each old base tag because we do not -// know those old base tags are original content or added by JavaScript. If -// they are added by JavaScript, it means when loading saved page, the script(s) -// will still insert base tag(s) to DOM, so the new added base tag(s) can -// override the incorrect base URL and make sure we alway load correct local -// saved resource files. - -// We must include format_macros up here, before any WebKit headers -// include inttypes.h. -#include "base/format_macros.h" - -#include "config.h" - -#include "base/compiler_specific.h" - -MSVC_PUSH_WARNING_LEVEL(0); -#include "DocumentType.h" -#include "FrameLoader.h" -#include "Document.h" -#include "Element.h" -#include "HTMLAllCollection.h" -#include "HTMLElement.h" -#include "HTMLFormElement.h" -#include "HTMLMetaElement.h" -#include "HTMLNames.h" -#include "KURL.h" -#include "markup.h" -#include "PlatformString.h" -#include "TextEncoding.h" -MSVC_POP_WARNING(); -#undef LOG - -#include "webkit/glue/dom_serializer.h" - -#include "base/string_util.h" -#include "third_party/WebKit/WebKit/chromium/src/WebFrameImpl.h" -#include "webkit/glue/dom_operations.h" -#include "webkit/glue/dom_operations_private.h" -#include "webkit/glue/dom_serializer_delegate.h" -#include "webkit/glue/entity_map.h" -#include "webkit/glue/glue_util.h" - -using WebKit::WebFrame; -using WebKit::WebFrameImpl; - -namespace { - -// Default "mark of the web" declaration -static const char* const kDefaultMarkOfTheWeb = - "\n<!-- saved from url=(%04" PRIuS ")%s -->\n"; - -// Default meat content for writing correct charset declaration. -static const wchar_t* const kDefaultMetaContent = - L"<META http-equiv=\"Content-Type\" content=\"text/html; charset=%ls\">"; - -// Notation of start comment. -static const wchar_t* const kStartCommentNotation = L"<!-- "; - -// Notation of end comment. -static const wchar_t* const kEndCommentNotation = L" -->"; - -// Default XML declaration. -static const wchar_t* const kXMLDeclaration = - L"<?xml version=\"%ls\" encoding=\"%ls\"%ls?>\n"; - -// Default base tag declaration -static const wchar_t* const kBaseTagDeclaration = - L"<BASE href=\".\"%ls>"; - -static const wchar_t* const kBaseTargetDeclaration = - L" target=\"%ls\""; - -// Maximum length of data buffer which is used to temporary save generated -// html content data. -static const int kHtmlContentBufferLength = 65536; - -// Check whether specified unicode has corresponding html/xml entity name. -// If yes, replace the character with the returned entity notation, if not -// then still use original character. -void ConvertCorrespondingSymbolToEntity(WebCore::String* result, - const WebCore::String& value, - bool in_html_doc) { - unsigned len = value.length(); - const UChar* start_pos = value.characters(); - const UChar* cur_pos = start_pos; - while (len--) { - const char* entity_name = - webkit_glue::EntityMap::GetEntityNameByCode(*cur_pos, in_html_doc); - if (entity_name) { - // Append content before entity code. - if (cur_pos > start_pos) - result->append(start_pos, cur_pos - start_pos); - result->append("&"); - result->append(entity_name); - result->append(";"); - start_pos = ++cur_pos; - } else { - cur_pos++; - } - } - // Append the remaining content. - if (cur_pos > start_pos) - result->append(start_pos, cur_pos - start_pos); -} - -} // namespace - -namespace webkit_glue { - -// SerializeDomParam Constructor. -DomSerializer::SerializeDomParam::SerializeDomParam( - const GURL& current_frame_gurl, - const WebCore::TextEncoding& text_encoding, - WebCore::Document* doc, - const FilePath& directory_name) - : current_frame_gurl(current_frame_gurl), - text_encoding(text_encoding), - doc(doc), - directory_name(directory_name), - has_doctype(false), - has_checked_meta(false), - skip_meta_element(NULL), - is_in_script_or_style_tag(false), - has_doc_declaration(false) { - // Cache the value since we check it lots of times. - is_html_document = doc->isHTMLDocument(); -} - -// Static -std::wstring DomSerializer::GenerateMetaCharsetDeclaration( - const std::wstring& charset) { - return StringPrintf(kDefaultMetaContent, charset.c_str()); -} - -// Static. -std::string DomSerializer::GenerateMarkOfTheWebDeclaration( - const GURL& url) { - return StringPrintf(kDefaultMarkOfTheWeb, - url.spec().size(), url.spec().c_str()); -} - -// Static. -std::wstring DomSerializer::GenerateBaseTagDeclaration( - const std::wstring& base_target) { - std::wstring target_declaration = base_target.empty() ? L"" : - StringPrintf(kBaseTargetDeclaration, base_target.c_str()); - return StringPrintf(kBaseTagDeclaration, target_declaration.c_str()); -} - -WebCore::String DomSerializer::PreActionBeforeSerializeOpenTag( - const WebCore::Element* element, SerializeDomParam* param, - bool* need_skip) { - WebCore::String result; - - *need_skip = false; - if (param->is_html_document) { - // Skip the open tag of original META tag which declare charset since we - // have overrided the META which have correct charset declaration after - // serializing open tag of HEAD element. - if (element->hasTagName(WebCore::HTMLNames::metaTag)) { - const WebCore::HTMLMetaElement* meta = - static_cast<const WebCore::HTMLMetaElement*>(element); - // Check whether the META tag has declared charset or not. - WebCore::String equiv = meta->httpEquiv(); - if (equalIgnoringCase(equiv, "content-type")) { - WebCore::String content = meta->content(); - if (content.length() && content.contains("charset", false)) { - // Find META tag declared charset, we need to skip it when - // serializing DOM. - param->skip_meta_element = element; - *need_skip = true; - } - } - } else if (element->hasTagName(WebCore::HTMLNames::htmlTag)) { - // Check something before processing the open tag of HEAD element. - // First we add doc type declaration if original doc has it. - if (!param->has_doctype) { - param->has_doctype = true; - result += createMarkup(param->doc->doctype()); - } - - // Add MOTW declaration before html tag. - // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. - result += StdStringToString(GenerateMarkOfTheWebDeclaration( - param->current_frame_gurl)); - } else if (element->hasTagName(WebCore::HTMLNames::baseTag)) { - // Comment the BASE tag when serializing dom. - result += StdWStringToString(kStartCommentNotation); - } - } else { - // Write XML declaration. - if (!param->has_doc_declaration) { - param->has_doc_declaration = true; - // Get encoding info. - WebCore::String xml_encoding = param->doc->xmlEncoding(); - if (xml_encoding.isEmpty()) - xml_encoding = param->doc->frame()->loader()->encoding(); - if (xml_encoding.isEmpty()) - xml_encoding = WebCore::UTF8Encoding().name(); - std::wstring str_xml_declaration = - StringPrintf(kXMLDeclaration, - StringToStdWString(param->doc->xmlVersion()).c_str(), - StringToStdWString(xml_encoding).c_str(), - param->doc->xmlStandalone() ? L" standalone=\"yes\"" : - L""); - result += StdWStringToString(str_xml_declaration); - } - // Add doc type declaration if original doc has it. - if (!param->has_doctype) { - param->has_doctype = true; - result += createMarkup(param->doc->doctype()); - } - } - - return result; -} - -WebCore::String DomSerializer::PostActionAfterSerializeOpenTag( - const WebCore::Element* element, SerializeDomParam* param) { - WebCore::String result; - - param->has_added_contents_before_end = false; - if (!param->is_html_document) - return result; - // Check after processing the open tag of HEAD element - if (!param->has_checked_meta && - element->hasTagName(WebCore::HTMLNames::headTag)) { - param->has_checked_meta = true; - // Check meta element. WebKit only pre-parse the first 512 bytes - // of the document. If the whole <HEAD> is larger and meta is the - // end of head part, then this kind of pages aren't decoded correctly - // because of this issue. So when we serialize the DOM, we need to - // make sure the meta will in first child of head tag. - // See http://bugs.webkit.org/show_bug.cgi?id=16621. - // First we generate new content for writing correct META element. - std::wstring str_meta = - GenerateMetaCharsetDeclaration( - ASCIIToWide(param->text_encoding.name())); - result += StdWStringToString(str_meta); - - param->has_added_contents_before_end = true; - // Will search each META which has charset declaration, and skip them all - // in PreActionBeforeSerializeOpenTag. - } else if (element->hasTagName(WebCore::HTMLNames::scriptTag) || - element->hasTagName(WebCore::HTMLNames::styleTag)) { - param->is_in_script_or_style_tag = true; - } - - return result; -} - -WebCore::String DomSerializer::PreActionBeforeSerializeEndTag( - const WebCore::Element* element, SerializeDomParam* param, - bool* need_skip) { - WebCore::String result; - - *need_skip = false; - if (!param->is_html_document) - return result; - // Skip the end tag of original META tag which declare charset. - // Need not to check whether it's META tag since we guarantee - // skip_meta_element is definitely META tag if it's not NULL. - if (param->skip_meta_element == element) { - *need_skip = true; - } else if (element->hasTagName(WebCore::HTMLNames::scriptTag) || - element->hasTagName(WebCore::HTMLNames::styleTag)) { - DCHECK(param->is_in_script_or_style_tag); - param->is_in_script_or_style_tag = false; - } - - return result; -} - -// After we finish serializing end tag of a element, we give the target -// element a chance to do some post work to add some additional data. -WebCore::String DomSerializer::PostActionAfterSerializeEndTag( - const WebCore::Element* element, SerializeDomParam* param) { - WebCore::String result; - - if (!param->is_html_document) - return result; - // Comment the BASE tag when serializing DOM. - if (element->hasTagName(WebCore::HTMLNames::baseTag)) { - result += StdWStringToString(kEndCommentNotation); - // Append a new base tag declaration. - result += StdWStringToString(GenerateBaseTagDeclaration( - webkit_glue::StringToStdWString(param->doc->baseTarget()))); - } - - return result; -} - -void DomSerializer::SaveHtmlContentToBuffer(const WebCore::String& result, - SerializeDomParam* param) { - if (!result.length()) - return; - // Convert the unicode content to target encoding - WebCore::CString encoding_result = param->text_encoding.encode( - result.characters(), result.length(), WebCore::EntitiesForUnencodables); - - // if the data buffer will be full, then send it out first. - if (encoding_result.length() + data_buffer_.size() > - data_buffer_.capacity()) { - // Send data to delegate, tell it now we are serializing current frame. - delegate_->DidSerializeDataForFrame(param->current_frame_gurl, - data_buffer_, DomSerializerDelegate::CURRENT_FRAME_IS_NOT_FINISHED); - data_buffer_.clear(); - } - - // Append result to data buffer. - data_buffer_.append(CStringToStdString(encoding_result)); -} - -void DomSerializer::OpenTagToString(const WebCore::Element* element, - SerializeDomParam* param) { - bool need_skip; - // Do pre action for open tag. - WebCore::String result = PreActionBeforeSerializeOpenTag(element, - param, - &need_skip); - if (need_skip) - return; - // Add open tag - result += "<" + element->nodeName(); - // Go through all attributes and serialize them. - const WebCore::NamedNodeMap *attrMap = element->attributes(true); - if (attrMap) { - unsigned numAttrs = attrMap->length(); - for (unsigned i = 0; i < numAttrs; i++) { - result += " "; - // Add attribute pair - const WebCore::Attribute *attribute = attrMap->attributeItem(i); - result += attribute->name().toString(); - result += "=\""; - if (!attribute->value().isEmpty()) { - // Check whether we need to replace some resource links - // with local resource paths. - const WebCore::QualifiedName& attr_name = attribute->name(); - // Check whether need to change the attribute which has link - bool need_replace_link = - ElementHasLegalLinkAttribute(element, attr_name); - if (need_replace_link) { - // First, get the absolute link - const WebCore::String& attr_value = attribute->value(); - // For links start with "javascript:", we do not change it. - if (attr_value.startsWith("javascript:", false)) { - result += attr_value; - } else { - WebCore::String str_value = param->doc->completeURL(attr_value); - std::string value(StringToStdString(str_value)); - // Check whether we local files for those link. - LinkLocalPathMap::const_iterator it = local_links_.find(value); - if (it != local_links_.end()) { - // Replace the link when we have local files. - FilePath::StringType path(FilePath::kCurrentDirectory); - if (!param->directory_name.empty()) - path += FILE_PATH_LITERAL("/") + param->directory_name.value(); - path += FILE_PATH_LITERAL("/") + it->second.value(); - result += FilePathStringToString(path); - } else { - // If not found local path, replace it with absolute link. - result += str_value; - } - } - } else { - ConvertCorrespondingSymbolToEntity(&result, attribute->value(), - param->is_html_document); - } - } - result += "\""; - } - } - - // Do post action for open tag. - WebCore::String added_contents = - PostActionAfterSerializeOpenTag(element, param); - // Complete the open tag for element when it has child/children. - if (element->hasChildNodes() || param->has_added_contents_before_end) - result += ">"; - // Append the added contents generate in post action of open tag. - result += added_contents; - // Save the result to data buffer. - SaveHtmlContentToBuffer(result, param); -} - -// Serialize end tag of an specified element. -void DomSerializer::EndTagToString(const WebCore::Element* element, - SerializeDomParam* param) { - bool need_skip; - // Do pre action for end tag. - WebCore::String result = PreActionBeforeSerializeEndTag(element, - param, - &need_skip); - if (need_skip) - return; - // Write end tag when element has child/children. - if (element->hasChildNodes() || param->has_added_contents_before_end) { - result += "</"; - result += element->nodeName(); - result += ">"; - } else { - // Check whether we have to write end tag for empty element. - if (param->is_html_document) { - result += ">"; - const WebCore::HTMLElement* html_element = - static_cast<const WebCore::HTMLElement*>(element); - if (html_element->endTagRequirement() == WebCore::TagStatusRequired) { - // We need to write end tag when it is required. - result += "</"; - result += element->nodeName(); - result += ">"; - } - } else { - // For xml base document. - result += " />"; - } - } - // Do post action for end tag. - result += PostActionAfterSerializeEndTag(element, param); - // Save the result to data buffer. - SaveHtmlContentToBuffer(result, param); -} - -void DomSerializer::BuildContentForNode(const WebCore::Node* node, - SerializeDomParam* param) { - switch (node->nodeType()) { - case WebCore::Node::ELEMENT_NODE: { - // Process open tag of element. - OpenTagToString(static_cast<const WebCore::Element*>(node), param); - // Walk through the children nodes and process it. - for (const WebCore::Node *child = node->firstChild(); child != NULL; - child = child->nextSibling()) - BuildContentForNode(child, param); - // Process end tag of element. - EndTagToString(static_cast<const WebCore::Element*>(node), param); - break; - } - case WebCore::Node::TEXT_NODE: { - SaveHtmlContentToBuffer(createMarkup(node), param); - break; - } - case WebCore::Node::ATTRIBUTE_NODE: - case WebCore::Node::DOCUMENT_NODE: - case WebCore::Node::DOCUMENT_FRAGMENT_NODE: { - // Should not exist. - DCHECK(false); - break; - } - // Document type node can be in DOM? - case WebCore::Node::DOCUMENT_TYPE_NODE: - param->has_doctype = true; - default: { - // For other type node, call default action. - SaveHtmlContentToBuffer(createMarkup(node), param); - break; - } - } -} - -DomSerializer::DomSerializer(WebFrame* webframe, - bool recursive_serialization, - DomSerializerDelegate* delegate, - const std::vector<GURL>& links, - const std::vector<FilePath>& local_paths, - const FilePath& local_directory_name) - : delegate_(delegate), - recursive_serialization_(recursive_serialization), - frames_collected_(false), - local_directory_name_(local_directory_name) { - // Must specify available webframe. - DCHECK(webframe); - specified_webframeimpl_ = static_cast<WebFrameImpl*>(webframe); - // Make sure we have not-NULL delegate. - DCHECK(delegate); - // Build local resources map. - DCHECK(links.size() == local_paths.size()); - std::vector<GURL>::const_iterator link_it = links.begin(); - std::vector<FilePath>::const_iterator path_it = local_paths.begin(); - for (; link_it != links.end(); ++link_it, ++path_it) { - bool never_present = local_links_.insert( - LinkLocalPathMap::value_type(link_it->spec(), *path_it)). - second; - DCHECK(never_present); - } - - // Init data buffer. - data_buffer_.reserve(kHtmlContentBufferLength); - DCHECK(data_buffer_.empty()); -} - -void DomSerializer::CollectTargetFrames() { - DCHECK(!frames_collected_); - frames_collected_ = true; - - // First, process main frame. - frames_.push_back(specified_webframeimpl_); - // Return now if user only needs to serialize specified frame, not including - // all sub-frames. - if (!recursive_serialization_) - return; - // Collect all frames inside the specified frame. - for (int i = 0; i < static_cast<int>(frames_.size()); ++i) { - WebFrameImpl* current_frame = frames_[i]; - // Get current using document. - WebCore::Document* current_doc = current_frame->frame()->document(); - // Go through sub-frames. - RefPtr<WebCore::HTMLAllCollection> all = current_doc->all(); - for (WebCore::Node* node = all->firstItem(); node != NULL; - node = all->nextItem()) { - if (!node->isHTMLElement()) - continue; - WebCore::Element* element = static_cast<WebCore::Element*>(node); - // Check frame tag and iframe tag. - bool is_frame_element; - WebFrameImpl* web_frame = GetWebFrameImplFromElement( - element, &is_frame_element); - if (is_frame_element && web_frame) - frames_.push_back(web_frame); - } - } -} - -bool DomSerializer::SerializeDom() { - // Collect target frames. - if (!frames_collected_) - CollectTargetFrames(); - bool did_serialization = false; - // Get GURL for main frame. - GURL main_page_gurl(KURLToGURL( - specified_webframeimpl_->frame()->loader()->url())); - - // Go through all frames for serializing DOM for whole page, include - // sub-frames. - for (int i = 0; i < static_cast<int>(frames_.size()); ++i) { - // Get current serializing frame. - WebFrameImpl* current_frame = frames_[i]; - // Get current using document. - WebCore::Document* current_doc = current_frame->frame()->document(); - // Get current frame's URL. - const WebCore::KURL& current_frame_kurl = - current_frame->frame()->loader()->url(); - GURL current_frame_gurl(KURLToGURL(current_frame_kurl)); - - // Check whether we have done this document. - if (local_links_.find(current_frame_gurl.spec()) != local_links_.end()) { - // A new document, we will serialize it. - did_serialization = true; - // Get target encoding for current document. - WebCore::String encoding = current_frame->frame()->loader()->encoding(); - // Create the text encoding object with target encoding. - WebCore::TextEncoding text_encoding(encoding); - // Construct serialize parameter for late processing document. - SerializeDomParam param( - current_frame_gurl, - encoding.length() ? text_encoding : WebCore::UTF8Encoding(), - current_doc, - current_frame_gurl == main_page_gurl ? - local_directory_name_ : - FilePath()); - - // Process current document. - WebCore::Element* root_element = current_doc->documentElement(); - if (root_element) - BuildContentForNode(root_element, ¶m); - - // Sink the remainder data and finish serializing current frame. - delegate_->DidSerializeDataForFrame(current_frame_gurl, data_buffer_, - DomSerializerDelegate::CURRENT_FRAME_IS_FINISHED); - // Clear the buffer. - data_buffer_.clear(); - } - } - - // We have done call frames, so we send message to embedder to tell it that - // frames are finished serializing. - DCHECK(data_buffer_.empty()); - delegate_->DidSerializeDataForFrame(GURL(), data_buffer_, - DomSerializerDelegate::ALL_FRAMES_ARE_FINISHED); - - return did_serialization; -} - -} // namespace webkit_glue diff --git a/webkit/glue/dom_serializer.h b/webkit/glue/dom_serializer.h deleted file mode 100644 index 3c70431..0000000 --- a/webkit/glue/dom_serializer.h +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef WEBKIT_GLUE_DOM_SERIALIZER_H__ -#define WEBKIT_GLUE_DOM_SERIALIZER_H__ - -#include <string> - -#include "base/file_path.h" -#include "base/hash_tables.h" -#include "googleurl/src/gurl.h" - -namespace WebCore { -class Document; -class Element; -class Node; -class String; -class TextEncoding; -} - -namespace WebKit { -class WebFrame; -class WebFrameImpl; -} - -namespace webkit_glue { - -class DomSerializerDelegate; - -// Get html data by serializing all frames of current page with lists -// which contain all resource links that have local copy. -// contain all saved auxiliary files included all sub frames and resources. -// This function will find out all frames and serialize them to HTML data. -// We have a data buffer to temporary saving generated html data. We will -// sequentially call WebViewDelegate::SendSerializedHtmlData once the data -// buffer is full. See comments of WebViewDelegate::SendSerializedHtmlData -// for getting more information. -class DomSerializer { - public: - // Do serialization action. Return false means no available frame has been - // serialized, otherwise return true. - bool SerializeDom(); - // The parameter specifies which frame need to be serialized. - // The parameter recursive_serialization specifies whether we need to - // serialize all sub frames of the specified frame or not. - // The parameter delegate specifies the pointer of interface - // DomSerializerDelegate provide sink interface which can receive the - // individual chunks of data to be saved. - // The parameter links contain original URLs of all saved links. - // The parameter local_paths contain corresponding local file paths of all - // saved links, which matched with vector:links one by one. - // The parameter local_directory_name is relative path of directory which - // contain all saved auxiliary files included all sub frames and resources. - DomSerializer(WebKit::WebFrame* webframe, - bool recursive_serialization, - DomSerializerDelegate* delegate, - const std::vector<GURL>& links, - const std::vector<FilePath>& local_paths, - const FilePath& local_directory_name); - - // Generate the META for charset declaration. - static std::wstring GenerateMetaCharsetDeclaration( - const std::wstring& charset); - // Generate the MOTW declaration. - static std::string GenerateMarkOfTheWebDeclaration(const GURL& url); - // Generate the default base tag declaration. - static std::wstring GenerateBaseTagDeclaration( - const std::wstring& base_target); - - private: - // Specified frame which need to be serialized; - WebKit::WebFrameImpl* specified_webframeimpl_; - // This hash_map is used to map resource URL of original link to its local - // file path. - typedef base::hash_map<std::string, FilePath> LinkLocalPathMap; - // local_links_ include all pair of local resource path and corresponding - // original link. - LinkLocalPathMap local_links_; - // Pointer of DomSerializerDelegate - DomSerializerDelegate* delegate_; - // Data buffer for saving result of serialized DOM data. - std::string data_buffer_; - // Passing true to recursive_serialization_ indicates we will serialize not - // only the specified frame but also all sub-frames in the specific frame. - // Otherwise we only serialize the specified frame excluded all sub-frames. - bool recursive_serialization_; - // Flag indicates whether we have collected all frames which need to be - // serialized or not; - bool frames_collected_; - // Local directory name of all local resource files. - const FilePath& local_directory_name_; - // Vector for saving all frames which need to be serialized. - std::vector<WebKit::WebFrameImpl*> frames_; - - struct SerializeDomParam { - // Frame URL of current processing document presented by GURL - const GURL& current_frame_gurl; - // Current using text encoding object. - const WebCore::TextEncoding& text_encoding; - - // Document object of current frame. - WebCore::Document* doc; - // Local directory name of all local resource files. - const FilePath& directory_name; - - // Flag indicates current doc is html document or not. It's a cache value - // of Document.isHTMLDocument(). - bool is_html_document; - // Flag which indicate whether we have met document type declaration. - bool has_doctype; - // Flag which indicate whether will process meta issue. - bool has_checked_meta; - // This meta element need to be skipped when serializing DOM. - const WebCore::Element* skip_meta_element; - // Flag indicates we are in script or style tag. - bool is_in_script_or_style_tag; - // Flag indicates whether we have written xml document declaration. - // It is only used in xml document - bool has_doc_declaration; - // Flag indicates whether we have added additional contents before end tag. - // This flag will be re-assigned in each call of function - // PostActionAfterSerializeOpenTag and it could be changed in function - // PreActionBeforeSerializeEndTag if the function adds new contents into - // serialization stream. - bool has_added_contents_before_end; - - // Constructor. - SerializeDomParam( - const GURL& current_frame_gurl, - const WebCore::TextEncoding& text_encoding, - WebCore::Document* doc, - const FilePath& directory_name); - - private: - DISALLOW_EVIL_CONSTRUCTORS(SerializeDomParam); - }; - - // Collect all target frames which need to be serialized. - void CollectTargetFrames(); - // Before we begin serializing open tag of a element, we give the target - // element a chance to do some work prior to add some additional data. - WebCore::String PreActionBeforeSerializeOpenTag( - const WebCore::Element* element, - SerializeDomParam* param, - bool* need_skip); - // After we finish serializing open tag of a element, we give the target - // element a chance to do some post work to add some additional data. - WebCore::String PostActionAfterSerializeOpenTag( - const WebCore::Element* element, - SerializeDomParam* param); - // Before we begin serializing end tag of a element, we give the target - // element a chance to do some work prior to add some additional data. - WebCore::String PreActionBeforeSerializeEndTag( - const WebCore::Element* element, - SerializeDomParam* param, bool* need_skip); - // After we finish serializing end tag of a element, we give the target - // element a chance to do some post work to add some additional data. - WebCore::String PostActionAfterSerializeEndTag( - const WebCore::Element* element, - SerializeDomParam* param); - // Save generated html content to data buffer. - void SaveHtmlContentToBuffer(const WebCore::String& result, - SerializeDomParam* param); - // Serialize open tag of an specified element. - void OpenTagToString(const WebCore::Element* element, - SerializeDomParam* param); - // Serialize end tag of an specified element. - void EndTagToString(const WebCore::Element* element, - SerializeDomParam* param); - // Build content for a specified node - void BuildContentForNode(const WebCore::Node* node, - SerializeDomParam* param); - - DISALLOW_EVIL_CONSTRUCTORS(DomSerializer); -}; - -} // namespace webkit_glue - -#endif // WEBKIT_GLUE_DOM_SERIALIZER_H__ diff --git a/webkit/glue/dom_serializer_delegate.h b/webkit/glue/dom_serializer_delegate.h deleted file mode 100644 index 7a738c4..0000000 --- a/webkit/glue/dom_serializer_delegate.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef WEBKIT_GLUE_DOM_SERIALIZER_DELEGATE_H__ -#define WEBKIT_GLUE_DOM_SERIALIZER_DELEGATE_H__ - -#include <string> - -class GURL; - -namespace webkit_glue { - -// This class is used for providing sink interface that can be used to receive -// the individual chunks of data to be saved. -class DomSerializerDelegate { - public: - // This enum indicates This sink interface can receive the individual chunks - // of serialized data to be saved, so we use values of following enum - // definition to indicate the serialization status of serializing all html - // content. If current frame is not complete serialized, call - // DidSerializeDataForFrame with URL of current frame, data, data length and - // flag CURRENT_FRAME_IS_NOT_FINISHED. - // If current frame is complete serialized, call DidSerializeDataForFrame - // with URL of current frame, data, data length and flag - // CURRENT_FRAME_IS_FINISHED. - // If all frames of page are complete serialized, call - // DidSerializeDataForFrame with empty URL, empty data, 0 and flag - // ALL_FRAMES_ARE_FINISHED. - enum PageSavingSerializationStatus { - // Current frame is not finished saving. - CURRENT_FRAME_IS_NOT_FINISHED = 0, - // Current frame is finished saving. - CURRENT_FRAME_IS_FINISHED, - // All frame are finished saving. - ALL_FRAMES_ARE_FINISHED, - }; - - // Receive the individual chunks of serialized data to be saved. - // The parameter frame_url specifies what frame the data belongs. The - // parameter data contains the available data for saving. The parameter - // status indicates the status of data serialization. - virtual void DidSerializeDataForFrame(const GURL& frame_url, - const std::string& data, PageSavingSerializationStatus status) = 0; - - DomSerializerDelegate() { } - virtual ~DomSerializerDelegate() { } -}; - -} // namespace webkit_glue - -#endif // WEBKIT_GLUE_DOM_SERIALIZER_DELEGATE_H__ diff --git a/webkit/glue/dom_serializer_unittest.cc b/webkit/glue/dom_serializer_unittest.cc index f6c4bb75..da31347 100644 --- a/webkit/glue/dom_serializer_unittest.cc +++ b/webkit/glue/dom_serializer_unittest.cc @@ -2,60 +2,94 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "config.h" - #include "base/compiler_specific.h" - -MSVC_PUSH_WARNING_LEVEL(0); -#include "Document.h" -#include "DocumentType.h" -#include "Element.h" -#include "FrameLoader.h" -#include "FrameView.h" -#include "HTMLAllCollection.h" -#include "HTMLHeadElement.h" -#include "HTMLMetaElement.h" -#include "HTMLNames.h" -#include "KURL.h" -#include "markup.h" -#include "SharedBuffer.h" -#include "SubstituteData.h" -MSVC_POP_WARNING(); -#undef LOG - #include "base/file_path.h" #include "base/file_util.h" #include "base/hash_tables.h" #include "base/string_util.h" #include "net/base/net_util.h" #include "net/url_request/url_request_context.h" +#include "third_party/WebKit/WebKit/chromium/public/WebCString.h" #include "third_party/WebKit/WebKit/chromium/public/WebData.h" +#include "third_party/WebKit/WebKit/chromium/public/WebDocument.h" +#include "third_party/WebKit/WebKit/chromium/public/WebElement.h" +#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h" +#include "third_party/WebKit/WebKit/chromium/public/WebNode.h" +#include "third_party/WebKit/WebKit/chromium/public/WebNodeCollection.h" +#include "third_party/WebKit/WebKit/chromium/public/WebNodeList.h" +#include "third_party/WebKit/WebKit/chromium/public/WebPageSerializer.h" +#include "third_party/WebKit/WebKit/chromium/public/WebPageSerializerClient.h" +#include "third_party/WebKit/WebKit/chromium/public/WebString.h" #include "third_party/WebKit/WebKit/chromium/public/WebURL.h" +#include "third_party/WebKit/WebKit/chromium/public/WebVector.h" #include "third_party/WebKit/WebKit/chromium/public/WebView.h" #include "webkit/glue/dom_operations.h" -#include "webkit/glue/dom_operations_private.h" -#include "webkit/glue/dom_serializer.h" -#include "webkit/glue/dom_serializer_delegate.h" -#include "webkit/glue/glue_util.h" -#include "third_party/WebKit/WebKit/chromium/src/WebFrameImpl.h" +#include "webkit/glue/webkit_glue.h" #include "webkit/tools/test_shell/simple_resource_loader_bridge.h" #include "webkit/tools/test_shell/test_shell_test.h" -using WebKit::WebFrameImpl; +using WebKit::WebCString; +using WebKit::WebData; +using WebKit::WebDocument; +using WebKit::WebElement; +using WebKit::WebFrame; +using WebKit::WebNode; +using WebKit::WebNodeCollection; +using WebKit::WebNodeList; +using WebKit::WebPageSerializer; +using WebKit::WebPageSerializerClient; +using WebKit::WebNode; +using WebKit::WebString; +using WebKit::WebURL; +using WebKit::WebView; +using WebKit::WebVector; namespace { +// Iterate recursively over sub-frames to find one with with a given url. +WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) { + if (!web_view->mainFrame()) + return NULL; + + std::vector<WebFrame*> stack; + stack.push_back(web_view->mainFrame()); + + while (!stack.empty()) { + WebFrame* current_frame = stack.back(); + stack.pop_back(); + if (GURL(current_frame->url()) == url) + return current_frame; + WebNodeCollection all = current_frame->document().all(); + for (WebNode node = all.firstItem(); + !node.isNull(); node = all.nextItem()) { + if (!node.isElementNode()) + continue; + // Check frame tag and iframe tag + WebElement element = node.toElement<WebElement>(); + if (!element.hasTagName("frame") && !element.hasTagName("iframe")) + continue; + WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element); + if (sub_frame) + stack.push_back(sub_frame); + } + } + return NULL; +} + class DomSerializerTests : public TestShellTest, - public webkit_glue::DomSerializerDelegate { + public WebPageSerializerClient { public: DomSerializerTests() : local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { } // DomSerializerDelegate. - void DidSerializeDataForFrame(const GURL& frame_url, - const std::string& data, PageSavingSerializationStatus status) { + void didSerializeDataForFrame(const WebURL& frame_web_url, + const WebCString& data, + PageSerializationStatus status) { + + GURL frame_url(frame_web_url); // If the all frames are finished saving, check all finish status - if (status == ALL_FRAMES_ARE_FINISHED) { + if (status == WebPageSerializerClient::AllFramesAreFinished) { SerializationFinishStatusMap::iterator it = serialization_finish_status_.begin(); for (; it != serialization_finish_status_.end(); ++it) @@ -77,10 +111,10 @@ class DomSerializerTests : public TestShellTest, ASSERT_FALSE(it->second); // Add data to corresponding frame's content. - serialized_frame_map_[frame_url.spec()] += data; + serialized_frame_map_[frame_url.spec()] += data.data(); // Current frame is completed saving, change the finish status. - if (status == CURRENT_FRAME_IS_FINISHED) + if (status == WebPageSerializerClient::CurrentFrameIsFinished) it->second = true; } @@ -106,27 +140,23 @@ class DomSerializerTests : public TestShellTest, // the document. void LoadContents(const std::string& contents, const GURL& base_url, - const WebCore::String encoding_info) { + const WebString encoding_info) { test_shell_->ResetTestController(); // If input encoding is empty, use UTF-8 as default encoding. if (encoding_info.isEmpty()) { test_shell_->webView()->mainFrame()->loadHTMLString(contents, base_url); } else { + WebData data(contents.data(), contents.length()); + // Do not use WebFrame.LoadHTMLString because it assumes that input // html contents use UTF-8 encoding. // TODO(darin): This should use WebFrame::loadData. - WebFrameImpl* web_frame = - static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); + WebFrame* web_frame = + test_shell_->webView()->mainFrame(); + ASSERT_TRUE(web_frame != NULL); - int len = static_cast<int>(contents.size()); - RefPtr<WebCore::SharedBuffer> buf( - WebCore::SharedBuffer::create(contents.data(), len)); - - WebCore::SubstituteData subst_data( - buf, WebCore::String("text/html"), encoding_info, WebCore::KURL()); - WebCore::ResourceRequest request(webkit_glue::GURLToKURL(base_url), - WebCore::CString()); - web_frame->frame()->loader()->load(request, subst_data, false); + + web_frame->loadData(data, "text/html", encoding_info, base_url); } test_shell_->WaitTestFinished(); @@ -137,20 +167,24 @@ class DomSerializerTests : public TestShellTest, // sub-frames. void SerializeDomForURL(const GURL& page_url, bool recursive_serialization) { - // Find corresponding WebFrameImpl according to page_url. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), page_url); + // Find corresponding WebFrame according to page_url. + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), + page_url); ASSERT_TRUE(web_frame != NULL); // Add input file URl to links_. - links_.push_back(page_url); + links_.assign(&page_url,1); // Add dummy file path to local_path_. - local_paths_.push_back(FilePath(FILE_PATH_LITERAL("c:\\dummy.htm"))); + WebString file_path = webkit_glue::FilePathStringToWebString( + FILE_PATH_LITERAL("c:\\dummy.htm")); + local_paths_.assign(&file_path, 1); // Start serializing DOM. - webkit_glue::DomSerializer dom_serializer(web_frame, - recursive_serialization, this, links_, local_paths_, - local_directory_name_); - ASSERT_TRUE(dom_serializer.SerializeDom()); + bool result = WebPageSerializer::serialize(web_frame, + recursive_serialization, + static_cast<WebPageSerializerClient*>(this), + links_, + local_paths_, + webkit_glue::FilePathToWebString(local_directory_name_)); + ASSERT_TRUE(result); ASSERT_TRUE(serialized_); } @@ -164,10 +198,10 @@ class DomSerializerTests : public TestShellTest, // Flag indicates whether the process of serializing DOM is finished or not. bool serialized_; // The links_ contain dummy original URLs of all saved links. - std::vector<GURL> links_; + WebVector<WebURL> links_; // The local_paths_ contain dummy corresponding local file paths of all saved // links, which matched links_ one by one. - std::vector<FilePath> local_paths_; + WebVector<WebString> local_paths_; // The local_directory_name_ is dummy relative path of directory which // contain all saved auxiliary files included all sub frames and resources. const FilePath local_directory_name_; @@ -184,55 +218,61 @@ class DomSerializerTests : public TestShellTest, } }; +// Helper function that test whether the first node in the doc is a doc type +// node. +bool HasDocType(const WebDocument& doc) { + WebNode node = doc.firstChild(); + if (node.isNull()) + return false; + return node.nodeType() == WebNode::DocumentTypeNode; +} + // Helper function for checking whether input node is META tag. Return true // means it is META element, otherwise return false. The parameter charset_info // return actual charset info if the META tag has charset declaration. -bool IsMetaElement(const WebCore::Node* node, WebCore::String* charset_info) { - if (!node->isHTMLElement()) +bool IsMetaElement(const WebNode& node, std::string& charset_info) { + if (!node.isElementNode()) return false; - if (!(static_cast<const WebCore::HTMLElement*>(node))->hasTagName( - WebCore::HTMLNames::metaTag)) + const WebElement meta = node.toConstElement<WebElement>(); + if (!meta.hasTagName("meta")) return false; - charset_info->remove(0, charset_info->length()); - const WebCore::HTMLMetaElement* meta = - static_cast<const WebCore::HTMLMetaElement*>(node); + charset_info.erase(0, charset_info.length()); // Check the META charset declaration. - WebCore::String equiv = meta->httpEquiv(); - if (equalIgnoringCase(equiv, "content-type")) { - WebCore::String content = meta->content(); - int pos = content.find("charset", 0, false); + WebString httpEquiv = meta.getAttribute("http-equiv"); + if (LowerCaseEqualsASCII(httpEquiv, "content-type")) { + std::string content = meta.getAttribute("content").utf8(); + int pos = content.find("charset", 0); if (pos > -1) { // Add a dummy charset declaration to charset_info, which indicates this // META tag has charset declaration although we do not get correct value // yet. - charset_info->append("has-charset-declaration"); + charset_info.append("has-charset-declaration"); int remaining_length = content.length() - pos - 7; if (!remaining_length) return true; - const UChar* start_pos = content.characters() + pos + 7; + int start_pos = pos + 7; // Find "=" symbol. while (remaining_length--) - if (*start_pos++ == L'=') + if (content[start_pos++] == L'=') break; // Skip beginning space. while (remaining_length) { - if (*start_pos > 0x0020) + if (content[start_pos] > 0x0020) break; ++start_pos; --remaining_length; } if (!remaining_length) return true; - const UChar* end_pos = start_pos; + int end_pos = start_pos; // Now we find out the start point of charset info. Search the end point. while (remaining_length--) { - if (*end_pos <= 0x0020 || *end_pos == L';') + if (content[end_pos] <= 0x0020 || content[end_pos] == L';') break; ++end_pos; } // Get actual charset info. - *charset_info = WebCore::String(start_pos, - static_cast<unsigned>(end_pos - start_pos)); + charset_info = content.substr(start_pos, end_pos - start_pos); return true; } } @@ -250,12 +290,10 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithDocType) { // Load the test file. LoadPageFromURL(file_url); // Make sure original contents have document type. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->doctype() != NULL); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(HasDocType(doc)); // Do serialization. SerializeDomForURL(file_url, false); // Load the serialized contents. @@ -263,12 +301,11 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithDocType) { const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, - web_frame->frame()->loader()->encoding()); + web_frame->encoding()); // Make sure serialized contents still have document type. - web_frame = - static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); - doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->doctype() != NULL); + web_frame = test_shell_->webView()->mainFrame(); + doc = web_frame->document(); + ASSERT_TRUE(HasDocType(doc)); } // If original contents do not have document type, the serialized contents @@ -282,12 +319,10 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithoutDocType) { // Load the test file. LoadPageFromURL(file_url); // Make sure original contents do not have document type. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->doctype() == NULL); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(!HasDocType(doc)); // Do serialization. SerializeDomForURL(file_url, false); // Load the serialized contents. @@ -295,12 +330,11 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithoutDocType) { const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, - web_frame->frame()->loader()->encoding()); + web_frame->encoding()); // Make sure serialized contents do not have document type. - web_frame = - static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); - doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->doctype() == NULL); + web_frame = test_shell_->webView()->mainFrame(); + doc = web_frame->document(); + ASSERT_TRUE(!HasDocType(doc)); } // Serialize XML document which has all 5 built-in entities. After @@ -340,7 +374,7 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithAddingMOTW) { ASSERT_TRUE(file_url.SchemeIsFile()); // Make sure original contents does not have MOTW; std::string motw_declaration = - webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url); + WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); ASSERT_FALSE(motw_declaration.empty()); // The encoding of original contents is ISO-8859-1, so we convert the MOTW // declaration to ASCII and search whether original contents has it or not. @@ -373,21 +407,19 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNoMetaCharsetInOriginalDoc) { LoadPageFromURL(file_url); // Make sure there is no META charset declaration in original document. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - WebCore::HTMLHeadElement* head_ele = doc->head(); - ASSERT_TRUE(head_ele != NULL); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + WebElement head_element = doc.head(); + ASSERT_TRUE(!head_element.isNull()); // Go through all children of HEAD element. - WebCore::String charset_info; - for (const WebCore::Node *child = head_ele->firstChild(); child != NULL; - child = child->nextSibling()) - if (IsMetaElement(child, &charset_info)) - ASSERT_TRUE(charset_info.isEmpty()); - + for (WebNode child = head_element.firstChild(); !child.isNull(); + child = child.nextSibling()) { + std::string charset_info; + if (IsMetaElement(child, charset_info)) + ASSERT_TRUE(charset_info.empty()); + } // Do serialization. SerializeDomForURL(file_url, false); @@ -396,28 +428,30 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNoMetaCharsetInOriginalDoc) { const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, - web_frame->frame()->loader()->encoding()); + web_frame->encoding()); // Make sure the first child of HEAD element is META which has charset // declaration in serialized contents. - web_frame = - static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); + web_frame = test_shell_->webView()->mainFrame(); ASSERT_TRUE(web_frame != NULL); - doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - head_ele = doc->head(); - ASSERT_TRUE(head_ele != NULL); - WebCore::Node* meta_node = head_ele->firstChild(); - ASSERT_TRUE(meta_node != NULL); + doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + head_element = doc.head(); + ASSERT_TRUE(!head_element.isNull()); + WebNode meta_node = head_element.firstChild(); + ASSERT_TRUE(!meta_node.isNull()); // Get meta charset info. - ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); - ASSERT_TRUE(!charset_info.isEmpty()); - ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); + std::string charset_info2; + ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); + ASSERT_TRUE(!charset_info2.empty()); + ASSERT_TRUE(charset_info2 == std::string(web_frame->encoding().utf8())); // Make sure no more additional META tags which have charset declaration. - for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL; - child = child->nextSibling()) - if (IsMetaElement(child, &charset_info)) - ASSERT_TRUE(charset_info.isEmpty()); + for (WebNode child = meta_node.nextSibling(); !child.isNull(); + child = child.nextSibling()) { + std::string charset_info; + if (IsMetaElement(child, charset_info)) + ASSERT_TRUE(charset_info.empty()); + } } // When serializing DOM, if the original document has multiple META charset @@ -437,24 +471,22 @@ TEST_F(DomSerializerTests, // Make sure there are multiple META charset declarations in original // document. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - WebCore::HTMLHeadElement* head_ele = doc->head(); - ASSERT_TRUE(head_ele != NULL); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + WebElement head_ele = doc.head(); + ASSERT_TRUE(!head_ele.isNull()); // Go through all children of HEAD element. int charset_declaration_count = 0; - WebCore::String charset_info; - for (const WebCore::Node *child = head_ele->firstChild(); child != NULL; - child = child->nextSibling()) { - if (IsMetaElement(child, &charset_info) && !charset_info.isEmpty()) + for (WebNode child = head_ele.firstChild(); !child.isNull(); + child = child.nextSibling()) { + std::string charset_info; + if (IsMetaElement(child, charset_info) && !charset_info.empty()) charset_declaration_count++; } // The original doc has more than META tags which have charset declaration. - ASSERT(charset_declaration_count > 1); + ASSERT_TRUE(charset_declaration_count > 1); // Do serialization. SerializeDomForURL(file_url, false); @@ -464,28 +496,30 @@ TEST_F(DomSerializerTests, const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, - web_frame->frame()->loader()->encoding()); + web_frame->encoding()); // Make sure only first child of HEAD element is META which has charset // declaration in serialized contents. - web_frame = - static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); + web_frame = test_shell_->webView()->mainFrame(); ASSERT_TRUE(web_frame != NULL); - doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - head_ele = doc->head(); - ASSERT_TRUE(head_ele != NULL); - WebCore::Node* meta_node = head_ele->firstChild(); - ASSERT_TRUE(meta_node != NULL); + doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + head_ele = doc.head(); + ASSERT_TRUE(!head_ele.isNull()); + WebNode meta_node = head_ele.firstChild(); + ASSERT_TRUE(!meta_node.isNull()); // Get meta charset info. - ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); - ASSERT_TRUE(!charset_info.isEmpty()); - ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); + std::string charset_info2; + ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); + ASSERT_TRUE(!charset_info2.empty()); + ASSERT_TRUE(charset_info2 == std::string(web_frame->encoding().utf8())); // Make sure no more additional META tags which have charset declaration. - for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL; - child = child->nextSibling()) - if (IsMetaElement(child, &charset_info)) - ASSERT_TRUE(charset_info.isEmpty()); + for (WebNode child = meta_node.nextSibling(); !child.isNull(); + child = child.nextSibling()) { + std::string charset_info; + if (IsMetaElement(child, charset_info)) + ASSERT_TRUE(charset_info.empty()); + } } // Test situation of html entities in text when serializing HTML DOM. @@ -501,20 +535,19 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInText) { static const char* const original_contents = "<HTML><BODY>&<>\"\'</BODY></HTML>"; // Load the test contents. - LoadContents(original_contents, file_url, ""); + LoadContents(original_contents, file_url, WebString()); // Get BODY's text content in DOM. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - WebCore::HTMLElement* body_ele = doc->body(); - ASSERT_TRUE(body_ele != NULL); - WebCore::Node* text_node = body_ele->firstChild(); - ASSERT_TRUE(text_node->isTextNode()); - ASSERT_TRUE(createMarkup(text_node) == "&<>\"\'"); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + WebElement body_ele = doc.body(); + ASSERT_TRUE(!body_ele.isNull()); + WebNode text_node = body_ele.firstChild(); + ASSERT_TRUE(text_node.isTextNode()); + ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) == + "&<>\"\'"); // Do serialization. SerializeDomForURL(file_url, false); // Compare the serialized contents with original contents. @@ -526,22 +559,21 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInText) { // Because we add MOTW when serializing DOM, so before comparison, we also // need to add MOTW to original_contents. std::string original_str = - webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url); + WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); original_str += original_contents; // Since WebCore now inserts a new HEAD element if there is no HEAD element // when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.) // We need to append the HEAD content and corresponding META content if we // find WebCore-generated HEAD element. - if (doc->head()) { - WebCore::String encoding = web_frame->frame()->loader()->encoding(); + if (!doc.head().isNull()) { + WebString encoding = web_frame->encoding(); std::string htmlTag("<HTML>"); std::string::size_type pos = original_str.find(htmlTag); ASSERT_NE(std::string::npos, pos); pos += htmlTag.length(); std::string head_part("<HEAD>"); - head_part += WideToASCII( - webkit_glue::DomSerializer::GenerateMetaCharsetDeclaration( - webkit_glue::StringToStdWString(encoding))); + head_part += + WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); head_part += "</HEAD>"; original_str.insert(pos, head_part); } @@ -562,19 +594,16 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInAttributeValue) { static const char* const original_contents = "<HTML><BODY title=\"&<>"'\"></BODY></HTML>"; // Load the test contents. - LoadContents(original_contents, file_url, ""); + LoadContents(original_contents, file_url, WebString()); // Get value of BODY's title attribute in DOM. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - WebCore::HTMLElement* body_ele = doc->body(); - ASSERT_TRUE(body_ele != NULL); - const WebCore::String& value = body_ele->getAttribute( - WebCore::HTMLNames::titleAttr); - ASSERT_TRUE(value == WebCore::String("&<>\"\'")); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + WebElement body_ele = doc.body(); + ASSERT_TRUE(!body_ele.isNull()); + WebString value = body_ele.getAttribute("title"); + ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'"); // Do serialization. SerializeDomForURL(file_url, false); // Compare the serialized contents with original contents. @@ -584,18 +613,17 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInAttributeValue) { // Compare the serialized contents with original contents to make sure // they are same. std::string original_str = - webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url); + WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); original_str += original_contents; - if (doc->head()) { - WebCore::String encoding = web_frame->frame()->loader()->encoding(); + if (!doc.isNull()) { + WebString encoding = web_frame->encoding(); std::string htmlTag("<HTML>"); std::string::size_type pos = original_str.find(htmlTag); ASSERT_NE(std::string::npos, pos); pos += htmlTag.length(); std::string head_part("<HEAD>"); - head_part += WideToASCII( - webkit_glue::DomSerializer::GenerateMetaCharsetDeclaration( - webkit_glue::StringToStdWString(encoding))); + head_part += + WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); head_part += "</HEAD>"; original_str.insert(pos, head_part); } @@ -612,23 +640,20 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNonStandardEntities) { LoadPageFromURL(file_url); // Get value of BODY's title attribute in DOM. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - WebCore::HTMLElement* body_ele = doc->body(); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + WebElement body_element = doc.body(); // Unescaped string for "%⊅&supl;'". - static const UChar parsed_value[] = { + static const wchar_t parsed_value[] = { '%', 0x2285, 0x00b9, '\'', 0 }; - const WebCore::String& value = body_ele->getAttribute( - WebCore::HTMLNames::titleAttr); - ASSERT_TRUE(value == WebCore::String(parsed_value)); + WebString value = body_element.getAttribute("title"); + ASSERT_TRUE(UTF16ToWide(value) == parsed_value); // Check the BODY content. - WebCore::Node* text_node = body_ele->firstChild(); - ASSERT_TRUE(text_node->isTextNode()); - ASSERT_TRUE(text_node->nodeValue() == WebCore::String(parsed_value)); + WebNode text_node = body_element.firstChild(); + ASSERT_TRUE(text_node.isTextNode()); + ASSERT_TRUE(UTF16ToWide(text_node.nodeValue()) == parsed_value); // Do serialization. SerializeDomForURL(file_url, false); @@ -667,34 +692,32 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) { // Since for this test, we assume there is no savable sub-resource links for // this test file, also all links are relative URLs in this test file, so we // need to check those relative URLs and make sure document has BASE tag. - WebFrameImpl* web_frame = - webkit_glue::GetWebFrameImplFromWebViewForSpecificURL( - test_shell_->webView(), file_url); + WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); // Go through all descent nodes. - RefPtr<WebCore::HTMLAllCollection> all = doc->all(); + WebNodeCollection all = doc.all(); int original_base_tag_count = 0; - for (WebCore::Node* node = all->firstItem(); node != NULL; - node = all->nextItem()) { - if (!node->isHTMLElement()) + for (WebNode node = all.firstItem(); !node.isNull(); + node = all.nextItem()) { + if (!node.isElementNode()) continue; - WebCore::Element* element = static_cast<WebCore::Element*>(node); - if (element->hasTagName(WebCore::HTMLNames::baseTag)) { + WebElement element = node.toElement<WebElement>(); + if (element.hasTagName("base")) { original_base_tag_count++; } else { // Get link. - const WebCore::AtomicString* value = + WebString value = webkit_glue::GetSubResourceLinkFromElement(element); - if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) { - value = &element->getAttribute(WebCore::HTMLNames::hrefAttr); - if (value->isEmpty()) - value = NULL; + if (value.isNull() && element.hasTagName("a")) { + value = element.getAttribute("href"); + if (value.isEmpty()) + value = WebString(); } // Each link is relative link. - if (value) { - GURL link(WideToUTF8(webkit_glue::StringToStdWString(value->string()))); + if (!value.isNull()) { + GURL link(value.utf8()); ASSERT_TRUE(link.scheme().empty()); } } @@ -702,8 +725,7 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) { ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); // Make sure in original document, the base URL is not equal with the // |path_dir_url|. - GURL original_base_url( - WideToUTF8(webkit_glue::StringToStdWString(doc->baseURL()))); + GURL original_base_url(doc.baseURL()); ASSERT_NE(original_base_url, path_dir_url); // Do serialization. @@ -714,37 +736,37 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) { const std::string& serialized_contents = GetSerializedContentForFrame(file_url); LoadContents(serialized_contents, file_url, - web_frame->frame()->loader()->encoding()); + web_frame->encoding()); // Make sure all links are absolute URLs and doc there are some number of // BASE tags in serialized HTML data. Each of those BASE tags have same base // URL which is as same as URL of current test file. - web_frame = static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); + web_frame = test_shell_->webView()->mainFrame(); ASSERT_TRUE(web_frame != NULL); - doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); + doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); // Go through all descent nodes. - all = doc->all(); + all = doc.all(); int new_base_tag_count = 0; - for (WebCore::Node* node = all->firstItem(); node != NULL; - node = all->nextItem()) { - if (!node->isHTMLElement()) + for (WebNode node = all.firstItem(); !node.isNull(); + node = all.nextItem()) { + if (!node.isElementNode()) continue; - WebCore::Element* element = static_cast<WebCore::Element*>(node); - if (element->hasTagName(WebCore::HTMLNames::baseTag)) { + WebElement element = node.toElement<WebElement>(); + if (element.hasTagName("base")) { new_base_tag_count++; } else { // Get link. - const WebCore::AtomicString* value = + WebString value = webkit_glue::GetSubResourceLinkFromElement(element); - if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) { - value = &element->getAttribute(WebCore::HTMLNames::hrefAttr); - if (value->isEmpty()) - value = NULL; + if (value.isNull() && element.hasTagName("a")) { + value = element.getAttribute("href"); + if (value.isEmpty()) + value = WebString(); } // Each link is absolute link. - if (value) { - GURL link(WideToUTF8(webkit_glue::StringToStdWString(value->string()))); + if (!value.isNull()) { + GURL link(std::string(value.utf8())); ASSERT_FALSE(link.scheme().empty()); } } @@ -752,8 +774,7 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) { // We have one more added BASE tag which is generated by JavaScript. ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); // Make sure in new document, the base URL is equal with the |path_dir_url|. - GURL new_base_url( - webkit_glue::StringToStdString(doc->baseURL())); + GURL new_base_url(doc.baseURL()); ASSERT_EQ(new_base_url, path_dir_url); } @@ -768,18 +789,17 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEmptyHead) { // Load the test html content. static const char* const empty_head_contents = "<HTML><HEAD></HEAD><BODY>hello world</BODY></HTML>"; - LoadContents(empty_head_contents, file_url, ""); + LoadContents(empty_head_contents, file_url, WebString()); // Make sure the head tag is empty. - WebFrameImpl* web_frame = - static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); + WebFrame* web_frame = test_shell_->webView()->mainFrame(); ASSERT_TRUE(web_frame != NULL); - WebCore::Document* doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - WebCore::HTMLHeadElement* head_ele = doc->head(); - ASSERT_TRUE(head_ele != NULL); - WTF::PassRefPtr<WebCore::HTMLCollection> children = head_ele->children(); - ASSERT_TRUE(0 == children->length()); + WebDocument doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + WebElement head_element = doc.head(); + ASSERT_TRUE(!head_element.isNull()); + ASSERT_TRUE(!head_element.hasChildNodes()); + ASSERT_TRUE(head_element.childNodes().length() == 0); // Do serialization. SerializeDomForURL(file_url, false); @@ -789,32 +809,31 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEmptyHead) { GetSerializedContentForFrame(file_url); // Reload serialized contents and make sure there is only one META tag. - LoadContents(serialized_contents, file_url, - web_frame->frame()->loader()->encoding()); - web_frame = static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame()); + LoadContents(serialized_contents, file_url, web_frame->encoding()); + web_frame = test_shell_->webView()->mainFrame(); ASSERT_TRUE(web_frame != NULL); - doc = web_frame->frame()->document(); - ASSERT_TRUE(doc->isHTMLDocument()); - head_ele = doc->head(); - ASSERT_TRUE(head_ele != NULL); - children = head_ele->children(); - ASSERT_TRUE(1 == children->length()); - WebCore::Node* meta_node = head_ele->firstChild(); - ASSERT_TRUE(meta_node != NULL); + doc = web_frame->document(); + ASSERT_TRUE(doc.isHTMLDocument()); + head_element = doc.head(); + ASSERT_TRUE(!head_element.isNull()); + ASSERT_TRUE(head_element.hasChildNodes()); + ASSERT_TRUE(head_element.childNodes().length() == 1); + WebNode meta_node = head_element.firstChild(); + ASSERT_TRUE(!meta_node.isNull()); // Get meta charset info. - WebCore::String charset_info; - ASSERT_TRUE(IsMetaElement(meta_node, &charset_info)); - ASSERT_TRUE(!charset_info.isEmpty()); - ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding()); + std::string charset_info; + ASSERT_TRUE(IsMetaElement(meta_node, charset_info)); + ASSERT_TRUE(!charset_info.empty()); + ASSERT_TRUE(charset_info == std::string(web_frame->encoding().utf8())); // Check the body's first node is text node and its contents are // "hello world" - WebCore::HTMLElement* body_ele = doc->body(); - ASSERT_TRUE(body_ele != NULL); - WebCore::Node* text_node = body_ele->firstChild(); - ASSERT_TRUE(text_node->isTextNode()); - const WebCore::String& text_node_contents = text_node->nodeValue(); - ASSERT_TRUE(text_node_contents == WebCore::String("hello world")); + WebElement body_element = doc.body(); + ASSERT_TRUE(!body_element.isNull()); + WebNode text_node = body_element.firstChild(); + ASSERT_TRUE(text_node.isTextNode()); + WebString text_node_contents = text_node.nodeValue(); + ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world"); } } // namespace diff --git a/webkit/glue/entity_map.cc b/webkit/glue/entity_map.cc deleted file mode 100644 index 26f094f..0000000 --- a/webkit/glue/entity_map.cc +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "config.h" - -#undef LOG - -#include "webkit/glue/entity_map.h" - -#include "base/hash_tables.h" -#include "base/string_util.h" - -namespace webkit_glue { - -// Note that this file is also included by HTMLTokenizer.cpp so we are getting -// two copies of the data in memory. We can fix this by changing the script -// that generated the array to create a static const that is its length, but -// this is low priority since the data is less than 4K. -#include "HTMLEntityNames.c" - -typedef base::hash_map<char16, const char*> EntityMapType; - -class EntityMapData { - public: - EntityMapData(const Entity* entity_codes, int entity_codes_length, - bool standard_html_entities) - : entity_codes_(entity_codes), - entity_codes_length_(entity_codes_length), - standard_html_entities_(standard_html_entities), - map_(NULL) { - } - ~EntityMapData() { delete map_; } - const EntityMapType* GetEntityMapData(); - - private: - // Data structure which saves all pairs of Unicode character and its - // corresponding entity notation. - const Entity* entity_codes_; - const int entity_codes_length_; - // ', %, ⊅ and &supl; are not defined by the HTML standards. - // - IE does not support ' as an HTML entity (but support it as an XML - // entity.) - // - Firefox supports ' as an HTML entity. - // - Both of IE and Firefox don't support %, ⊅ and &supl;. - // - // A web page saved by Chromium should be able to be read by other browsers - // such as IE and Firefox. Chromium should produce only the standard entity - // references which other browsers can recognize. - // So if standard_html_entities_ is true, we will use a numeric character - // reference for ', and don't use entity references for %, ⊅ - // and &supl; for serialization. - const bool standard_html_entities_; - // Map the Unicode character to corresponding entity notation. - EntityMapType* map_; - - DISALLOW_EVIL_CONSTRUCTORS(EntityMapData); -}; - -const EntityMapType* EntityMapData::GetEntityMapData() { - if (!map_) { - // lazily create the entity map. - map_ = new EntityMapType; - const Entity* entity_code = &entity_codes_[0]; - for (int i = 0; i < entity_codes_length_; ++i, ++entity_code) { - // For consistency, use lower case for entity codes that have both. - EntityMapType::const_iterator it = map_->find(entity_code->code); - if (it != map_->end() && - StringToLowerASCII(std::string(entity_code->name)) == it->second) - continue; - if (!standard_html_entities_ || - // Don't register %, ⊅ and &supl;. - (entity_code->code != '%' && - entity_code->code != 0x2285 && entity_code->code != 0x00b9)) - (*map_)[entity_code->code] = entity_code->name; - } - if (standard_html_entities_) - (*map_)[0x0027] = "#39"; - } - return map_; -} - -static const Entity xml_built_in_entity_codes[] = { - {"lt", 0x003c}, - {"gt", 0x003e}, - {"amp", 0x0026}, - {"apos", 0x0027}, - {"quot", 0x0022} -}; - -const char* EntityMap::GetEntityNameByCode(char16 code, bool is_html) { - static EntityMapData html_entity_map_singleton( - wordlist, sizeof(wordlist) / sizeof(Entity), true); - static EntityMapData xml_entity_map_singleton( - xml_built_in_entity_codes, arraysize(xml_built_in_entity_codes), false); - - const EntityMapType* entity_map; - if (is_html) - entity_map = html_entity_map_singleton.GetEntityMapData(); - else - entity_map = xml_entity_map_singleton.GetEntityMapData(); - - // Check entity name according to unicode. - EntityMapType::const_iterator i = entity_map->find(code); - if (i == entity_map->end()) - // Not found, return NULL. - return NULL; - else - // Found, return entity notation. - return i->second; -} - -} // namespace webkit_glue diff --git a/webkit/glue/entity_map.h b/webkit/glue/entity_map.h deleted file mode 100644 index 1b7e28b..0000000 --- a/webkit/glue/entity_map.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef WEBKIT_GLUE_ENTITY_MAP_H__ -#define WEBKIT_GLUE_ENTITY_MAP_H__ - -#include <string> - -#include "base/basictypes.h" -#include "base/string16.h" - -namespace webkit_glue { - -class EntityMap { - public: - // Check whether specified unicode has corresponding html or xml built-in - // entity name. If yes, return the entity notation, if not then return NULL. - // Parameter is_html indicates check the code in html entity map or in xml - // entity map. THIS FUNCTION IS NOT THREADSAFE. - static const char* GetEntityNameByCode(char16 code, bool is_html); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(EntityMap); -}; - -} // namespace webkit_glue - -#endif // WEBKIT_GLUE_ENTITY_MAP_H__ |