summaryrefslogtreecommitdiffstats
path: root/webkit/glue
diff options
context:
space:
mode:
authoryaar@chromium.org <yaar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-12-23 11:55:07 +0000
committeryaar@chromium.org <yaar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-12-23 11:55:07 +0000
commitd9ec5c0f2549db5a717834da1c30a9bf98dd86b7 (patch)
tree38d0def5964caf2d98cbfa168d045d8d0c02ec1d /webkit/glue
parentb1e69a58480cb3644dcb1fbdae3e8b8fcf487491 (diff)
downloadchromium_src-d9ec5c0f2549db5a717834da1c30a9bf98dd86b7.zip
chromium_src-d9ec5c0f2549db5a717834da1c30a9bf98dd86b7.tar.gz
chromium_src-d9ec5c0f2549db5a717834da1c30a9bf98dd86b7.tar.bz2
Chromium to use upstream WebPageSerializer instead of glue/DomSerializer.
See corresponding changes in webkit here: https://bugs.webkit.org/show_bug.cgi?id=31737 Review URL: http://codereview.chromium.org/434087 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@35216 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit/glue')
-rw-r--r--webkit/glue/dom_operations.cc248
-rw-r--r--webkit/glue/dom_operations.h8
-rw-r--r--webkit/glue/dom_operations_private.h58
-rw-r--r--webkit/glue/dom_serializer.cc627
-rw-r--r--webkit/glue/dom_serializer.h180
-rw-r--r--webkit/glue/dom_serializer_delegate.h52
-rw-r--r--webkit/glue/dom_serializer_unittest.cc557
-rw-r--r--webkit/glue/entity_map.cc113
-rw-r--r--webkit/glue/entity_map.h29
9 files changed, 364 insertions, 1508 deletions
diff --git a/webkit/glue/dom_operations.cc b/webkit/glue/dom_operations.cc
index 06c6519..8d0d0f7 100644
--- a/webkit/glue/dom_operations.cc
+++ b/webkit/glue/dom_operations.cc
@@ -19,7 +19,6 @@ MSVC_PUSH_WARNING_LEVEL(0);
#include "HTMLAllCollection.h"
#include "HTMLElement.h"
#include "HTMLFormElement.h"
-#include "HTMLFrameOwnerElement.h"
#include "HTMLHeadElement.h"
#include "HTMLInputElement.h"
#include "HTMLLinkElement.h"
@@ -31,23 +30,33 @@ MSVC_POP_WARNING();
#undef LOG
#include "base/string_util.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebDocument.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebElement.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebFormElement.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebInputElement.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNode.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNodeCollection.h"
#include "third_party/WebKit/WebKit/chromium/public/WebVector.h"
#include "third_party/WebKit/WebKit/chromium/public/WebView.h"
// TODO(yaar) Eventually should not depend on api/src.
#include "third_party/WebKit/WebKit/chromium/src/DOMUtilitiesPrivate.h"
#include "third_party/WebKit/WebKit/chromium/src/WebFrameImpl.h"
#include "webkit/glue/dom_operations.h"
-#include "webkit/glue/dom_operations_private.h"
#include "webkit/glue/form_data.h"
#include "webkit/glue/glue_util.h"
#include "webkit/glue/webpasswordautocompletelistener_impl.h"
using WebCore::String;
using WebKit::FrameLoaderClientImpl;
+using WebKit::WebDocument;
+using WebKit::WebElement;
using WebKit::WebFormElement;
using WebKit::WebFrame;
using WebKit::WebFrameImpl;
+using WebKit::WebInputElement;
using WebKit::WebNode;
+using WebKit::WebNodeCollection;
using WebKit::WebVector;
using WebKit::WebView;
@@ -64,7 +73,7 @@ struct SavableResourcesUniqueCheck {
std::set<GURL>* frames_set;
// Collection of all frames we go through when getting all savable resource
// links.
- std::vector<WebFrameImpl*>* frames;
+ std::vector<WebFrame*>* frames;
SavableResourcesUniqueCheck()
: resources_set(NULL),
@@ -72,7 +81,7 @@ struct SavableResourcesUniqueCheck {
frames(NULL) {}
SavableResourcesUniqueCheck(std::set<GURL>* resources_set,
- std::set<GURL>* frames_set, std::vector<WebFrameImpl*>* frames)
+ std::set<GURL>* frames_set, std::vector<WebFrame*>* frames)
: resources_set(resources_set),
frames_set(frames_set),
frames(frames) {}
@@ -81,25 +90,28 @@ struct SavableResourcesUniqueCheck {
// Get all savable resource links from current element. One element might
// have more than one resource link. It is possible to have some links
// in one CSS stylesheet.
-void GetSavableResourceLinkForElement(WebCore::Element* element,
- WebCore::Document* current_doc, SavableResourcesUniqueCheck* unique_check,
+void GetSavableResourceLinkForElement(
+ const WebElement& element,
+ const WebDocument& current_doc,
+ SavableResourcesUniqueCheck* unique_check,
webkit_glue::SavableResourcesResult* result) {
+
// Handle frame and iframe tag.
- bool is_frame_element;
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromElement(element, &is_frame_element);
- if (is_frame_element) {
- if (web_frame)
- unique_check->frames->push_back(web_frame);
+ if (element.hasTagName("iframe") ||
+ element.hasTagName("frame")) {
+ WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element);
+ if (sub_frame)
+ unique_check->frames->push_back(sub_frame);
return;
}
+
// Check whether the node has sub resource URL or not.
- const WebCore::AtomicString* value =
+ WebString value =
webkit_glue::GetSubResourceLinkFromElement(element);
- if (!value)
+ if (value.isNull())
return;
// Get absolute URL.
- GURL u(webkit_glue::KURLToGURL(current_doc->completeURL((*value).string())));
+ GURL u = current_doc.completeURL(value);
// ignore invalid URL
if (!u.is_valid())
return;
@@ -113,35 +125,25 @@ void GetSavableResourceLinkForElement(WebCore::Element* element,
return;
result->resources_list->push_back(u);
// Insert referrer for above new resource link.
- if (current_doc->frame()) {
- GURL u(webkit_glue::KURLToGURL(
- WebCore::KURL(WebCore::ParsedURLString,
- current_doc->frame()->loader()->outgoingReferrer())));
- result->referrers_list->push_back(u);
- } else {
- // Insert blank referrer.
- result->referrers_list->push_back(GURL());
- }
+ result->referrers_list->push_back(GURL());
}
// Get all savable resource links from current WebFrameImpl object pointer.
-void GetAllSavableResourceLinksForFrame(WebFrameImpl* current_frame,
+void GetAllSavableResourceLinksForFrame(WebFrame* current_frame,
SavableResourcesUniqueCheck* unique_check,
webkit_glue::SavableResourcesResult* result,
const char** savable_schemes) {
// Get current frame's URL.
- const WebCore::KURL& current_frame_kurl =
- current_frame->frame()->loader()->url();
- GURL current_frame_gurl(webkit_glue::KURLToGURL(current_frame_kurl));
+ GURL current_frame_url = current_frame->url();
// If url of current frame is invalid, ignore it.
- if (!current_frame_gurl.is_valid())
+ if (!current_frame_url.is_valid())
return;
// If url of current frame is not a savable protocol, ignore it.
bool is_valid_protocol = false;
for (int i = 0; savable_schemes[i] != NULL; ++i) {
- if (current_frame_gurl.SchemeIs(savable_schemes[i])) {
+ if (current_frame_url.SchemeIs(savable_schemes[i])) {
is_valid_protocol = true;
break;
}
@@ -150,20 +152,20 @@ void GetAllSavableResourceLinksForFrame(WebFrameImpl* current_frame,
return;
// If find same frame we have recorded, ignore it.
- if (!unique_check->frames_set->insert(current_frame_gurl).second)
+ if (!unique_check->frames_set->insert(current_frame_url).second)
return;
// Get current using document.
- WebCore::Document* current_doc = current_frame->frame()->document();
+ WebDocument current_doc = current_frame->document();
// Go through all descent nodes.
- PassRefPtr<WebCore::HTMLCollection> all = current_doc->all();
+ WebNodeCollection all = current_doc.all();
// Go through all node in this frame.
- for (WebCore::Node* node = all->firstItem(); node != NULL;
- node = all->nextItem()) {
+ for (WebNode node = all.firstItem(); !node.isNull();
+ node = all.nextItem()) {
// We only save HTML resources.
- if (!node->isHTMLElement())
+ if (!node.isElementNode())
continue;
- WebCore::Element* element = static_cast<WebCore::Element*>(node);
+ WebElement element = node.toElement<WebElement>();
GetSavableResourceLinkForElement(element,
current_doc,
unique_check,
@@ -360,159 +362,45 @@ void FillPasswordForm(WebView* view,
}
}
-WebFrameImpl* GetWebFrameImplFromElement(WebCore::Element* element,
- bool* is_frame_element) {
- *is_frame_element = false;
- if (element->hasTagName(WebCore::HTMLNames::iframeTag) ||
- element->hasTagName(WebCore::HTMLNames::frameTag)) {
- *is_frame_element = true;
- if (element->isFrameOwnerElement()) {
- // Check whether this frame has content.
- WebCore::HTMLFrameOwnerElement* frame_element =
- static_cast<WebCore::HTMLFrameOwnerElement*>(element);
- WebCore::Frame* content_frame = frame_element->contentFrame();
- return WebFrameImpl::fromFrame(content_frame);
+WebString GetSubResourceLinkFromElement(const WebElement& element) {
+ const char* attribute_name = NULL;
+ if (element.hasTagName("img") ||
+ element.hasTagName("script")) {
+ attribute_name = "src";
+ } else if (element.hasTagName("input")) {
+ const WebInputElement input = element.toConstElement<WebInputElement>();
+ if (input.inputType() == WebInputElement::Image) {
+ attribute_name = "src";
}
- }
- return NULL;
-}
-
-const WebCore::AtomicString* GetSubResourceLinkFromElement(
- const WebCore::Element* element) {
- const WebCore::QualifiedName* attribute_name = NULL;
- if (element->hasTagName(WebCore::HTMLNames::imgTag) ||
- element->hasTagName(WebCore::HTMLNames::scriptTag) ||
- element->hasTagName(WebCore::HTMLNames::linkTag)) {
- // Get value.
- if (element->hasTagName(WebCore::HTMLNames::linkTag)) {
+ } else if (element.hasTagName("body") ||
+ element.hasTagName("table") ||
+ element.hasTagName("tr") ||
+ element.hasTagName("td")) {
+ attribute_name = "background";
+ } else if (element.hasTagName("blockquote") ||
+ element.hasTagName("q") ||
+ element.hasTagName("del") ||
+ element.hasTagName("ins")) {
+ attribute_name = "cite";
+ } else if (element.hasTagName("link")) {
// If the link element is not linked to css, ignore it.
- const WebCore::HTMLLinkElement* link =
- static_cast<const WebCore::HTMLLinkElement*>(element);
- if (!link->sheet())
- return NULL;
+ if (LowerCaseEqualsASCII(element.getAttribute("type"), "text/css")) {
// TODO(jnd). Add support for extracting links of sub-resources which
// are inside style-sheet such as @import, url(), etc.
// See bug: http://b/issue?id=1111667.
- attribute_name = &WebCore::HTMLNames::hrefAttr;
- } else {
- attribute_name = &WebCore::HTMLNames::srcAttr;
- }
- } else if (element->hasTagName(WebCore::HTMLNames::inputTag)) {
- const WebCore::HTMLInputElement* input =
- static_cast<const WebCore::HTMLInputElement*>(element);
- if (input->inputType() == WebCore::HTMLInputElement::IMAGE) {
- attribute_name = &WebCore::HTMLNames::srcAttr;
+ attribute_name = "href";
}
- } else if (element->hasTagName(WebCore::HTMLNames::bodyTag) ||
- element->hasTagName(WebCore::HTMLNames::tableTag) ||
- element->hasTagName(WebCore::HTMLNames::trTag) ||
- element->hasTagName(WebCore::HTMLNames::tdTag)) {
- attribute_name = &WebCore::HTMLNames::backgroundAttr;
- } else if (element->hasTagName(WebCore::HTMLNames::blockquoteTag) ||
- element->hasTagName(WebCore::HTMLNames::qTag) ||
- element->hasTagName(WebCore::HTMLNames::delTag) ||
- element->hasTagName(WebCore::HTMLNames::insTag)) {
- attribute_name = &WebCore::HTMLNames::citeAttr;
}
if (!attribute_name)
- return NULL;
- const WebCore::AtomicString* value =
- &element->getAttribute(*attribute_name);
+ return WebString();
+ WebString value = element.getAttribute(WebString::fromUTF8(attribute_name));
// If value has content and not start with "javascript:" then return it,
// otherwise return NULL.
- if (value && !value->isEmpty() &&
- !value->startsWith("javascript:", false))
+ if (!value.isNull() && !value.isEmpty() &&
+ !StartsWithASCII(value.utf8(),"javascript:", false))
return value;
- return NULL;
-}
-
-bool ElementHasLegalLinkAttribute(const WebCore::Element* element,
- const WebCore::QualifiedName& attr_name) {
- if (attr_name == WebCore::HTMLNames::srcAttr) {
- // Check src attribute.
- if (element->hasTagName(WebCore::HTMLNames::imgTag) ||
- element->hasTagName(WebCore::HTMLNames::scriptTag) ||
- element->hasTagName(WebCore::HTMLNames::iframeTag) ||
- element->hasTagName(WebCore::HTMLNames::frameTag))
- return true;
- if (element->hasTagName(WebCore::HTMLNames::inputTag)) {
- const WebCore::HTMLInputElement* input =
- static_cast<const WebCore::HTMLInputElement*>(element);
- if (input->inputType() == WebCore::HTMLInputElement::IMAGE)
- return true;
- }
- } else if (attr_name == WebCore::HTMLNames::hrefAttr) {
- // Check href attribute.
- if (element->hasTagName(WebCore::HTMLNames::linkTag) ||
- element->hasTagName(WebCore::HTMLNames::aTag) ||
- element->hasTagName(WebCore::HTMLNames::areaTag))
- return true;
- } else if (attr_name == WebCore::HTMLNames::actionAttr) {
- if (element->hasTagName(WebCore::HTMLNames::formTag))
- return true;
- } else if (attr_name == WebCore::HTMLNames::backgroundAttr) {
- if (element->hasTagName(WebCore::HTMLNames::bodyTag) ||
- element->hasTagName(WebCore::HTMLNames::tableTag) ||
- element->hasTagName(WebCore::HTMLNames::trTag) ||
- element->hasTagName(WebCore::HTMLNames::tdTag))
- return true;
- } else if (attr_name == WebCore::HTMLNames::citeAttr) {
- if (element->hasTagName(WebCore::HTMLNames::blockquoteTag) ||
- element->hasTagName(WebCore::HTMLNames::qTag) ||
- element->hasTagName(WebCore::HTMLNames::delTag) ||
- element->hasTagName(WebCore::HTMLNames::insTag))
- return true;
- } else if (attr_name == WebCore::HTMLNames::classidAttr ||
- attr_name == WebCore::HTMLNames::dataAttr) {
- if (element->hasTagName(WebCore::HTMLNames::objectTag))
- return true;
- } else if (attr_name == WebCore::HTMLNames::codebaseAttr) {
- if (element->hasTagName(WebCore::HTMLNames::objectTag) ||
- element->hasTagName(WebCore::HTMLNames::appletTag))
- return true;
- }
- return false;
-}
-
-WebFrameImpl* GetWebFrameImplFromWebViewForSpecificURL(WebView* view,
- const GURL& page_url) {
- WebFrame* main_frame = view->mainFrame();
- if (!main_frame)
- return NULL;
- WebFrameImpl* main_frame_impl = static_cast<WebFrameImpl*>(main_frame);
-
- std::vector<WebFrameImpl*> frames;
- // First, process main frame.
- frames.push_back(main_frame_impl);
- // Collect all frames inside the specified frame.
- for (int i = 0; i < static_cast<int>(frames.size()); ++i) {
- WebFrameImpl* current_frame = frames[i];
- // Get current using document.
- WebCore::Document* current_doc = current_frame->frame()->document();
- // Check whether current frame is target or not.
- const WebCore::KURL& current_frame_kurl =
- current_frame->frame()->loader()->url();
- GURL current_frame_gurl(KURLToGURL(current_frame_kurl));
- if (page_url == current_frame_gurl)
- return current_frame;
- // Go through sub-frames.
- RefPtr<WebCore::HTMLCollection> all = current_doc->all();
- for (WebCore::Node* node = all->firstItem(); node != NULL;
- node = all->nextItem()) {
- if (!node->isHTMLElement())
- continue;
- WebCore::Element* element = static_cast<WebCore::Element*>(node);
- // Check frame tag and iframe tag.
- bool is_frame_element;
- WebFrameImpl* web_frame = GetWebFrameImplFromElement(
- element, &is_frame_element);
- if (is_frame_element && web_frame)
- frames.push_back(web_frame);
- }
- }
-
- return NULL;
+ return WebString();
}
// Get all savable resource links from current webview, include main
@@ -527,12 +415,12 @@ bool GetAllSavableResourceLinksForCurrentPage(WebView* view,
std::set<GURL> resources_set;
std::set<GURL> frames_set;
- std::vector<WebFrameImpl*> frames;
+ std::vector<WebFrame*> frames;
SavableResourcesUniqueCheck unique_check(&resources_set,
&frames_set,
&frames);
- GURL main_page_gurl(KURLToGURL(main_frame_impl->frame()->loader()->url()));
+ GURL main_page_gurl(main_frame_impl->url());
// Make sure we are saving same page between embedder and webkit.
// If page has being navigated, embedder will get three empty vector,
diff --git a/webkit/glue/dom_operations.h b/webkit/glue/dom_operations.h
index e0d7142..551666b 100644
--- a/webkit/glue/dom_operations.h
+++ b/webkit/glue/dom_operations.h
@@ -125,6 +125,14 @@ bool ElementDoesAutoCompleteForElementWithId(WebKit::WebView* view,
// Returns the number of animations currently running.
int NumberOfActiveAnimations(WebKit::WebView* view);
+// Returns the value of an element's resource URL attribute: "src" for IMG,
+// SCRIPT and INPUT TYPE=image; "href" for LINK TYPE=text/css; "background"
+// for BODY, TABLE, TR and TD; "cite" for BLOCKQUOTE, Q, DEL and INS. Returns
+// a null WebString for any other element, or when the value is empty or
+// starts with "javascript:".
+WebKit::WebString GetSubResourceLinkFromElement(
+ const WebKit::WebElement& element);
+
} // namespace webkit_glue
#endif // WEBKIT_GLUE_DOM_OPERATIONS_H__
diff --git a/webkit/glue/dom_operations_private.h b/webkit/glue/dom_operations_private.h
deleted file mode 100644
index 403ca16..0000000
--- a/webkit/glue/dom_operations_private.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef WEBKIT_GLUE_DOM_OPERATIONS_PRIVATE_H_
-#define WEBKIT_GLUE_DOM_OPERATIONS_PRIVATE_H_
-
-namespace WebCore {
-class AtomicString;
-class Document;
-class Element;
-class Node;
-class QualifiedName;
-class String;
-}
-
-namespace WebKit {
-class WebFrameImpl;
-class WebView;
-}
-
-class GURL;
-
-namespace webkit_glue {
-
-// If element is HTML:IFrame or HTML:Frame, then return the WebFrameImpl
-// object corresponding to the content frame, otherwise return NULL.
-// The parameter is_frame_element indicates whether the input element
-// is frame/iframe element or not.
-WebKit::WebFrameImpl* GetWebFrameImplFromElement(WebCore::Element* element,
- bool* is_frame_element);
-
-// If element is img, script or input type=image, then return its link refer
-// to the "src" attribute. If element is link, then return its link refer to
-// the "href" attribute. If element is body, table, tr, td, then return its
-// link refer to the "background" attribute. If element is blockquote, q, del,
-// ins, then return its link refer to the "cite" attribute. Otherwise return
-// NULL.
-const WebCore::AtomicString* GetSubResourceLinkFromElement(
- const WebCore::Element* element);
-
-// For img, script, iframe, frame element, when attribute name is src,
-// for link, a, area element, when attribute name is href,
-// for form element, when attribute name is action,
-// for input, type=image, when attribute name is src,
-// for body, table, tr, td, when attribute name is background,
-// for blockquote, q, del, ins, when attribute name is cite,
-// we can consider the attribute value has legal link.
-bool ElementHasLegalLinkAttribute(const WebCore::Element* element,
- const WebCore::QualifiedName& attr_name);
-
-// Get pointer of WebFrameImpl from webview according to specific URL.
-WebKit::WebFrameImpl* GetWebFrameImplFromWebViewForSpecificURL(
- WebKit::WebView* view, const GURL& page_url);
-
-} // namespace webkit_glue
-
-#endif // WEBKIT_GLUE_DOM_OPERATIONS_PRIVATE_H_
diff --git a/webkit/glue/dom_serializer.cc b/webkit/glue/dom_serializer.cc
deleted file mode 100644
index d8cf3a4..0000000
--- a/webkit/glue/dom_serializer.cc
+++ /dev/null
@@ -1,627 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// How we handle the base tag better.
-// Current status:
-// At now the normal way we use to handling base tag is
-// a) For those links which have corresponding local saved files, such as
-// savable CSS, JavaScript files, they will be written to relative URLs which
-// point to local saved file. Why those links can not be resolved as absolute
-// file URLs, because if they are resolved as absolute URLs, after moving the
-// file location from one directory to another directory, the file URLs will
-// be dead links.
-// b) For those links which have not corresponding local saved files, such as
-// links in A, AREA tags, they will be resolved as absolute URLs.
-// c) We comment all base tags when serialzing DOM for the page.
-// FireFox also uses above way to handle base tag.
-//
-// Problem:
-// This way can not handle the following situation:
-// the base tag is written by JavaScript.
-// For example. The page "www.yahoo.com" use
-// "document.write('<base href="http://www.yahoo.com/"...');" to setup base URL
-// of page when loading page. So when saving page as completed-HTML, we assume
-// that we save "www.yahoo.com" to "c:\yahoo.htm". After then we load the saved
-// completed-HTML page, then the JavaScript will insert a base tag
-// <base href="http://www.yahoo.com/"...> to DOM, so all URLs which point to
-// local saved resource files will be resolved as
-// "http://www.yahoo.com/yahoo_files/...", which will cause all saved resource
-// files can not be loaded correctly. Also the page will be rendered ugly since
-// all saved sub-resource files (such as CSS, JavaScript files) and sub-frame
-// files can not be fetched.
-// Now FireFox, IE and WebKit based Browser all have this problem.
-//
-// Solution:
-// My solution is that we comment old base tag and write new base tag:
-// <base href="." ...> after the previous commented base tag. In WebKit, it
-// always uses the latest "href" attribute of base tag to set document's base
-// URL. Based on this behavior, when we encounter a base tag, we comment it and
-// write a new base tag <base href="."> after the previous commented base tag.
-// The new added base tag can help engine to locate correct base URL for
-// correctly loading local saved resource files. Also I think we need to inherit
-// the base target value from document object when appending new base tag.
-// If there are multiple base tags in original document, we will comment all old
-// base tags and append new base tag after each old base tag because we do not
-// know those old base tags are original content or added by JavaScript. If
-// they are added by JavaScript, it means when loading saved page, the script(s)
-// will still insert base tag(s) to DOM, so the new added base tag(s) can
-// override the incorrect base URL and make sure we alway load correct local
-// saved resource files.
-
-// We must include format_macros up here, before any WebKit headers
-// include inttypes.h.
-#include "base/format_macros.h"
-
-#include "config.h"
-
-#include "base/compiler_specific.h"
-
-MSVC_PUSH_WARNING_LEVEL(0);
-#include "DocumentType.h"
-#include "FrameLoader.h"
-#include "Document.h"
-#include "Element.h"
-#include "HTMLAllCollection.h"
-#include "HTMLElement.h"
-#include "HTMLFormElement.h"
-#include "HTMLMetaElement.h"
-#include "HTMLNames.h"
-#include "KURL.h"
-#include "markup.h"
-#include "PlatformString.h"
-#include "TextEncoding.h"
-MSVC_POP_WARNING();
-#undef LOG
-
-#include "webkit/glue/dom_serializer.h"
-
-#include "base/string_util.h"
-#include "third_party/WebKit/WebKit/chromium/src/WebFrameImpl.h"
-#include "webkit/glue/dom_operations.h"
-#include "webkit/glue/dom_operations_private.h"
-#include "webkit/glue/dom_serializer_delegate.h"
-#include "webkit/glue/entity_map.h"
-#include "webkit/glue/glue_util.h"
-
-using WebKit::WebFrame;
-using WebKit::WebFrameImpl;
-
-namespace {
-
-// Default "mark of the web" declaration
-static const char* const kDefaultMarkOfTheWeb =
- "\n<!-- saved from url=(%04" PRIuS ")%s -->\n";
-
-// Default meat content for writing correct charset declaration.
-static const wchar_t* const kDefaultMetaContent =
- L"<META http-equiv=\"Content-Type\" content=\"text/html; charset=%ls\">";
-
-// Notation of start comment.
-static const wchar_t* const kStartCommentNotation = L"<!-- ";
-
-// Notation of end comment.
-static const wchar_t* const kEndCommentNotation = L" -->";
-
-// Default XML declaration.
-static const wchar_t* const kXMLDeclaration =
- L"<?xml version=\"%ls\" encoding=\"%ls\"%ls?>\n";
-
-// Default base tag declaration
-static const wchar_t* const kBaseTagDeclaration =
- L"<BASE href=\".\"%ls>";
-
-static const wchar_t* const kBaseTargetDeclaration =
- L" target=\"%ls\"";
-
-// Maximum length of data buffer which is used to temporary save generated
-// html content data.
-static const int kHtmlContentBufferLength = 65536;
-
-// Check whether specified unicode has corresponding html/xml entity name.
-// If yes, replace the character with the returned entity notation, if not
-// then still use original character.
-void ConvertCorrespondingSymbolToEntity(WebCore::String* result,
- const WebCore::String& value,
- bool in_html_doc) {
- unsigned len = value.length();
- const UChar* start_pos = value.characters();
- const UChar* cur_pos = start_pos;
- while (len--) {
- const char* entity_name =
- webkit_glue::EntityMap::GetEntityNameByCode(*cur_pos, in_html_doc);
- if (entity_name) {
- // Append content before entity code.
- if (cur_pos > start_pos)
- result->append(start_pos, cur_pos - start_pos);
- result->append("&");
- result->append(entity_name);
- result->append(";");
- start_pos = ++cur_pos;
- } else {
- cur_pos++;
- }
- }
- // Append the remaining content.
- if (cur_pos > start_pos)
- result->append(start_pos, cur_pos - start_pos);
-}
-
-} // namespace
-
-namespace webkit_glue {
-
-// SerializeDomParam Constructor.
-DomSerializer::SerializeDomParam::SerializeDomParam(
- const GURL& current_frame_gurl,
- const WebCore::TextEncoding& text_encoding,
- WebCore::Document* doc,
- const FilePath& directory_name)
- : current_frame_gurl(current_frame_gurl),
- text_encoding(text_encoding),
- doc(doc),
- directory_name(directory_name),
- has_doctype(false),
- has_checked_meta(false),
- skip_meta_element(NULL),
- is_in_script_or_style_tag(false),
- has_doc_declaration(false) {
- // Cache the value since we check it lots of times.
- is_html_document = doc->isHTMLDocument();
-}
-
-// Static
-std::wstring DomSerializer::GenerateMetaCharsetDeclaration(
- const std::wstring& charset) {
- return StringPrintf(kDefaultMetaContent, charset.c_str());
-}
-
-// Static.
-std::string DomSerializer::GenerateMarkOfTheWebDeclaration(
- const GURL& url) {
- return StringPrintf(kDefaultMarkOfTheWeb,
- url.spec().size(), url.spec().c_str());
-}
-
-// Static.
-std::wstring DomSerializer::GenerateBaseTagDeclaration(
- const std::wstring& base_target) {
- std::wstring target_declaration = base_target.empty() ? L"" :
- StringPrintf(kBaseTargetDeclaration, base_target.c_str());
- return StringPrintf(kBaseTagDeclaration, target_declaration.c_str());
-}
-
-WebCore::String DomSerializer::PreActionBeforeSerializeOpenTag(
- const WebCore::Element* element, SerializeDomParam* param,
- bool* need_skip) {
- WebCore::String result;
-
- *need_skip = false;
- if (param->is_html_document) {
- // Skip the open tag of original META tag which declare charset since we
- // have overrided the META which have correct charset declaration after
- // serializing open tag of HEAD element.
- if (element->hasTagName(WebCore::HTMLNames::metaTag)) {
- const WebCore::HTMLMetaElement* meta =
- static_cast<const WebCore::HTMLMetaElement*>(element);
- // Check whether the META tag has declared charset or not.
- WebCore::String equiv = meta->httpEquiv();
- if (equalIgnoringCase(equiv, "content-type")) {
- WebCore::String content = meta->content();
- if (content.length() && content.contains("charset", false)) {
- // Find META tag declared charset, we need to skip it when
- // serializing DOM.
- param->skip_meta_element = element;
- *need_skip = true;
- }
- }
- } else if (element->hasTagName(WebCore::HTMLNames::htmlTag)) {
- // Check something before processing the open tag of HEAD element.
- // First we add doc type declaration if original doc has it.
- if (!param->has_doctype) {
- param->has_doctype = true;
- result += createMarkup(param->doc->doctype());
- }
-
- // Add MOTW declaration before html tag.
- // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
- result += StdStringToString(GenerateMarkOfTheWebDeclaration(
- param->current_frame_gurl));
- } else if (element->hasTagName(WebCore::HTMLNames::baseTag)) {
- // Comment the BASE tag when serializing dom.
- result += StdWStringToString(kStartCommentNotation);
- }
- } else {
- // Write XML declaration.
- if (!param->has_doc_declaration) {
- param->has_doc_declaration = true;
- // Get encoding info.
- WebCore::String xml_encoding = param->doc->xmlEncoding();
- if (xml_encoding.isEmpty())
- xml_encoding = param->doc->frame()->loader()->encoding();
- if (xml_encoding.isEmpty())
- xml_encoding = WebCore::UTF8Encoding().name();
- std::wstring str_xml_declaration =
- StringPrintf(kXMLDeclaration,
- StringToStdWString(param->doc->xmlVersion()).c_str(),
- StringToStdWString(xml_encoding).c_str(),
- param->doc->xmlStandalone() ? L" standalone=\"yes\"" :
- L"");
- result += StdWStringToString(str_xml_declaration);
- }
- // Add doc type declaration if original doc has it.
- if (!param->has_doctype) {
- param->has_doctype = true;
- result += createMarkup(param->doc->doctype());
- }
- }
-
- return result;
-}
-
-WebCore::String DomSerializer::PostActionAfterSerializeOpenTag(
- const WebCore::Element* element, SerializeDomParam* param) {
- WebCore::String result;
-
- param->has_added_contents_before_end = false;
- if (!param->is_html_document)
- return result;
- // Check after processing the open tag of HEAD element
- if (!param->has_checked_meta &&
- element->hasTagName(WebCore::HTMLNames::headTag)) {
- param->has_checked_meta = true;
- // Check meta element. WebKit only pre-parse the first 512 bytes
- // of the document. If the whole <HEAD> is larger and meta is the
- // end of head part, then this kind of pages aren't decoded correctly
- // because of this issue. So when we serialize the DOM, we need to
- // make sure the meta will in first child of head tag.
- // See http://bugs.webkit.org/show_bug.cgi?id=16621.
- // First we generate new content for writing correct META element.
- std::wstring str_meta =
- GenerateMetaCharsetDeclaration(
- ASCIIToWide(param->text_encoding.name()));
- result += StdWStringToString(str_meta);
-
- param->has_added_contents_before_end = true;
- // Will search each META which has charset declaration, and skip them all
- // in PreActionBeforeSerializeOpenTag.
- } else if (element->hasTagName(WebCore::HTMLNames::scriptTag) ||
- element->hasTagName(WebCore::HTMLNames::styleTag)) {
- param->is_in_script_or_style_tag = true;
- }
-
- return result;
-}
-
-WebCore::String DomSerializer::PreActionBeforeSerializeEndTag(
- const WebCore::Element* element, SerializeDomParam* param,
- bool* need_skip) {
- WebCore::String result;
-
- *need_skip = false;
- if (!param->is_html_document)
- return result;
- // Skip the end tag of original META tag which declare charset.
- // Need not to check whether it's META tag since we guarantee
- // skip_meta_element is definitely META tag if it's not NULL.
- if (param->skip_meta_element == element) {
- *need_skip = true;
- } else if (element->hasTagName(WebCore::HTMLNames::scriptTag) ||
- element->hasTagName(WebCore::HTMLNames::styleTag)) {
- DCHECK(param->is_in_script_or_style_tag);
- param->is_in_script_or_style_tag = false;
- }
-
- return result;
-}
-
-// After we finish serializing end tag of a element, we give the target
-// element a chance to do some post work to add some additional data.
-WebCore::String DomSerializer::PostActionAfterSerializeEndTag(
- const WebCore::Element* element, SerializeDomParam* param) {
- WebCore::String result;
-
- if (!param->is_html_document)
- return result;
- // Comment the BASE tag when serializing DOM.
- if (element->hasTagName(WebCore::HTMLNames::baseTag)) {
- result += StdWStringToString(kEndCommentNotation);
- // Append a new base tag declaration.
- result += StdWStringToString(GenerateBaseTagDeclaration(
- webkit_glue::StringToStdWString(param->doc->baseTarget())));
- }
-
- return result;
-}
-
-void DomSerializer::SaveHtmlContentToBuffer(const WebCore::String& result,
- SerializeDomParam* param) {
- if (!result.length())
- return;
- // Convert the unicode content to target encoding
- WebCore::CString encoding_result = param->text_encoding.encode(
- result.characters(), result.length(), WebCore::EntitiesForUnencodables);
-
- // if the data buffer will be full, then send it out first.
- if (encoding_result.length() + data_buffer_.size() >
- data_buffer_.capacity()) {
- // Send data to delegate, tell it now we are serializing current frame.
- delegate_->DidSerializeDataForFrame(param->current_frame_gurl,
- data_buffer_, DomSerializerDelegate::CURRENT_FRAME_IS_NOT_FINISHED);
- data_buffer_.clear();
- }
-
- // Append result to data buffer.
- data_buffer_.append(CStringToStdString(encoding_result));
-}
-
-void DomSerializer::OpenTagToString(const WebCore::Element* element,
- SerializeDomParam* param) {
- bool need_skip;
- // Do pre action for open tag.
- WebCore::String result = PreActionBeforeSerializeOpenTag(element,
- param,
- &need_skip);
- if (need_skip)
- return;
- // Add open tag
- result += "<" + element->nodeName();
- // Go through all attributes and serialize them.
- const WebCore::NamedNodeMap *attrMap = element->attributes(true);
- if (attrMap) {
- unsigned numAttrs = attrMap->length();
- for (unsigned i = 0; i < numAttrs; i++) {
- result += " ";
- // Add attribute pair
- const WebCore::Attribute *attribute = attrMap->attributeItem(i);
- result += attribute->name().toString();
- result += "=\"";
- if (!attribute->value().isEmpty()) {
- // Check whether we need to replace some resource links
- // with local resource paths.
- const WebCore::QualifiedName& attr_name = attribute->name();
- // Check whether need to change the attribute which has link
- bool need_replace_link =
- ElementHasLegalLinkAttribute(element, attr_name);
- if (need_replace_link) {
- // First, get the absolute link
- const WebCore::String& attr_value = attribute->value();
- // For links start with "javascript:", we do not change it.
- if (attr_value.startsWith("javascript:", false)) {
- result += attr_value;
- } else {
- WebCore::String str_value = param->doc->completeURL(attr_value);
- std::string value(StringToStdString(str_value));
- // Check whether we local files for those link.
- LinkLocalPathMap::const_iterator it = local_links_.find(value);
- if (it != local_links_.end()) {
- // Replace the link when we have local files.
- FilePath::StringType path(FilePath::kCurrentDirectory);
- if (!param->directory_name.empty())
- path += FILE_PATH_LITERAL("/") + param->directory_name.value();
- path += FILE_PATH_LITERAL("/") + it->second.value();
- result += FilePathStringToString(path);
- } else {
- // If not found local path, replace it with absolute link.
- result += str_value;
- }
- }
- } else {
- ConvertCorrespondingSymbolToEntity(&result, attribute->value(),
- param->is_html_document);
- }
- }
- result += "\"";
- }
- }
-
- // Do post action for open tag.
- WebCore::String added_contents =
- PostActionAfterSerializeOpenTag(element, param);
- // Complete the open tag for element when it has child/children.
- if (element->hasChildNodes() || param->has_added_contents_before_end)
- result += ">";
- // Append the added contents generate in post action of open tag.
- result += added_contents;
- // Save the result to data buffer.
- SaveHtmlContentToBuffer(result, param);
-}
-
-// Serialize end tag of an specified element.
-void DomSerializer::EndTagToString(const WebCore::Element* element,
- SerializeDomParam* param) {
- bool need_skip;
- // Do pre action for end tag.
- WebCore::String result = PreActionBeforeSerializeEndTag(element,
- param,
- &need_skip);
- if (need_skip)
- return;
- // Write end tag when element has child/children.
- if (element->hasChildNodes() || param->has_added_contents_before_end) {
- result += "</";
- result += element->nodeName();
- result += ">";
- } else {
- // Check whether we have to write end tag for empty element.
- if (param->is_html_document) {
- result += ">";
- const WebCore::HTMLElement* html_element =
- static_cast<const WebCore::HTMLElement*>(element);
- if (html_element->endTagRequirement() == WebCore::TagStatusRequired) {
- // We need to write end tag when it is required.
- result += "</";
- result += element->nodeName();
- result += ">";
- }
- } else {
- // For xml base document.
- result += " />";
- }
- }
- // Do post action for end tag.
- result += PostActionAfterSerializeEndTag(element, param);
- // Save the result to data buffer.
- SaveHtmlContentToBuffer(result, param);
-}
-
-void DomSerializer::BuildContentForNode(const WebCore::Node* node,
- SerializeDomParam* param) {
- switch (node->nodeType()) {
- case WebCore::Node::ELEMENT_NODE: {
- // Process open tag of element.
- OpenTagToString(static_cast<const WebCore::Element*>(node), param);
- // Walk through the children nodes and process it.
- for (const WebCore::Node *child = node->firstChild(); child != NULL;
- child = child->nextSibling())
- BuildContentForNode(child, param);
- // Process end tag of element.
- EndTagToString(static_cast<const WebCore::Element*>(node), param);
- break;
- }
- case WebCore::Node::TEXT_NODE: {
- SaveHtmlContentToBuffer(createMarkup(node), param);
- break;
- }
- case WebCore::Node::ATTRIBUTE_NODE:
- case WebCore::Node::DOCUMENT_NODE:
- case WebCore::Node::DOCUMENT_FRAGMENT_NODE: {
- // Should not exist.
- DCHECK(false);
- break;
- }
- // Document type node can be in DOM?
- case WebCore::Node::DOCUMENT_TYPE_NODE:
- param->has_doctype = true;
- default: {
- // For other type node, call default action.
- SaveHtmlContentToBuffer(createMarkup(node), param);
- break;
- }
- }
-}
-
-DomSerializer::DomSerializer(WebFrame* webframe,
- bool recursive_serialization,
- DomSerializerDelegate* delegate,
- const std::vector<GURL>& links,
- const std::vector<FilePath>& local_paths,
- const FilePath& local_directory_name)
- : delegate_(delegate),
- recursive_serialization_(recursive_serialization),
- frames_collected_(false),
- local_directory_name_(local_directory_name) {
- // Must specify available webframe.
- DCHECK(webframe);
- specified_webframeimpl_ = static_cast<WebFrameImpl*>(webframe);
- // Make sure we have not-NULL delegate.
- DCHECK(delegate);
- // Build local resources map.
- DCHECK(links.size() == local_paths.size());
- std::vector<GURL>::const_iterator link_it = links.begin();
- std::vector<FilePath>::const_iterator path_it = local_paths.begin();
- for (; link_it != links.end(); ++link_it, ++path_it) {
- bool never_present = local_links_.insert(
- LinkLocalPathMap::value_type(link_it->spec(), *path_it)).
- second;
- DCHECK(never_present);
- }
-
- // Init data buffer.
- data_buffer_.reserve(kHtmlContentBufferLength);
- DCHECK(data_buffer_.empty());
-}
-
-void DomSerializer::CollectTargetFrames() {
- DCHECK(!frames_collected_);
- frames_collected_ = true;
-
- // First, process main frame.
- frames_.push_back(specified_webframeimpl_);
- // Return now if user only needs to serialize specified frame, not including
- // all sub-frames.
- if (!recursive_serialization_)
- return;
- // Collect all frames inside the specified frame.
- for (int i = 0; i < static_cast<int>(frames_.size()); ++i) {
- WebFrameImpl* current_frame = frames_[i];
- // Get current using document.
- WebCore::Document* current_doc = current_frame->frame()->document();
- // Go through sub-frames.
- RefPtr<WebCore::HTMLAllCollection> all = current_doc->all();
- for (WebCore::Node* node = all->firstItem(); node != NULL;
- node = all->nextItem()) {
- if (!node->isHTMLElement())
- continue;
- WebCore::Element* element = static_cast<WebCore::Element*>(node);
- // Check frame tag and iframe tag.
- bool is_frame_element;
- WebFrameImpl* web_frame = GetWebFrameImplFromElement(
- element, &is_frame_element);
- if (is_frame_element && web_frame)
- frames_.push_back(web_frame);
- }
- }
-}
-
-bool DomSerializer::SerializeDom() {
- // Collect target frames.
- if (!frames_collected_)
- CollectTargetFrames();
- bool did_serialization = false;
- // Get GURL for main frame.
- GURL main_page_gurl(KURLToGURL(
- specified_webframeimpl_->frame()->loader()->url()));
-
- // Go through all frames for serializing DOM for whole page, include
- // sub-frames.
- for (int i = 0; i < static_cast<int>(frames_.size()); ++i) {
- // Get current serializing frame.
- WebFrameImpl* current_frame = frames_[i];
- // Get current using document.
- WebCore::Document* current_doc = current_frame->frame()->document();
- // Get current frame's URL.
- const WebCore::KURL& current_frame_kurl =
- current_frame->frame()->loader()->url();
- GURL current_frame_gurl(KURLToGURL(current_frame_kurl));
-
- // Check whether we have done this document.
- if (local_links_.find(current_frame_gurl.spec()) != local_links_.end()) {
- // A new document, we will serialize it.
- did_serialization = true;
- // Get target encoding for current document.
- WebCore::String encoding = current_frame->frame()->loader()->encoding();
- // Create the text encoding object with target encoding.
- WebCore::TextEncoding text_encoding(encoding);
- // Construct serialize parameter for late processing document.
- SerializeDomParam param(
- current_frame_gurl,
- encoding.length() ? text_encoding : WebCore::UTF8Encoding(),
- current_doc,
- current_frame_gurl == main_page_gurl ?
- local_directory_name_ :
- FilePath());
-
- // Process current document.
- WebCore::Element* root_element = current_doc->documentElement();
- if (root_element)
- BuildContentForNode(root_element, &param);
-
- // Sink the remainder data and finish serializing current frame.
- delegate_->DidSerializeDataForFrame(current_frame_gurl, data_buffer_,
- DomSerializerDelegate::CURRENT_FRAME_IS_FINISHED);
- // Clear the buffer.
- data_buffer_.clear();
- }
- }
-
- // We have done call frames, so we send message to embedder to tell it that
- // frames are finished serializing.
- DCHECK(data_buffer_.empty());
- delegate_->DidSerializeDataForFrame(GURL(), data_buffer_,
- DomSerializerDelegate::ALL_FRAMES_ARE_FINISHED);
-
- return did_serialization;
-}
-
-} // namespace webkit_glue
diff --git a/webkit/glue/dom_serializer.h b/webkit/glue/dom_serializer.h
deleted file mode 100644
index 3c70431..0000000
--- a/webkit/glue/dom_serializer.h
+++ /dev/null
@@ -1,180 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef WEBKIT_GLUE_DOM_SERIALIZER_H__
-#define WEBKIT_GLUE_DOM_SERIALIZER_H__
-
-#include <string>
-
-#include "base/file_path.h"
-#include "base/hash_tables.h"
-#include "googleurl/src/gurl.h"
-
-namespace WebCore {
-class Document;
-class Element;
-class Node;
-class String;
-class TextEncoding;
-}
-
-namespace WebKit {
-class WebFrame;
-class WebFrameImpl;
-}
-
-namespace webkit_glue {
-
-class DomSerializerDelegate;
-
-// Get html data by serializing all frames of current page with lists
-// which contain all resource links that have local copy.
-// contain all saved auxiliary files included all sub frames and resources.
-// This function will find out all frames and serialize them to HTML data.
-// We have a data buffer to temporary saving generated html data. We will
-// sequentially call WebViewDelegate::SendSerializedHtmlData once the data
-// buffer is full. See comments of WebViewDelegate::SendSerializedHtmlData
-// for getting more information.
-class DomSerializer {
- public:
- // Do serialization action. Return false means no available frame has been
- // serialized, otherwise return true.
- bool SerializeDom();
- // The parameter specifies which frame need to be serialized.
- // The parameter recursive_serialization specifies whether we need to
- // serialize all sub frames of the specified frame or not.
- // The parameter delegate specifies the pointer of interface
- // DomSerializerDelegate provide sink interface which can receive the
- // individual chunks of data to be saved.
- // The parameter links contain original URLs of all saved links.
- // The parameter local_paths contain corresponding local file paths of all
- // saved links, which matched with vector:links one by one.
- // The parameter local_directory_name is relative path of directory which
- // contain all saved auxiliary files included all sub frames and resources.
- DomSerializer(WebKit::WebFrame* webframe,
- bool recursive_serialization,
- DomSerializerDelegate* delegate,
- const std::vector<GURL>& links,
- const std::vector<FilePath>& local_paths,
- const FilePath& local_directory_name);
-
- // Generate the META for charset declaration.
- static std::wstring GenerateMetaCharsetDeclaration(
- const std::wstring& charset);
- // Generate the MOTW declaration.
- static std::string GenerateMarkOfTheWebDeclaration(const GURL& url);
- // Generate the default base tag declaration.
- static std::wstring GenerateBaseTagDeclaration(
- const std::wstring& base_target);
-
- private:
- // Specified frame which need to be serialized;
- WebKit::WebFrameImpl* specified_webframeimpl_;
- // This hash_map is used to map resource URL of original link to its local
- // file path.
- typedef base::hash_map<std::string, FilePath> LinkLocalPathMap;
- // local_links_ include all pair of local resource path and corresponding
- // original link.
- LinkLocalPathMap local_links_;
- // Pointer of DomSerializerDelegate
- DomSerializerDelegate* delegate_;
- // Data buffer for saving result of serialized DOM data.
- std::string data_buffer_;
- // Passing true to recursive_serialization_ indicates we will serialize not
- // only the specified frame but also all sub-frames in the specific frame.
- // Otherwise we only serialize the specified frame excluded all sub-frames.
- bool recursive_serialization_;
- // Flag indicates whether we have collected all frames which need to be
- // serialized or not;
- bool frames_collected_;
- // Local directory name of all local resource files.
- const FilePath& local_directory_name_;
- // Vector for saving all frames which need to be serialized.
- std::vector<WebKit::WebFrameImpl*> frames_;
-
- struct SerializeDomParam {
- // Frame URL of current processing document presented by GURL
- const GURL& current_frame_gurl;
- // Current using text encoding object.
- const WebCore::TextEncoding& text_encoding;
-
- // Document object of current frame.
- WebCore::Document* doc;
- // Local directory name of all local resource files.
- const FilePath& directory_name;
-
- // Flag indicates current doc is html document or not. It's a cache value
- // of Document.isHTMLDocument().
- bool is_html_document;
- // Flag which indicate whether we have met document type declaration.
- bool has_doctype;
- // Flag which indicate whether will process meta issue.
- bool has_checked_meta;
- // This meta element need to be skipped when serializing DOM.
- const WebCore::Element* skip_meta_element;
- // Flag indicates we are in script or style tag.
- bool is_in_script_or_style_tag;
- // Flag indicates whether we have written xml document declaration.
- // It is only used in xml document
- bool has_doc_declaration;
- // Flag indicates whether we have added additional contents before end tag.
- // This flag will be re-assigned in each call of function
- // PostActionAfterSerializeOpenTag and it could be changed in function
- // PreActionBeforeSerializeEndTag if the function adds new contents into
- // serialization stream.
- bool has_added_contents_before_end;
-
- // Constructor.
- SerializeDomParam(
- const GURL& current_frame_gurl,
- const WebCore::TextEncoding& text_encoding,
- WebCore::Document* doc,
- const FilePath& directory_name);
-
- private:
- DISALLOW_EVIL_CONSTRUCTORS(SerializeDomParam);
- };
-
- // Collect all target frames which need to be serialized.
- void CollectTargetFrames();
- // Before we begin serializing open tag of a element, we give the target
- // element a chance to do some work prior to add some additional data.
- WebCore::String PreActionBeforeSerializeOpenTag(
- const WebCore::Element* element,
- SerializeDomParam* param,
- bool* need_skip);
- // After we finish serializing open tag of a element, we give the target
- // element a chance to do some post work to add some additional data.
- WebCore::String PostActionAfterSerializeOpenTag(
- const WebCore::Element* element,
- SerializeDomParam* param);
- // Before we begin serializing end tag of a element, we give the target
- // element a chance to do some work prior to add some additional data.
- WebCore::String PreActionBeforeSerializeEndTag(
- const WebCore::Element* element,
- SerializeDomParam* param, bool* need_skip);
- // After we finish serializing end tag of a element, we give the target
- // element a chance to do some post work to add some additional data.
- WebCore::String PostActionAfterSerializeEndTag(
- const WebCore::Element* element,
- SerializeDomParam* param);
- // Save generated html content to data buffer.
- void SaveHtmlContentToBuffer(const WebCore::String& result,
- SerializeDomParam* param);
- // Serialize open tag of an specified element.
- void OpenTagToString(const WebCore::Element* element,
- SerializeDomParam* param);
- // Serialize end tag of an specified element.
- void EndTagToString(const WebCore::Element* element,
- SerializeDomParam* param);
- // Build content for a specified node
- void BuildContentForNode(const WebCore::Node* node,
- SerializeDomParam* param);
-
- DISALLOW_EVIL_CONSTRUCTORS(DomSerializer);
-};
-
-} // namespace webkit_glue
-
-#endif // WEBKIT_GLUE_DOM_SERIALIZER_H__
diff --git a/webkit/glue/dom_serializer_delegate.h b/webkit/glue/dom_serializer_delegate.h
deleted file mode 100644
index 7a738c4..0000000
--- a/webkit/glue/dom_serializer_delegate.h
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef WEBKIT_GLUE_DOM_SERIALIZER_DELEGATE_H__
-#define WEBKIT_GLUE_DOM_SERIALIZER_DELEGATE_H__
-
-#include <string>
-
-class GURL;
-
-namespace webkit_glue {
-
-// This class is used for providing sink interface that can be used to receive
-// the individual chunks of data to be saved.
-class DomSerializerDelegate {
- public:
- // This enum indicates This sink interface can receive the individual chunks
- // of serialized data to be saved, so we use values of following enum
- // definition to indicate the serialization status of serializing all html
- // content. If current frame is not complete serialized, call
- // DidSerializeDataForFrame with URL of current frame, data, data length and
- // flag CURRENT_FRAME_IS_NOT_FINISHED.
- // If current frame is complete serialized, call DidSerializeDataForFrame
- // with URL of current frame, data, data length and flag
- // CURRENT_FRAME_IS_FINISHED.
- // If all frames of page are complete serialized, call
- // DidSerializeDataForFrame with empty URL, empty data, 0 and flag
- // ALL_FRAMES_ARE_FINISHED.
- enum PageSavingSerializationStatus {
- // Current frame is not finished saving.
- CURRENT_FRAME_IS_NOT_FINISHED = 0,
- // Current frame is finished saving.
- CURRENT_FRAME_IS_FINISHED,
- // All frame are finished saving.
- ALL_FRAMES_ARE_FINISHED,
- };
-
- // Receive the individual chunks of serialized data to be saved.
- // The parameter frame_url specifies what frame the data belongs. The
- // parameter data contains the available data for saving. The parameter
- // status indicates the status of data serialization.
- virtual void DidSerializeDataForFrame(const GURL& frame_url,
- const std::string& data, PageSavingSerializationStatus status) = 0;
-
- DomSerializerDelegate() { }
- virtual ~DomSerializerDelegate() { }
-};
-
-} // namespace webkit_glue
-
-#endif // WEBKIT_GLUE_DOM_SERIALIZER_DELEGATE_H__
diff --git a/webkit/glue/dom_serializer_unittest.cc b/webkit/glue/dom_serializer_unittest.cc
index f6c4bb75..da31347 100644
--- a/webkit/glue/dom_serializer_unittest.cc
+++ b/webkit/glue/dom_serializer_unittest.cc
@@ -2,60 +2,94 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "config.h"
-
#include "base/compiler_specific.h"
-
-MSVC_PUSH_WARNING_LEVEL(0);
-#include "Document.h"
-#include "DocumentType.h"
-#include "Element.h"
-#include "FrameLoader.h"
-#include "FrameView.h"
-#include "HTMLAllCollection.h"
-#include "HTMLHeadElement.h"
-#include "HTMLMetaElement.h"
-#include "HTMLNames.h"
-#include "KURL.h"
-#include "markup.h"
-#include "SharedBuffer.h"
-#include "SubstituteData.h"
-MSVC_POP_WARNING();
-#undef LOG
-
#include "base/file_path.h"
#include "base/file_util.h"
#include "base/hash_tables.h"
#include "base/string_util.h"
#include "net/base/net_util.h"
#include "net/url_request/url_request_context.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebCString.h"
#include "third_party/WebKit/WebKit/chromium/public/WebData.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebDocument.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebElement.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNode.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNodeCollection.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebNodeList.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebPageSerializer.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebPageSerializerClient.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebString.h"
#include "third_party/WebKit/WebKit/chromium/public/WebURL.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebVector.h"
#include "third_party/WebKit/WebKit/chromium/public/WebView.h"
#include "webkit/glue/dom_operations.h"
-#include "webkit/glue/dom_operations_private.h"
-#include "webkit/glue/dom_serializer.h"
-#include "webkit/glue/dom_serializer_delegate.h"
-#include "webkit/glue/glue_util.h"
-#include "third_party/WebKit/WebKit/chromium/src/WebFrameImpl.h"
+#include "webkit/glue/webkit_glue.h"
#include "webkit/tools/test_shell/simple_resource_loader_bridge.h"
#include "webkit/tools/test_shell/test_shell_test.h"
-using WebKit::WebFrameImpl;
+using WebKit::WebCString;
+using WebKit::WebData;
+using WebKit::WebDocument;
+using WebKit::WebElement;
+using WebKit::WebFrame;
+using WebKit::WebNode;
+using WebKit::WebNodeCollection;
+using WebKit::WebNodeList;
+using WebKit::WebPageSerializer;
+using WebKit::WebPageSerializerClient;
+using WebKit::WebNode;
+using WebKit::WebString;
+using WebKit::WebURL;
+using WebKit::WebView;
+using WebKit::WebVector;
namespace {
+// Iterate recursively over sub-frames to find one with a given url.
+WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) {
+ if (!web_view->mainFrame())
+ return NULL;
+
+ std::vector<WebFrame*> stack;
+ stack.push_back(web_view->mainFrame());
+
+ while (!stack.empty()) {
+ WebFrame* current_frame = stack.back();
+ stack.pop_back();
+ if (GURL(current_frame->url()) == url)
+ return current_frame;
+ WebNodeCollection all = current_frame->document().all();
+ for (WebNode node = all.firstItem();
+ !node.isNull(); node = all.nextItem()) {
+ if (!node.isElementNode())
+ continue;
+ // Check frame tag and iframe tag
+ WebElement element = node.toElement<WebElement>();
+ if (!element.hasTagName("frame") && !element.hasTagName("iframe"))
+ continue;
+ WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element);
+ if (sub_frame)
+ stack.push_back(sub_frame);
+ }
+ }
+ return NULL;
+}
+
class DomSerializerTests : public TestShellTest,
- public webkit_glue::DomSerializerDelegate {
+ public WebPageSerializerClient {
public:
DomSerializerTests()
: local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { }
// DomSerializerDelegate.
- void DidSerializeDataForFrame(const GURL& frame_url,
- const std::string& data, PageSavingSerializationStatus status) {
+ void didSerializeDataForFrame(const WebURL& frame_web_url,
+ const WebCString& data,
+ PageSerializationStatus status) {
+
+ GURL frame_url(frame_web_url);
// If the all frames are finished saving, check all finish status
- if (status == ALL_FRAMES_ARE_FINISHED) {
+ if (status == WebPageSerializerClient::AllFramesAreFinished) {
SerializationFinishStatusMap::iterator it =
serialization_finish_status_.begin();
for (; it != serialization_finish_status_.end(); ++it)
@@ -77,10 +111,10 @@ class DomSerializerTests : public TestShellTest,
ASSERT_FALSE(it->second);
// Add data to corresponding frame's content.
- serialized_frame_map_[frame_url.spec()] += data;
+ serialized_frame_map_[frame_url.spec()] += data.data();
// Current frame is completed saving, change the finish status.
- if (status == CURRENT_FRAME_IS_FINISHED)
+ if (status == WebPageSerializerClient::CurrentFrameIsFinished)
it->second = true;
}
@@ -106,27 +140,23 @@ class DomSerializerTests : public TestShellTest,
// the document.
void LoadContents(const std::string& contents,
const GURL& base_url,
- const WebCore::String encoding_info) {
+ const WebString encoding_info) {
test_shell_->ResetTestController();
// If input encoding is empty, use UTF-8 as default encoding.
if (encoding_info.isEmpty()) {
test_shell_->webView()->mainFrame()->loadHTMLString(contents, base_url);
} else {
+ WebData data(contents.data(), contents.length());
+
// Do not use WebFrame.LoadHTMLString because it assumes that input
// html contents use UTF-8 encoding.
// TODO(darin): This should use WebFrame::loadData.
- WebFrameImpl* web_frame =
- static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
+ WebFrame* web_frame =
+ test_shell_->webView()->mainFrame();
+
ASSERT_TRUE(web_frame != NULL);
- int len = static_cast<int>(contents.size());
- RefPtr<WebCore::SharedBuffer> buf(
- WebCore::SharedBuffer::create(contents.data(), len));
-
- WebCore::SubstituteData subst_data(
- buf, WebCore::String("text/html"), encoding_info, WebCore::KURL());
- WebCore::ResourceRequest request(webkit_glue::GURLToKURL(base_url),
- WebCore::CString());
- web_frame->frame()->loader()->load(request, subst_data, false);
+
+ web_frame->loadData(data, "text/html", encoding_info, base_url);
}
test_shell_->WaitTestFinished();
@@ -137,20 +167,24 @@ class DomSerializerTests : public TestShellTest,
// sub-frames.
void SerializeDomForURL(const GURL& page_url,
bool recursive_serialization) {
- // Find corresponding WebFrameImpl according to page_url.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), page_url);
+ // Find corresponding WebFrame according to page_url.
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(),
+ page_url);
ASSERT_TRUE(web_frame != NULL);
// Add input file URl to links_.
- links_.push_back(page_url);
+ links_.assign(&page_url,1);
// Add dummy file path to local_path_.
- local_paths_.push_back(FilePath(FILE_PATH_LITERAL("c:\\dummy.htm")));
+ WebString file_path = webkit_glue::FilePathStringToWebString(
+ FILE_PATH_LITERAL("c:\\dummy.htm"));
+ local_paths_.assign(&file_path, 1);
// Start serializing DOM.
- webkit_glue::DomSerializer dom_serializer(web_frame,
- recursive_serialization, this, links_, local_paths_,
- local_directory_name_);
- ASSERT_TRUE(dom_serializer.SerializeDom());
+ bool result = WebPageSerializer::serialize(web_frame,
+ recursive_serialization,
+ static_cast<WebPageSerializerClient*>(this),
+ links_,
+ local_paths_,
+ webkit_glue::FilePathToWebString(local_directory_name_));
+ ASSERT_TRUE(result);
ASSERT_TRUE(serialized_);
}
@@ -164,10 +198,10 @@ class DomSerializerTests : public TestShellTest,
// Flag indicates whether the process of serializing DOM is finished or not.
bool serialized_;
// The links_ contain dummy original URLs of all saved links.
- std::vector<GURL> links_;
+ WebVector<WebURL> links_;
// The local_paths_ contain dummy corresponding local file paths of all saved
// links, which matched links_ one by one.
- std::vector<FilePath> local_paths_;
+ WebVector<WebString> local_paths_;
// The local_directory_name_ is dummy relative path of directory which
// contain all saved auxiliary files included all sub frames and resources.
const FilePath local_directory_name_;
@@ -184,55 +218,61 @@ class DomSerializerTests : public TestShellTest,
}
};
+// Helper function that tests whether the first node in the doc is a doc type
+// node.
+bool HasDocType(const WebDocument& doc) {
+ WebNode node = doc.firstChild();
+ if (node.isNull())
+ return false;
+ return node.nodeType() == WebNode::DocumentTypeNode;
+}
+
// Helper function for checking whether input node is META tag. Return true
// means it is META element, otherwise return false. The parameter charset_info
// return actual charset info if the META tag has charset declaration.
-bool IsMetaElement(const WebCore::Node* node, WebCore::String* charset_info) {
- if (!node->isHTMLElement())
+bool IsMetaElement(const WebNode& node, std::string& charset_info) {
+ if (!node.isElementNode())
return false;
- if (!(static_cast<const WebCore::HTMLElement*>(node))->hasTagName(
- WebCore::HTMLNames::metaTag))
+ const WebElement meta = node.toConstElement<WebElement>();
+ if (!meta.hasTagName("meta"))
return false;
- charset_info->remove(0, charset_info->length());
- const WebCore::HTMLMetaElement* meta =
- static_cast<const WebCore::HTMLMetaElement*>(node);
+ charset_info.erase(0, charset_info.length());
// Check the META charset declaration.
- WebCore::String equiv = meta->httpEquiv();
- if (equalIgnoringCase(equiv, "content-type")) {
- WebCore::String content = meta->content();
- int pos = content.find("charset", 0, false);
+ WebString httpEquiv = meta.getAttribute("http-equiv");
+ if (LowerCaseEqualsASCII(httpEquiv, "content-type")) {
+ std::string content = meta.getAttribute("content").utf8();
+ int pos = content.find("charset", 0);
if (pos > -1) {
// Add a dummy charset declaration to charset_info, which indicates this
// META tag has charset declaration although we do not get correct value
// yet.
- charset_info->append("has-charset-declaration");
+ charset_info.append("has-charset-declaration");
int remaining_length = content.length() - pos - 7;
if (!remaining_length)
return true;
- const UChar* start_pos = content.characters() + pos + 7;
+ int start_pos = pos + 7;
// Find "=" symbol.
while (remaining_length--)
- if (*start_pos++ == L'=')
+ if (content[start_pos++] == L'=')
break;
// Skip beginning space.
while (remaining_length) {
- if (*start_pos > 0x0020)
+ if (content[start_pos] > 0x0020)
break;
++start_pos;
--remaining_length;
}
if (!remaining_length)
return true;
- const UChar* end_pos = start_pos;
+ int end_pos = start_pos;
// Now we find out the start point of charset info. Search the end point.
while (remaining_length--) {
- if (*end_pos <= 0x0020 || *end_pos == L';')
+ if (content[end_pos] <= 0x0020 || content[end_pos] == L';')
break;
++end_pos;
}
// Get actual charset info.
- *charset_info = WebCore::String(start_pos,
- static_cast<unsigned>(end_pos - start_pos));
+ charset_info = content.substr(start_pos, end_pos - start_pos);
return true;
}
}
@@ -250,12 +290,10 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithDocType) {
// Load the test file.
LoadPageFromURL(file_url);
// Make sure original contents have document type.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->doctype() != NULL);
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(HasDocType(doc));
// Do serialization.
SerializeDomForURL(file_url, false);
// Load the serialized contents.
@@ -263,12 +301,11 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithDocType) {
const std::string& serialized_contents =
GetSerializedContentForFrame(file_url);
LoadContents(serialized_contents, file_url,
- web_frame->frame()->loader()->encoding());
+ web_frame->encoding());
// Make sure serialized contents still have document type.
- web_frame =
- static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
- doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->doctype() != NULL);
+ web_frame = test_shell_->webView()->mainFrame();
+ doc = web_frame->document();
+ ASSERT_TRUE(HasDocType(doc));
}
// If original contents do not have document type, the serialized contents
@@ -282,12 +319,10 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithoutDocType) {
// Load the test file.
LoadPageFromURL(file_url);
// Make sure original contents do not have document type.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->doctype() == NULL);
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(!HasDocType(doc));
// Do serialization.
SerializeDomForURL(file_url, false);
// Load the serialized contents.
@@ -295,12 +330,11 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithoutDocType) {
const std::string& serialized_contents =
GetSerializedContentForFrame(file_url);
LoadContents(serialized_contents, file_url,
- web_frame->frame()->loader()->encoding());
+ web_frame->encoding());
// Make sure serialized contents do not have document type.
- web_frame =
- static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
- doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->doctype() == NULL);
+ web_frame = test_shell_->webView()->mainFrame();
+ doc = web_frame->document();
+ ASSERT_TRUE(!HasDocType(doc));
}
// Serialize XML document which has all 5 built-in entities. After
@@ -340,7 +374,7 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithAddingMOTW) {
ASSERT_TRUE(file_url.SchemeIsFile());
// Make sure original contents does not have MOTW;
std::string motw_declaration =
- webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url);
+ WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
ASSERT_FALSE(motw_declaration.empty());
// The encoding of original contents is ISO-8859-1, so we convert the MOTW
// declaration to ASCII and search whether original contents has it or not.
@@ -373,21 +407,19 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
LoadPageFromURL(file_url);
// Make sure there is no META charset declaration in original document.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- WebCore::HTMLHeadElement* head_ele = doc->head();
- ASSERT_TRUE(head_ele != NULL);
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ WebElement head_element = doc.head();
+ ASSERT_TRUE(!head_element.isNull());
// Go through all children of HEAD element.
- WebCore::String charset_info;
- for (const WebCore::Node *child = head_ele->firstChild(); child != NULL;
- child = child->nextSibling())
- if (IsMetaElement(child, &charset_info))
- ASSERT_TRUE(charset_info.isEmpty());
-
+ for (WebNode child = head_element.firstChild(); !child.isNull();
+ child = child.nextSibling()) {
+ std::string charset_info;
+ if (IsMetaElement(child, charset_info))
+ ASSERT_TRUE(charset_info.empty());
+ }
// Do serialization.
SerializeDomForURL(file_url, false);
@@ -396,28 +428,30 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
const std::string& serialized_contents =
GetSerializedContentForFrame(file_url);
LoadContents(serialized_contents, file_url,
- web_frame->frame()->loader()->encoding());
+ web_frame->encoding());
// Make sure the first child of HEAD element is META which has charset
// declaration in serialized contents.
- web_frame =
- static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
+ web_frame = test_shell_->webView()->mainFrame();
ASSERT_TRUE(web_frame != NULL);
- doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- head_ele = doc->head();
- ASSERT_TRUE(head_ele != NULL);
- WebCore::Node* meta_node = head_ele->firstChild();
- ASSERT_TRUE(meta_node != NULL);
+ doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ head_element = doc.head();
+ ASSERT_TRUE(!head_element.isNull());
+ WebNode meta_node = head_element.firstChild();
+ ASSERT_TRUE(!meta_node.isNull());
// Get meta charset info.
- ASSERT_TRUE(IsMetaElement(meta_node, &charset_info));
- ASSERT_TRUE(!charset_info.isEmpty());
- ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding());
+ std::string charset_info2;
+ ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
+ ASSERT_TRUE(!charset_info2.empty());
+ ASSERT_TRUE(charset_info2 == std::string(web_frame->encoding().utf8()));
// Make sure no more additional META tags which have charset declaration.
- for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL;
- child = child->nextSibling())
- if (IsMetaElement(child, &charset_info))
- ASSERT_TRUE(charset_info.isEmpty());
+ for (WebNode child = meta_node.nextSibling(); !child.isNull();
+ child = child.nextSibling()) {
+ std::string charset_info;
+ if (IsMetaElement(child, charset_info))
+ ASSERT_TRUE(charset_info.empty());
+ }
}
// When serializing DOM, if the original document has multiple META charset
@@ -437,24 +471,22 @@ TEST_F(DomSerializerTests,
// Make sure there are multiple META charset declarations in original
// document.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- WebCore::HTMLHeadElement* head_ele = doc->head();
- ASSERT_TRUE(head_ele != NULL);
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ WebElement head_ele = doc.head();
+ ASSERT_TRUE(!head_ele.isNull());
// Go through all children of HEAD element.
int charset_declaration_count = 0;
- WebCore::String charset_info;
- for (const WebCore::Node *child = head_ele->firstChild(); child != NULL;
- child = child->nextSibling()) {
- if (IsMetaElement(child, &charset_info) && !charset_info.isEmpty())
+ for (WebNode child = head_ele.firstChild(); !child.isNull();
+ child = child.nextSibling()) {
+ std::string charset_info;
+ if (IsMetaElement(child, charset_info) && !charset_info.empty())
charset_declaration_count++;
}
// The original doc has more than META tags which have charset declaration.
- ASSERT(charset_declaration_count > 1);
+ ASSERT_TRUE(charset_declaration_count > 1);
// Do serialization.
SerializeDomForURL(file_url, false);
@@ -464,28 +496,30 @@ TEST_F(DomSerializerTests,
const std::string& serialized_contents =
GetSerializedContentForFrame(file_url);
LoadContents(serialized_contents, file_url,
- web_frame->frame()->loader()->encoding());
+ web_frame->encoding());
// Make sure only first child of HEAD element is META which has charset
// declaration in serialized contents.
- web_frame =
- static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
+ web_frame = test_shell_->webView()->mainFrame();
ASSERT_TRUE(web_frame != NULL);
- doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- head_ele = doc->head();
- ASSERT_TRUE(head_ele != NULL);
- WebCore::Node* meta_node = head_ele->firstChild();
- ASSERT_TRUE(meta_node != NULL);
+ doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ head_ele = doc.head();
+ ASSERT_TRUE(!head_ele.isNull());
+ WebNode meta_node = head_ele.firstChild();
+ ASSERT_TRUE(!meta_node.isNull());
// Get meta charset info.
- ASSERT_TRUE(IsMetaElement(meta_node, &charset_info));
- ASSERT_TRUE(!charset_info.isEmpty());
- ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding());
+ std::string charset_info2;
+ ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
+ ASSERT_TRUE(!charset_info2.empty());
+ ASSERT_TRUE(charset_info2 == std::string(web_frame->encoding().utf8()));
// Make sure no more additional META tags which have charset declaration.
- for (const WebCore::Node *child = meta_node->nextSibling(); child != NULL;
- child = child->nextSibling())
- if (IsMetaElement(child, &charset_info))
- ASSERT_TRUE(charset_info.isEmpty());
+ for (WebNode child = meta_node.nextSibling(); !child.isNull();
+ child = child.nextSibling()) {
+ std::string charset_info;
+ if (IsMetaElement(child, charset_info))
+ ASSERT_TRUE(charset_info.empty());
+ }
}
// Test situation of html entities in text when serializing HTML DOM.
@@ -501,20 +535,19 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInText) {
static const char* const original_contents =
"<HTML><BODY>&amp;&lt;&gt;\"\'</BODY></HTML>";
// Load the test contents.
- LoadContents(original_contents, file_url, "");
+ LoadContents(original_contents, file_url, WebString());
// Get BODY's text content in DOM.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- WebCore::HTMLElement* body_ele = doc->body();
- ASSERT_TRUE(body_ele != NULL);
- WebCore::Node* text_node = body_ele->firstChild();
- ASSERT_TRUE(text_node->isTextNode());
- ASSERT_TRUE(createMarkup(text_node) == "&amp;&lt;&gt;\"\'");
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ WebElement body_ele = doc.body();
+ ASSERT_TRUE(!body_ele.isNull());
+ WebNode text_node = body_ele.firstChild();
+ ASSERT_TRUE(text_node.isTextNode());
+ ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) ==
+ "&amp;&lt;&gt;\"\'");
// Do serialization.
SerializeDomForURL(file_url, false);
// Compare the serialized contents with original contents.
@@ -526,22 +559,21 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInText) {
// Because we add MOTW when serializing DOM, so before comparison, we also
// need to add MOTW to original_contents.
std::string original_str =
- webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url);
+ WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
original_str += original_contents;
// Since WebCore now inserts a new HEAD element if there is no HEAD element
// when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.)
// We need to append the HEAD content and corresponding META content if we
// find WebCore-generated HEAD element.
- if (doc->head()) {
- WebCore::String encoding = web_frame->frame()->loader()->encoding();
+ if (!doc.head().isNull()) {
+ WebString encoding = web_frame->encoding();
std::string htmlTag("<HTML>");
std::string::size_type pos = original_str.find(htmlTag);
ASSERT_NE(std::string::npos, pos);
pos += htmlTag.length();
std::string head_part("<HEAD>");
- head_part += WideToASCII(
- webkit_glue::DomSerializer::GenerateMetaCharsetDeclaration(
- webkit_glue::StringToStdWString(encoding)));
+ head_part +=
+ WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
head_part += "</HEAD>";
original_str.insert(pos, head_part);
}
@@ -562,19 +594,16 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInAttributeValue) {
static const char* const original_contents =
"<HTML><BODY title=\"&amp;&lt;&gt;&quot;&#39;\"></BODY></HTML>";
// Load the test contents.
- LoadContents(original_contents, file_url, "");
+ LoadContents(original_contents, file_url, WebString());
// Get value of BODY's title attribute in DOM.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- WebCore::HTMLElement* body_ele = doc->body();
- ASSERT_TRUE(body_ele != NULL);
- const WebCore::String& value = body_ele->getAttribute(
- WebCore::HTMLNames::titleAttr);
- ASSERT_TRUE(value == WebCore::String("&<>\"\'"));
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ WebElement body_ele = doc.body();
+ ASSERT_TRUE(!body_ele.isNull());
+ WebString value = body_ele.getAttribute("title");
+ ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'");
// Do serialization.
SerializeDomForURL(file_url, false);
// Compare the serialized contents with original contents.
@@ -584,18 +613,17 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEntitiesInAttributeValue) {
// Compare the serialized contents with original contents to make sure
// they are same.
std::string original_str =
- webkit_glue::DomSerializer::GenerateMarkOfTheWebDeclaration(file_url);
+ WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
original_str += original_contents;
- if (doc->head()) {
- WebCore::String encoding = web_frame->frame()->loader()->encoding();
+ if (!doc.isNull()) {
+ WebString encoding = web_frame->encoding();
std::string htmlTag("<HTML>");
std::string::size_type pos = original_str.find(htmlTag);
ASSERT_NE(std::string::npos, pos);
pos += htmlTag.length();
std::string head_part("<HEAD>");
- head_part += WideToASCII(
- webkit_glue::DomSerializer::GenerateMetaCharsetDeclaration(
- webkit_glue::StringToStdWString(encoding)));
+ head_part +=
+ WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
head_part += "</HEAD>";
original_str.insert(pos, head_part);
}
@@ -612,23 +640,20 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithNonStandardEntities) {
LoadPageFromURL(file_url);
// Get value of BODY's title attribute in DOM.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- WebCore::HTMLElement* body_ele = doc->body();
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ WebElement body_element = doc.body();
// Unescaped string for "&percnt;&nsup;&supl;&apos;".
- static const UChar parsed_value[] = {
+ static const wchar_t parsed_value[] = {
'%', 0x2285, 0x00b9, '\'', 0
};
- const WebCore::String& value = body_ele->getAttribute(
- WebCore::HTMLNames::titleAttr);
- ASSERT_TRUE(value == WebCore::String(parsed_value));
+ WebString value = body_element.getAttribute("title");
+ ASSERT_TRUE(UTF16ToWide(value) == parsed_value);
// Check the BODY content.
- WebCore::Node* text_node = body_ele->firstChild();
- ASSERT_TRUE(text_node->isTextNode());
- ASSERT_TRUE(text_node->nodeValue() == WebCore::String(parsed_value));
+ WebNode text_node = body_element.firstChild();
+ ASSERT_TRUE(text_node.isTextNode());
+ ASSERT_TRUE(UTF16ToWide(text_node.nodeValue()) == parsed_value);
// Do serialization.
SerializeDomForURL(file_url, false);
@@ -667,34 +692,32 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) {
// Since for this test, we assume there is no savable sub-resource links for
// this test file, also all links are relative URLs in this test file, so we
// need to check those relative URLs and make sure document has BASE tag.
- WebFrameImpl* web_frame =
- webkit_glue::GetWebFrameImplFromWebViewForSpecificURL(
- test_shell_->webView(), file_url);
+ WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url);
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
// Go through all descent nodes.
- RefPtr<WebCore::HTMLAllCollection> all = doc->all();
+ WebNodeCollection all = doc.all();
int original_base_tag_count = 0;
- for (WebCore::Node* node = all->firstItem(); node != NULL;
- node = all->nextItem()) {
- if (!node->isHTMLElement())
+ for (WebNode node = all.firstItem(); !node.isNull();
+ node = all.nextItem()) {
+ if (!node.isElementNode())
continue;
- WebCore::Element* element = static_cast<WebCore::Element*>(node);
- if (element->hasTagName(WebCore::HTMLNames::baseTag)) {
+ WebElement element = node.toElement<WebElement>();
+ if (element.hasTagName("base")) {
original_base_tag_count++;
} else {
// Get link.
- const WebCore::AtomicString* value =
+ WebString value =
webkit_glue::GetSubResourceLinkFromElement(element);
- if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) {
- value = &element->getAttribute(WebCore::HTMLNames::hrefAttr);
- if (value->isEmpty())
- value = NULL;
+ if (value.isNull() && element.hasTagName("a")) {
+ value = element.getAttribute("href");
+ if (value.isEmpty())
+ value = WebString();
}
// Each link is relative link.
- if (value) {
- GURL link(WideToUTF8(webkit_glue::StringToStdWString(value->string())));
+ if (!value.isNull()) {
+ GURL link(value.utf8());
ASSERT_TRUE(link.scheme().empty());
}
}
@@ -702,8 +725,7 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) {
ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile);
// Make sure in original document, the base URL is not equal with the
// |path_dir_url|.
- GURL original_base_url(
- WideToUTF8(webkit_glue::StringToStdWString(doc->baseURL())));
+ GURL original_base_url(doc.baseURL());
ASSERT_NE(original_base_url, path_dir_url);
// Do serialization.
@@ -714,37 +736,37 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) {
const std::string& serialized_contents =
GetSerializedContentForFrame(file_url);
LoadContents(serialized_contents, file_url,
- web_frame->frame()->loader()->encoding());
+ web_frame->encoding());
// Make sure all links are absolute URLs and doc there are some number of
// BASE tags in serialized HTML data. Each of those BASE tags have same base
// URL which is as same as URL of current test file.
- web_frame = static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
+ web_frame = test_shell_->webView()->mainFrame();
ASSERT_TRUE(web_frame != NULL);
- doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
+ doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
// Go through all descent nodes.
- all = doc->all();
+ all = doc.all();
int new_base_tag_count = 0;
- for (WebCore::Node* node = all->firstItem(); node != NULL;
- node = all->nextItem()) {
- if (!node->isHTMLElement())
+ for (WebNode node = all.firstItem(); !node.isNull();
+ node = all.nextItem()) {
+ if (!node.isElementNode())
continue;
- WebCore::Element* element = static_cast<WebCore::Element*>(node);
- if (element->hasTagName(WebCore::HTMLNames::baseTag)) {
+ WebElement element = node.toElement<WebElement>();
+ if (element.hasTagName("base")) {
new_base_tag_count++;
} else {
// Get link.
- const WebCore::AtomicString* value =
+ WebString value =
webkit_glue::GetSubResourceLinkFromElement(element);
- if (!value && element->hasTagName(WebCore::HTMLNames::aTag)) {
- value = &element->getAttribute(WebCore::HTMLNames::hrefAttr);
- if (value->isEmpty())
- value = NULL;
+ if (value.isNull() && element.hasTagName("a")) {
+ value = element.getAttribute("href");
+ if (value.isEmpty())
+ value = WebString();
}
// Each link is absolute link.
- if (value) {
- GURL link(WideToUTF8(webkit_glue::StringToStdWString(value->string())));
+ if (!value.isNull()) {
+ GURL link(std::string(value.utf8()));
ASSERT_FALSE(link.scheme().empty());
}
}
@@ -752,8 +774,7 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithBaseTag) {
// We have one more added BASE tag which is generated by JavaScript.
ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1);
// Make sure in new document, the base URL is equal with the |path_dir_url|.
- GURL new_base_url(
- webkit_glue::StringToStdString(doc->baseURL()));
+ GURL new_base_url(doc.baseURL());
ASSERT_EQ(new_base_url, path_dir_url);
}
@@ -768,18 +789,17 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEmptyHead) {
// Load the test html content.
static const char* const empty_head_contents =
"<HTML><HEAD></HEAD><BODY>hello world</BODY></HTML>";
- LoadContents(empty_head_contents, file_url, "");
+ LoadContents(empty_head_contents, file_url, WebString());
// Make sure the head tag is empty.
- WebFrameImpl* web_frame =
- static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
+ WebFrame* web_frame = test_shell_->webView()->mainFrame();
ASSERT_TRUE(web_frame != NULL);
- WebCore::Document* doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- WebCore::HTMLHeadElement* head_ele = doc->head();
- ASSERT_TRUE(head_ele != NULL);
- WTF::PassRefPtr<WebCore::HTMLCollection> children = head_ele->children();
- ASSERT_TRUE(0 == children->length());
+ WebDocument doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ WebElement head_element = doc.head();
+ ASSERT_TRUE(!head_element.isNull());
+ ASSERT_TRUE(!head_element.hasChildNodes());
+ ASSERT_TRUE(head_element.childNodes().length() == 0);
// Do serialization.
SerializeDomForURL(file_url, false);
@@ -789,32 +809,31 @@ TEST_F(DomSerializerTests, SerialzeHTMLDOMWithEmptyHead) {
GetSerializedContentForFrame(file_url);
// Reload serialized contents and make sure there is only one META tag.
- LoadContents(serialized_contents, file_url,
- web_frame->frame()->loader()->encoding());
- web_frame = static_cast<WebFrameImpl*>(test_shell_->webView()->mainFrame());
+ LoadContents(serialized_contents, file_url, web_frame->encoding());
+ web_frame = test_shell_->webView()->mainFrame();
ASSERT_TRUE(web_frame != NULL);
- doc = web_frame->frame()->document();
- ASSERT_TRUE(doc->isHTMLDocument());
- head_ele = doc->head();
- ASSERT_TRUE(head_ele != NULL);
- children = head_ele->children();
- ASSERT_TRUE(1 == children->length());
- WebCore::Node* meta_node = head_ele->firstChild();
- ASSERT_TRUE(meta_node != NULL);
+ doc = web_frame->document();
+ ASSERT_TRUE(doc.isHTMLDocument());
+ head_element = doc.head();
+ ASSERT_TRUE(!head_element.isNull());
+ ASSERT_TRUE(head_element.hasChildNodes());
+ ASSERT_TRUE(head_element.childNodes().length() == 1);
+ WebNode meta_node = head_element.firstChild();
+ ASSERT_TRUE(!meta_node.isNull());
// Get meta charset info.
- WebCore::String charset_info;
- ASSERT_TRUE(IsMetaElement(meta_node, &charset_info));
- ASSERT_TRUE(!charset_info.isEmpty());
- ASSERT_TRUE(charset_info == web_frame->frame()->loader()->encoding());
+ std::string charset_info;
+ ASSERT_TRUE(IsMetaElement(meta_node, charset_info));
+ ASSERT_TRUE(!charset_info.empty());
+ ASSERT_TRUE(charset_info == std::string(web_frame->encoding().utf8()));
// Check the body's first node is text node and its contents are
// "hello world"
- WebCore::HTMLElement* body_ele = doc->body();
- ASSERT_TRUE(body_ele != NULL);
- WebCore::Node* text_node = body_ele->firstChild();
- ASSERT_TRUE(text_node->isTextNode());
- const WebCore::String& text_node_contents = text_node->nodeValue();
- ASSERT_TRUE(text_node_contents == WebCore::String("hello world"));
+ WebElement body_element = doc.body();
+ ASSERT_TRUE(!body_element.isNull());
+ WebNode text_node = body_element.firstChild();
+ ASSERT_TRUE(text_node.isTextNode());
+ WebString text_node_contents = text_node.nodeValue();
+ ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world");
}
} // namespace
diff --git a/webkit/glue/entity_map.cc b/webkit/glue/entity_map.cc
deleted file mode 100644
index 26f094f..0000000
--- a/webkit/glue/entity_map.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "config.h"
-
-#undef LOG
-
-#include "webkit/glue/entity_map.h"
-
-#include "base/hash_tables.h"
-#include "base/string_util.h"
-
-namespace webkit_glue {
-
-// Note that this file is also included by HTMLTokenizer.cpp so we are getting
-// two copies of the data in memory. We can fix this by changing the script
-// that generated the array to create a static const that is its length, but
-// this is low priority since the data is less than 4K.
-#include "HTMLEntityNames.c"
-
-typedef base::hash_map<char16, const char*> EntityMapType;
-
-class EntityMapData {
- public:
- EntityMapData(const Entity* entity_codes, int entity_codes_length,
- bool standard_html_entities)
- : entity_codes_(entity_codes),
- entity_codes_length_(entity_codes_length),
- standard_html_entities_(standard_html_entities),
- map_(NULL) {
- }
- ~EntityMapData() { delete map_; }
- const EntityMapType* GetEntityMapData();
-
- private:
- // Data structure which saves all pairs of Unicode character and its
- // corresponding entity notation.
- const Entity* entity_codes_;
- const int entity_codes_length_;
- // &apos;, &percnt;, &nsup; and &supl; are not defined by the HTML standards.
- // - IE does not support &apos; as an HTML entity (but support it as an XML
- // entity.)
- // - Firefox supports &apos; as an HTML entity.
- // - Both of IE and Firefox don't support &percnt;, &nsup; and &supl;.
- //
- // A web page saved by Chromium should be able to be read by other browsers
- // such as IE and Firefox. Chromium should produce only the standard entity
- // references which other browsers can recognize.
- // So if standard_html_entities_ is true, we will use a numeric character
- // reference for &apos;, and don't use entity references for &percnt;, &nsup;
- // and &supl; for serialization.
- const bool standard_html_entities_;
- // Map the Unicode character to corresponding entity notation.
- EntityMapType* map_;
-
- DISALLOW_EVIL_CONSTRUCTORS(EntityMapData);
-};
-
-const EntityMapType* EntityMapData::GetEntityMapData() {
- if (!map_) {
- // lazily create the entity map.
- map_ = new EntityMapType;
- const Entity* entity_code = &entity_codes_[0];
- for (int i = 0; i < entity_codes_length_; ++i, ++entity_code) {
- // For consistency, use lower case for entity codes that have both.
- EntityMapType::const_iterator it = map_->find(entity_code->code);
- if (it != map_->end() &&
- StringToLowerASCII(std::string(entity_code->name)) == it->second)
- continue;
- if (!standard_html_entities_ ||
- // Don't register &percnt;, &nsup; and &supl;.
- (entity_code->code != '%' &&
- entity_code->code != 0x2285 && entity_code->code != 0x00b9))
- (*map_)[entity_code->code] = entity_code->name;
- }
- if (standard_html_entities_)
- (*map_)[0x0027] = "#39";
- }
- return map_;
-}
-
-static const Entity xml_built_in_entity_codes[] = {
- {"lt", 0x003c},
- {"gt", 0x003e},
- {"amp", 0x0026},
- {"apos", 0x0027},
- {"quot", 0x0022}
-};
-
-const char* EntityMap::GetEntityNameByCode(char16 code, bool is_html) {
- static EntityMapData html_entity_map_singleton(
- wordlist, sizeof(wordlist) / sizeof(Entity), true);
- static EntityMapData xml_entity_map_singleton(
- xml_built_in_entity_codes, arraysize(xml_built_in_entity_codes), false);
-
- const EntityMapType* entity_map;
- if (is_html)
- entity_map = html_entity_map_singleton.GetEntityMapData();
- else
- entity_map = xml_entity_map_singleton.GetEntityMapData();
-
- // Check entity name according to unicode.
- EntityMapType::const_iterator i = entity_map->find(code);
- if (i == entity_map->end())
- // Not found, return NULL.
- return NULL;
- else
- // Found, return entity notation.
- return i->second;
-}
-
-} // namespace webkit_glue
diff --git a/webkit/glue/entity_map.h b/webkit/glue/entity_map.h
deleted file mode 100644
index 1b7e28b..0000000
--- a/webkit/glue/entity_map.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef WEBKIT_GLUE_ENTITY_MAP_H__
-#define WEBKIT_GLUE_ENTITY_MAP_H__
-
-#include <string>
-
-#include "base/basictypes.h"
-#include "base/string16.h"
-
-namespace webkit_glue {
-
-class EntityMap {
- public:
- // Check whether specified unicode has corresponding html or xml built-in
- // entity name. If yes, return the entity notation, if not then return NULL.
- // Parameter is_html indicates check the code in html entity map or in xml
- // entity map. THIS FUNCTION IS NOT THREADSAFE.
- static const char* GetEntityNameByCode(char16 code, bool is_html);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(EntityMap);
-};
-
-} // namespace webkit_glue
-
-#endif // WEBKIT_GLUE_ENTITY_MAP_H__