summaryrefslogtreecommitdiffstats
path: root/webkit/glue/dom_serializer.h
blob: fc6d0ef66e061460f3a920ab8f8626495c4719dd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef WEBKIT_GLUE_DOM_SERIALIZER_H__
#define WEBKIT_GLUE_DOM_SERIALIZER_H__

#include <string>
#include <vector>

#include "base/file_path.h"
#include "base/hash_tables.h"
#include "googleurl/src/gurl.h"

class WebFrame;
class WebFrameImpl;

namespace WebCore {
class Document;
class Element;
class Node;
class String;
class TextEncoding;
}

namespace webkit_glue {

class DomSerializerDelegate;

// Gets HTML data by serializing all frames of the current page, using lists
// which contain all resource links that have a local copy (the saved
// auxiliary files, including all sub frames and resources).
// The serializer finds all target frames and serializes them to HTML data.
// A data buffer temporarily holds the generated HTML data, and
// WebViewDelegate::SendSerializedHtmlData is called sequentially each time
// the data buffer is full. See the comments of
// WebViewDelegate::SendSerializedHtmlData for more information.
class DomSerializer {
 public:
  // Performs the serialization. Returns false if no available frame has been
  // serialized, otherwise returns true.
  bool SerializeDom();

  // Constructs a serializer.
  // |webframe| specifies which frame needs to be serialized.
  // |recursive_serialization| specifies whether all sub frames of the
  // specified frame need to be serialized as well.
  // |delegate| points to the DomSerializerDelegate sink interface which
  // receives the individual chunks of data to be saved.
  // |links| contains the original URLs of all saved links.
  // |local_paths| contains the corresponding local file paths of all saved
  // links, matching |links| one by one.
  // |local_directory_name| is the relative path of the directory which
  // contains all saved auxiliary files, including all sub frames and
  // resources.
  DomSerializer(WebFrame* webframe,
                bool recursive_serialization,
                DomSerializerDelegate* delegate,
                const std::vector<GURL>& links,
                const std::vector<FilePath>& local_paths,
                const FilePath& local_directory_name);

  // Generates the META tag for the charset declaration.
  static std::wstring GenerateMetaCharsetDeclaration(
      const std::wstring& charset);
  // Generates the MOTW (Mark of the Web) declaration for |url|.
  static std::string GenerateMarkOfTheWebDeclaration(const GURL& url);
  // Generates the default base tag declaration.
  static std::wstring GenerateBaseTagDeclaration(
      const std::wstring& base_target);

 private:
  // The specified frame which needs to be serialized.
  WebFrameImpl* specified_webframeimpl_;
  // This hash_map maps the resource URL of an original link to its local
  // file path.
  typedef base::hash_map<std::string, FilePath> LinkLocalPathMap;
  // local_links_ includes every pair of local resource path and
  // corresponding original link.
  LinkLocalPathMap local_links_;
  // Pointer to the DomSerializerDelegate.
  DomSerializerDelegate* delegate_;
  // Data buffer for saving the result of the serialized DOM data.
  std::string data_buffer_;
  // True means not only the specified frame but also all sub-frames in the
  // specified frame are serialized. Otherwise only the specified frame is
  // serialized, excluding all sub-frames.
  bool recursive_serialization_;
  // Flag which indicates whether all frames which need to be serialized have
  // been collected.
  bool frames_collected_;
  // Local directory name of all local resource files.
  // Held by value (not by reference) so this long-lived member cannot dangle
  // if the FilePath it was initialized from -- presumably the constructor's
  // |local_directory_name| argument, which may be a temporary -- is destroyed
  // before the serializer is.
  const FilePath local_directory_name_;
  // Vector holding all frames which need to be serialized.
  std::vector<WebFrameImpl*> frames_;

  // Bundle of per-document state handed to the serialization helpers below.
  // NOTE: the reference members do not own their referents; whoever creates a
  // SerializeDomParam must keep those objects alive while the param is in
  // use.
  struct SerializeDomParam {
    // Frame URL of the document currently being processed, as a GURL.
    const GURL& current_frame_gurl;
    // Text encoding object currently in use.
    const WebCore::TextEncoding& text_encoding;

    // Document object of the current frame.
    WebCore::Document* doc;
    // Local directory name of all local resource files.
    const FilePath& directory_name;

    // Flag which indicates whether the current doc is an html document. It is
    // a cached value of Document.isHTMLDocument().
    bool is_html_document;
    // Flag which indicates whether a document type declaration has been met.
    bool has_doctype;
    // Flag which indicates whether the meta issue has been checked.
    // NOTE(review): the original wording ("whether will process meta issue")
    // was ambiguous -- confirm the exact meaning against the implementation.
    bool has_checked_meta;
    // This meta element needs to be skipped when serializing the DOM.
    const WebCore::Element* skip_meta_element;
    // Flag which indicates we are inside a script or style tag.
    bool is_in_script_or_style_tag;
    // Flag which indicates whether the xml document declaration has been
    // written. It is only used for xml documents.
    bool has_doc_declaration;
    // Flag which indicates whether additional contents have been added before
    // the end tag. This flag is re-assigned in each call of
    // PostActionAfterSerializeOpenTag and may be changed in
    // PreActionBeforeSerializeEndTag if that function adds new contents to
    // the serialization stream.
    bool has_added_contents_before_end;

    // Constructor. The parameters correspond to the members of the same name.
    SerializeDomParam(
        const GURL& current_frame_gurl,
        const WebCore::TextEncoding& text_encoding,
        WebCore::Document* doc,
        const FilePath& directory_name);

   private:
    DISALLOW_EVIL_CONSTRUCTORS(SerializeDomParam);
  };

  // Collects all target frames which need to be serialized.
  void CollectTargetFrames();
  // Before serializing the open tag of an element, gives the target element
  // a chance to do some preparatory work and add some additional data.
  // NOTE(review): |need_skip| looks like an out-parameter telling the caller
  // whether to skip the tag -- confirm against the implementation.
  WebCore::String PreActionBeforeSerializeOpenTag(
      const WebCore::Element* element,
      SerializeDomParam* param,
      bool* need_skip);
  // After serializing the open tag of an element, gives the target element a
  // chance to do some post work and add some additional data.
  WebCore::String PostActionAfterSerializeOpenTag(
      const WebCore::Element* element,
      SerializeDomParam* param);
  // Before serializing the end tag of an element, gives the target element a
  // chance to do some preparatory work and add some additional data.
  // NOTE(review): |need_skip| looks like an out-parameter telling the caller
  // whether to skip the tag -- confirm against the implementation.
  WebCore::String PreActionBeforeSerializeEndTag(
      const WebCore::Element* element,
      SerializeDomParam* param,
      bool* need_skip);
  // After serializing the end tag of an element, gives the target element a
  // chance to do some post work and add some additional data.
  WebCore::String PostActionAfterSerializeEndTag(
      const WebCore::Element* element,
      SerializeDomParam* param);
  // Saves generated html content to the data buffer.
  void SaveHtmlContentToBuffer(const WebCore::String& result,
                               SerializeDomParam* param);
  // Serializes the open tag of the specified element.
  void OpenTagToString(const WebCore::Element* element,
                       SerializeDomParam* param);
  // Serializes the end tag of the specified element.
  void EndTagToString(const WebCore::Element* element,
                      SerializeDomParam* param);
  // Builds the content for the specified node.
  void BuildContentForNode(const WebCore::Node* node,
                           SerializeDomParam* param);

  DISALLOW_EVIL_CONSTRUCTORS(DomSerializer);
};

}  // namespace webkit_glue

#endif  // WEBKIT_GLUE_DOM_SERIALIZER_H__