summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- chrome/browser/download/save_page_browsertest.cc 5
-rw-r--r-- content/browser/download/mhtml_generation_manager.cc 98
-rw-r--r-- content/browser/download/mhtml_generation_manager.h 11
-rw-r--r-- content/browser/frame_host/render_frame_host_impl.cc 8
-rw-r--r-- content/browser/frame_host/render_frame_host_impl.h 6
-rw-r--r-- content/common/frame_messages.h 72
-rw-r--r-- content/renderer/render_frame_impl.cc 86
-rw-r--r-- content/renderer/render_frame_impl.h 10
-rw-r--r-- third_party/WebKit/Source/core/page/PageSerializer.cpp 32
-rw-r--r-- third_party/WebKit/Source/core/page/PageSerializer.h 28
-rw-r--r-- third_party/WebKit/Source/web/WebPageSerializer.cpp 55
-rw-r--r-- third_party/WebKit/Source/web/tests/PageSerializerTest.cpp 8
-rw-r--r-- third_party/WebKit/public/web/WebPageSerializer.h 26
13 files changed, 288 insertions, 157 deletions
diff --git a/chrome/browser/download/save_page_browsertest.cc b/chrome/browser/download/save_page_browsertest.cc
index a7e0a54..fbbe0e3 100644
--- a/chrome/browser/download/save_page_browsertest.cc
+++ b/chrome/browser/download/save_page_browsertest.cc
@@ -908,10 +908,7 @@ IN_PROC_BROWSER_TEST_F(SavePageSitePerProcessBrowserTest, SaveAsMHTML) {
count++;
pos++;
}
- // TODO(lukasza): Need to dedupe savable resources (i.e. 1.png) across frames.
- // This will be fixed by crrev.com/1417323006.
- // EXPECT_EQ(1, count)
- // << "Verify number of image/png parts in the mhtml output";
+ EXPECT_EQ(1, count) << "Verify number of image/png parts in the mhtml output";
}
// Test suite that verifies that the frame tree "looks" the same before
diff --git a/content/browser/download/mhtml_generation_manager.cc b/content/browser/download/mhtml_generation_manager.cc
index 9937db7..c1cad61 100644
--- a/content/browser/download/mhtml_generation_manager.cc
+++ b/content/browser/download/mhtml_generation_manager.cc
@@ -18,12 +18,14 @@
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "content/browser/frame_host/frame_tree_node.h"
+#include "content/browser/frame_host/render_frame_host_impl.h"
#include "content/common/frame_messages.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/render_process_host_observer.h"
#include "content/public/browser/web_contents.h"
+#include "url/gurl.h"
namespace content {
@@ -38,14 +40,25 @@ class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
GenerateMHTMLCallback callback() const { return callback_; }
+ // Handler for FrameHostMsg_SerializeAsMHTMLResponse (a notification from the
+ // renderer that the MHTML generation for the previous frame has finished).
+ // Returns |true| upon success; |false| otherwise.
+ bool OnSerializeAsMHTMLResponse(
+ RenderFrameHostImpl* sender,
+ const std::set<std::string>& digests_of_uris_of_serialized_resources);
+
// Sends IPC to the renderer, asking for MHTML generation of the next frame.
//
// Returns true if the message was sent successfully; false otherwise.
bool SendToNextRenderFrame();
// Indicates if more calls to SendToNextRenderFrame are needed.
- bool HasMoreFramesToProcess() const {
- return !pending_frame_tree_node_ids_.empty();
+ bool IsDone() const {
+ bool waiting_for_response_from_renderer =
+ frame_tree_node_id_of_busy_frame_ !=
+ FrameTreeNode::kFrameTreeNodeInvalidId;
+ bool no_more_requests_to_send = pending_frame_tree_node_ids_.empty();
+ return !waiting_for_response_from_renderer && no_more_requests_to_send;
}
// Close the file on the file thread and respond back on the UI thread with
@@ -73,12 +86,17 @@ class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
// See also MHTMLGenerationManager::id_to_job_ map.
int job_id_;
+ // The IDs of frames that still need to be processed.
+ std::queue<int> pending_frame_tree_node_ids_;
+
+ // Identifies a frame to which we've sent FrameMsg_SerializeAsMHTML but for
+ // which we didn't yet process FrameHostMsg_SerializeAsMHTMLResponse via
+ // OnSerializeAsMHTMLResponse.
+ int frame_tree_node_id_of_busy_frame_;
+
// The handle to the file the MHTML is saved to for the browser process.
base::File browser_file_;
- // The IDs of frames we still need to process.
- std::queue<int> pending_frame_tree_node_ids_;
-
// Map from frames into content ids (see WebPageSerializer::generateMHTMLParts
// for more details about what "content ids" are and how they are used).
std::map<int, std::string> frame_tree_node_to_content_id_;
@@ -86,6 +104,10 @@ class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
// MIME multipart boundary to use in the MHTML doc.
std::string mhtml_boundary_marker_;
+ // Digests of URIs of already generated MHTML parts.
+ std::set<std::string> digests_of_already_serialized_uris_;
+ std::string salt_;
+
// The callback to call once generation is complete.
GenerateMHTMLCallback callback_;
@@ -100,7 +122,9 @@ MHTMLGenerationManager::Job::Job(int job_id,
WebContents* web_contents,
GenerateMHTMLCallback callback)
: job_id_(job_id),
+ frame_tree_node_id_of_busy_frame_(FrameTreeNode::kFrameTreeNodeInvalidId),
mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()),
+ salt_(base::GenerateGUID()),
callback_(callback),
observed_renderer_process_host_(this) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
@@ -144,9 +168,13 @@ bool MHTMLGenerationManager::Job::SendToNextRenderFrame() {
DCHECK(browser_file_.IsValid());
DCHECK_LT(0u, pending_frame_tree_node_ids_.size());
+ FrameMsg_SerializeAsMHTML_Params ipc_params;
+ ipc_params.job_id = job_id_;
+ ipc_params.mhtml_boundary_marker = mhtml_boundary_marker_;
+
int frame_tree_node_id = pending_frame_tree_node_ids_.front();
pending_frame_tree_node_ids_.pop();
- bool is_last_frame = pending_frame_tree_node_ids_.empty();
+ ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty();
FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id);
if (!ftn) // The contents went away.
@@ -157,12 +185,21 @@ bool MHTMLGenerationManager::Job::SendToNextRenderFrame() {
observed_renderer_process_host_.RemoveAll();
observed_renderer_process_host_.Add(rfh->GetProcess());
- IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess(
+ // Tell the renderer to skip (= deduplicate) already covered MHTML parts.
+ ipc_params.salt = salt_;
+ ipc_params.digests_of_uris_to_skip = digests_of_already_serialized_uris_;
+
+ ipc_params.destination_file = IPC::GetFileHandleForProcess(
browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(),
false); // |close_source_handle|.
- rfh->Send(new FrameMsg_SerializeAsMHTML(
- rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_,
- CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame));
+ ipc_params.frame_routing_id_to_content_id =
+ CreateFrameRoutingIdToContentId(rfh->GetSiteInstance());
+
+ // Send the IPC asking the renderer to serialize the frame.
+ DCHECK_EQ(FrameTreeNode::kFrameTreeNodeInvalidId,
+ frame_tree_node_id_of_busy_frame_);
+ frame_tree_node_id_of_busy_frame_ = frame_tree_node_id;
+ rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params));
return true;
}
@@ -207,6 +244,31 @@ void MHTMLGenerationManager::Job::CloseFile(
callback);
}
+bool MHTMLGenerationManager::Job::OnSerializeAsMHTMLResponse(
+ RenderFrameHostImpl* sender,
+ const std::set<std::string>& digests_of_uris_of_serialized_resources) {
+ // Sanitize renderer input / reject unexpected messages.
+ int sender_id = sender->frame_tree_node()->frame_tree_node_id();
+ if (sender_id != frame_tree_node_id_of_busy_frame_) {
+ NOTREACHED();
+ return false; // Report failure.
+ }
+ frame_tree_node_id_of_busy_frame_ = FrameTreeNode::kFrameTreeNodeInvalidId;
+
+ // Renderer should be deduping resources with the same uris.
+ DCHECK_EQ(0u, base::STLSetIntersection<std::set<std::string>>(
+ digests_of_already_serialized_uris_,
+ digests_of_uris_of_serialized_resources).size());
+ digests_of_already_serialized_uris_.insert(
+ digests_of_uris_of_serialized_resources.begin(),
+ digests_of_uris_of_serialized_resources.end());
+
+ if (pending_frame_tree_node_ids_.empty())
+ return true; // Report success.
+
+ return SendToNextRenderFrame();
+}
+
// static
int64_t MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) {
DCHECK_CURRENTLY_ON(BrowserThread::FILE);
@@ -260,9 +322,11 @@ void MHTMLGenerationManager::SaveMHTML(WebContents* web_contents,
job_id));
}
-void MHTMLGenerationManager::OnSavedFrameAsMHTML(
+void MHTMLGenerationManager::OnSerializeAsMHTMLResponse(
+ RenderFrameHostImpl* sender,
int job_id,
- bool mhtml_generation_in_renderer_succeeded) {
+ bool mhtml_generation_in_renderer_succeeded,
+ const std::set<std::string>& digests_of_uris_of_serialized_resources) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (!mhtml_generation_in_renderer_succeeded) {
@@ -274,14 +338,14 @@ void MHTMLGenerationManager::OnSavedFrameAsMHTML(
if (!job)
return;
- if (job->HasMoreFramesToProcess()) {
- if (!job->SendToNextRenderFrame()) {
- JobFinished(job_id, JobStatus::FAILURE);
- }
+ if (!job->OnSerializeAsMHTMLResponse(
+ sender, digests_of_uris_of_serialized_resources)) {
+ JobFinished(job_id, JobStatus::FAILURE);
return;
}
- JobFinished(job_id, JobStatus::SUCCESS);
+ if (job->IsDone())
+ JobFinished(job_id, JobStatus::SUCCESS);
}
// static
diff --git a/content/browser/download/mhtml_generation_manager.h b/content/browser/download/mhtml_generation_manager.h
index e697e13..b418e54 100644
--- a/content/browser/download/mhtml_generation_manager.h
+++ b/content/browser/download/mhtml_generation_manager.h
@@ -8,12 +8,15 @@
#include <stdint.h>
#include <map>
+#include <set>
+#include <string>
#include "base/files/file.h"
#include "base/macros.h"
#include "base/memory/singleton.h"
#include "base/process/process.h"
#include "ipc/ipc_platform_file.h"
+#include "url/gurl.h"
namespace base {
class FilePath;
@@ -21,6 +24,7 @@ class FilePath;
namespace content {
+class RenderFrameHostImpl;
class WebContents;
// The class and all of its members live on the UI thread. Only static methods
@@ -42,8 +46,11 @@ class MHTMLGenerationManager {
// Handler for FrameHostMsg_SerializeAsMHTMLResponse (a notification from the
// renderer that the MHTML generation finished for a single frame).
- void OnSavedFrameAsMHTML(int job_id,
- bool mhtml_generation_in_renderer_succeeded);
+ void OnSerializeAsMHTMLResponse(
+ RenderFrameHostImpl* sender,
+ int job_id,
+ bool mhtml_generation_in_renderer_succeeded,
+ const std::set<std::string>& digests_of_uris_of_serialized_resources);
private:
friend struct base::DefaultSingletonTraits<MHTMLGenerationManager>;
diff --git a/content/browser/frame_host/render_frame_host_impl.cc b/content/browser/frame_host/render_frame_host_impl.cc
index a74c597..1834494 100644
--- a/content/browser/frame_host/render_frame_host_impl.cc
+++ b/content/browser/frame_host/render_frame_host_impl.cc
@@ -1714,8 +1714,12 @@ void RenderFrameHostImpl::OnDidChangeLoadProgress(double load_progress) {
frame_tree_node_->DidChangeLoadProgress(load_progress);
}
-void RenderFrameHostImpl::OnSerializeAsMHTMLResponse(int job_id, bool success) {
- MHTMLGenerationManager::GetInstance()->OnSavedFrameAsMHTML(job_id, success);
+void RenderFrameHostImpl::OnSerializeAsMHTMLResponse(
+ int job_id,
+ bool success,
+ const std::set<std::string>& digests_of_uris_of_serialized_resources) {
+ MHTMLGenerationManager::GetInstance()->OnSerializeAsMHTMLResponse(
+ this, job_id, success, digests_of_uris_of_serialized_resources);
}
#if defined(OS_MACOSX) || defined(OS_ANDROID)
diff --git a/content/browser/frame_host/render_frame_host_impl.h b/content/browser/frame_host/render_frame_host_impl.h
index cc190ac..07f78db 100644
--- a/content/browser/frame_host/render_frame_host_impl.h
+++ b/content/browser/frame_host/render_frame_host_impl.h
@@ -9,6 +9,7 @@
#include <stdint.h>
#include <map>
+#include <set>
#include <string>
#include <vector>
@@ -620,7 +621,10 @@ class CONTENT_EXPORT RenderFrameHostImpl
void OnDidStartLoading(bool to_different_document);
void OnDidStopLoading();
void OnDidChangeLoadProgress(double load_progress);
- void OnSerializeAsMHTMLResponse(int job_id, bool success);
+ void OnSerializeAsMHTMLResponse(
+ int job_id,
+ bool success,
+ const std::set<std::string>& digests_of_uris_of_serialized_resources);
#if defined(OS_MACOSX) || defined(OS_ANDROID)
void OnShowPopup(const FrameHostMsg_ShowPopup_Params& params);
diff --git a/content/common/frame_messages.h b/content/common/frame_messages.h
index 561e0e9..59f3284 100644
--- a/content/common/frame_messages.h
+++ b/content/common/frame_messages.h
@@ -8,6 +8,11 @@
#include <stddef.h>
#include <stdint.h>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
#include "build/build_config.h"
#include "cc/surfaces/surface_id.h"
#include "cc/surfaces/surface_sequence.h"
@@ -453,6 +458,38 @@ IPC_STRUCT_TRAITS_BEGIN(content::SavableSubframe)
IPC_STRUCT_TRAITS_MEMBER(routing_id)
IPC_STRUCT_TRAITS_END()
+IPC_STRUCT_BEGIN(FrameMsg_SerializeAsMHTML_Params)
+ // Job id - used to match responses to requests.
+ IPC_STRUCT_MEMBER(int, job_id)
+
+ // Destination file handle.
+ IPC_STRUCT_MEMBER(IPC::PlatformFileForTransit, destination_file)
+
+ // MHTML boundary marker / MIME multipart boundary marker. The same
+ // |mhtml_boundary_marker| should be used for serialization of each frame.
+ IPC_STRUCT_MEMBER(std::string, mhtml_boundary_marker)
+
+ // Frame to content-id map.
+ // Keys are routing ids of either RenderFrames or RenderFrameProxies.
+ // Values are MHTML content-ids - see WebPageSerializer::generateMHTMLParts.
+ IPC_STRUCT_MEMBER(FrameMsg_SerializeAsMHTML_FrameRoutingIdToContentIdMap,
+ frame_routing_id_to_content_id)
+
+ // |digests_of_uris_to_skip| contains digests of uris of MHTML parts that
+ // should be skipped. This helps deduplicate mhtml parts across frames.
+ // SECURITY NOTE: Sha256 digests (rather than uris) are used to prevent
+ // disclosing uris to other renderer processes; the digests should be
+ // generated using SHA256HashString function from crypto/sha2.h and hashing
+ // |salt + url.spec()|.
+ IPC_STRUCT_MEMBER(std::set<std::string>, digests_of_uris_to_skip)
+
+ // Salt used for |digests_of_uris_to_skip|.
+ IPC_STRUCT_MEMBER(std::string, salt)
+
+ // If |is_last_frame| is true, then an MHTML footer will be generated.
+ IPC_STRUCT_MEMBER(bool, is_last_frame)
+IPC_STRUCT_END()
+
#if defined(OS_MACOSX) || defined(OS_ANDROID)
// This message is used for supporting popup menus on Mac OS X and Android using
// native controls. See the FrameHostMsg_ShowPopup message.
@@ -752,28 +789,11 @@ IPC_MESSAGE_ROUTED1(FrameMsg_GetSerializedHtmlWithLocalLinks,
FrameMsg_GetSerializedHtmlWithLocalLinks_Map)
// Serialize target frame and its resources into MHTML and write it into the
-// provided destination file handle.
-//
-// When starting generation of a new MHTML document, one needs to start by
-// sending FrameMsg_SerializeAsMHTML for the *main* frame (main frame needs to
-// be the first part in the MHTML document + main frame will trigger generation
-// of the MHTML header).
-//
-// The same |mhtml_boundary_marker| should be used for serialization of each
-// frame (this string will be used as a mime multipart boundary within the mhtml
-// document).
-//
-// For more details about frame to content id map please see
-// WebPageSerializer::generateMHTMLParts method.
-//
-// |is_last_frame| controls whether the serializer in the renderer will
-// emit the MHTML footer.
-IPC_MESSAGE_ROUTED5(FrameMsg_SerializeAsMHTML,
- int /* job_id (used to match responses to requests) */,
- IPC::PlatformFileForTransit /* destination file handle */,
- std::string /* mhtml boundary marker */,
- FrameMsg_SerializeAsMHTML_FrameRoutingIdToContentIdMap,
- bool /* is last frame */)
+// provided destination file handle. Note that when serializing multiple
+// frames, one needs to serialize the *main* frame first (the main frame
+// needs to go first according to RFC2557 + the main frame will trigger
+// generation of the MHTML header).
+IPC_MESSAGE_ROUTED1(FrameMsg_SerializeAsMHTML, FrameMsg_SerializeAsMHTML_Params)
IPC_MESSAGE_ROUTED1(FrameMsg_SetFrameOwnerProperties,
blink::WebFrameOwnerProperties /* frame_owner_properties */)
@@ -1319,9 +1339,11 @@ IPC_MESSAGE_ROUTED2(FrameHostMsg_SerializedHtmlWithLocalLinksResponse,
bool /* end of data? */)
// Response to FrameMsg_SerializeAsMHTML.
-IPC_MESSAGE_ROUTED2(FrameHostMsg_SerializeAsMHTMLResponse,
- int /* job_id (used to match responses to requests) */,
- bool /* true if success, false if error */)
+IPC_MESSAGE_ROUTED3(
+ FrameHostMsg_SerializeAsMHTMLResponse,
+ int /* job_id (used to match responses to requests) */,
+ bool /* true if success, false if error */,
+ std::set<std::string> /* digests of uris of serialized resources */)
// Sent when the renderer updates hint for importance of a tab.
IPC_MESSAGE_ROUTED1(FrameHostMsg_UpdatePageImportanceSignals,
diff --git a/content/renderer/render_frame_impl.cc b/content/renderer/render_frame_impl.cc
index 180be85..a6d24f2 100644
--- a/content/renderer/render_frame_impl.cc
+++ b/content/renderer/render_frame_impl.cc
@@ -17,10 +17,12 @@
#include "base/files/file.h"
#include "base/i18n/char_iterator.h"
#include "base/logging.h"
+#include "base/macros.h"
#include "base/memory/shared_memory.h"
#include "base/memory/weak_ptr.h"
#include "base/metrics/histogram.h"
#include "base/process/process.h"
+#include "base/stl_util.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "base/thread_task_runner_handle.h"
@@ -127,6 +129,7 @@
#include "content/renderer/web_frame_utils.h"
#include "content/renderer/web_ui_extension.h"
#include "content/renderer/websharedworker_proxy.h"
+#include "crypto/sha2.h"
#include "gin/modules/module_registry.h"
#include "media/audio/audio_output_device.h"
#include "media/base/audio_renderer_mixer_input.h"
@@ -580,6 +583,55 @@ WebString ConvertRelativePathToHtmlAttribute(const base::FilePath& path) {
path.NormalizePathSeparatorsTo(FILE_PATH_LITERAL('/')).AsUTF8Unsafe());
}
+// Implementation of WebPageSerializer::MHTMLPartsGenerationDelegate that
+// 1. Bases shouldSkipResource and getContentID responses on contents of
+// FrameMsg_SerializeAsMHTML_Params.
+// 2. Stores digests of urls of serialized resources (i.e. urls reported via
+// shouldSkipResource) into |digests_of_uris_of_serialized_resources| passed
+// to the constructor.
+class MHTMLPartsGenerationDelegate
+ : public WebPageSerializer::MHTMLPartsGenerationDelegate {
+ public:
+ MHTMLPartsGenerationDelegate(
+ const FrameMsg_SerializeAsMHTML_Params& params,
+ std::set<std::string>* digests_of_uris_of_serialized_resources)
+ : params_(params),
+ digests_of_uris_of_serialized_resources_(
+ digests_of_uris_of_serialized_resources) {
+ DCHECK(digests_of_uris_of_serialized_resources_);
+ }
+
+ bool shouldSkipResource(const WebURL& url) override {
+ std::string digest =
+ crypto::SHA256HashString(params_.salt + GURL(url).spec());
+
+ // Skip if |url| is already covered by serialization of an *earlier* frame.
+ if (ContainsKey(params_.digests_of_uris_to_skip, digest))
+ return true;
+
+ // Let's record |url| as being serialized for the *current* frame.
+ auto pair = digests_of_uris_of_serialized_resources_->insert(digest);
+ bool insertion_took_place = pair.second;
+ DCHECK(insertion_took_place); // Blink should dedupe within a frame.
+
+ return false;
+ }
+
+ WebString getContentID(const WebFrame& frame) override {
+ int routing_id = GetRoutingIdForFrameOrProxy(const_cast<WebFrame*>(&frame));
+ auto it = params_.frame_routing_id_to_content_id.find(routing_id);
+ DCHECK(it != params_.frame_routing_id_to_content_id.end());
+ const std::string& content_id = it->second;
+ return WebString::fromUTF8(content_id);
+ }
+
+ private:
+ const FrameMsg_SerializeAsMHTML_Params& params_;
+ std::set<std::string>* digests_of_uris_of_serialized_resources_;
+
+ DISALLOW_COPY_AND_ASSIGN(MHTMLPartsGenerationDelegate);
+};
+
bool IsContentWithCertificateErrorsRelevantToUI(
const blink::WebURL& url,
const blink::WebCString& security_info,
@@ -4766,28 +4818,18 @@ void RenderFrameImpl::OnGetSerializedHtmlWithLocalLinks(
}
void RenderFrameImpl::OnSerializeAsMHTML(
- int job_id,
- IPC::PlatformFileForTransit file_for_transit,
- const std::string& std_mhtml_boundary,
- const std::map<int, std::string>& frame_routing_id_to_content_id,
- bool is_last_frame) {
+ const FrameMsg_SerializeAsMHTML_Params& params) {
// Unpack IPC payload.
- base::File file = IPC::PlatformFileForTransitToFile(file_for_transit);
- const WebString mhtml_boundary = WebString::fromUTF8(std_mhtml_boundary);
+ base::File file = IPC::PlatformFileForTransitToFile(params.destination_file);
+ const WebString mhtml_boundary =
+ WebString::fromUTF8(params.mhtml_boundary_marker);
DCHECK(!mhtml_boundary.isEmpty());
- std::vector<std::pair<WebFrame*, WebString>> web_frame_to_content_id;
- for (const auto& it : frame_routing_id_to_content_id) {
- const std::string& content_id = it.second;
- WebFrame* web_frame = GetWebFrameFromRoutingIdForFrameOrProxy(it.first);
- if (!web_frame)
- continue;
-
- web_frame_to_content_id.push_back(
- std::make_pair(web_frame, WebString::fromUTF8(content_id)));
- }
WebData data;
bool success = true;
+ std::set<std::string> digests_of_uris_of_serialized_resources;
+ MHTMLPartsGenerationDelegate delegate(
+ params, &digests_of_uris_of_serialized_resources);
// Generate MHTML header if needed.
if (IsMainFrame()) {
@@ -4800,8 +4842,8 @@ void RenderFrameImpl::OnSerializeAsMHTML(
// Generate MHTML parts.
if (success) {
- data = WebPageSerializer::generateMHTMLParts(
- mhtml_boundary, GetWebFrame(), false, web_frame_to_content_id);
+ data = WebPageSerializer::generateMHTMLParts(mhtml_boundary, GetWebFrame(),
+ false, &delegate);
// TODO(jcivelli): write the chunks in deferred tasks to give a chance to
// the message loop to process other events.
if (file.WriteAtCurrentPos(data.data(), data.size()) < 0) {
@@ -4810,7 +4852,7 @@ void RenderFrameImpl::OnSerializeAsMHTML(
}
// Generate MHTML footer if needed.
- if (success && is_last_frame) {
+ if (success && params.is_last_frame) {
data = WebPageSerializer::generateMHTMLFooter(mhtml_boundary);
if (file.WriteAtCurrentPos(data.data(), data.size()) < 0) {
success = false;
@@ -4819,7 +4861,9 @@ void RenderFrameImpl::OnSerializeAsMHTML(
// Cleanup and notify the browser process about completion.
file.Close(); // Need to flush file contents before sending IPC response.
- Send(new FrameHostMsg_SerializeAsMHTMLResponse(routing_id_, job_id, success));
+ Send(new FrameHostMsg_SerializeAsMHTMLResponse(
+ routing_id_, params.job_id, success,
+ digests_of_uris_of_serialized_resources));
}
void RenderFrameImpl::OpenURL(const GURL& url,
diff --git a/content/renderer/render_frame_impl.h b/content/renderer/render_frame_impl.h
index 76e2925..7efc773 100644
--- a/content/renderer/render_frame_impl.h
+++ b/content/renderer/render_frame_impl.h
@@ -48,6 +48,7 @@
#include "third_party/WebKit/public/web/WebPageSerializerClient.h"
#include "third_party/WebKit/public/web/WebScriptExecutionCallback.h"
#include "ui/gfx/range/range.h"
+#include "url/gurl.h"
#if defined(ENABLE_PLUGINS)
#include "content/renderer/pepper/plugin_power_saver_helper.h"
@@ -61,10 +62,10 @@
#include "media/mojo/interfaces/service_factory.mojom.h"
#endif
-class GURL;
class TransportDIB;
struct FrameMsg_NewFrame_WidgetParams;
struct FrameMsg_PostMessage_Params;
+struct FrameMsg_SerializeAsMHTML_Params;
struct FrameMsg_TextTrackSettings_Params;
namespace blink {
@@ -790,12 +791,7 @@ class CONTENT_EXPORT RenderFrameImpl
void OnGetSavableResourceLinks();
void OnGetSerializedHtmlWithLocalLinks(
const std::map<GURL, base::FilePath>& url_to_local_path);
- void OnSerializeAsMHTML(
- int job_id,
- IPC::PlatformFileForTransit file_for_transit,
- const std::string& mhtml_boundary_marker,
- const std::map<int, std::string>& frame_routing_id_to_content_id,
- bool is_last_frame);
+ void OnSerializeAsMHTML(const FrameMsg_SerializeAsMHTML_Params& params);
// Requests that the browser process navigates to |url|. If
// |is_history_navigation_in_new_child| is true, the browser process should
diff --git a/third_party/WebKit/Source/core/page/PageSerializer.cpp b/third_party/WebKit/Source/core/page/PageSerializer.cpp
index d2bfd21..a3fe2fe 100644
--- a/third_party/WebKit/Source/core/page/PageSerializer.cpp
+++ b/third_party/WebKit/Source/core/page/PageSerializer.cpp
@@ -84,7 +84,7 @@ static bool shouldIgnoreElement(const Element& element)
class SerializerMarkupAccumulator : public MarkupAccumulator {
STACK_ALLOCATED();
public:
- SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&);
+ SerializerMarkupAccumulator(PageSerializer::Delegate&, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&);
~SerializerMarkupAccumulator() override;
protected:
@@ -103,7 +103,7 @@ private:
const String& attributeName,
const String& attributeValue);
- PageSerializer* m_serializer;
+ PageSerializer::Delegate& m_delegate;
RawPtrWillBeMember<const Document> m_document;
// FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document
@@ -116,9 +116,9 @@ private:
WillBeHeapHashSet<RawPtrWillBeMember<const Element>> m_elementsWithRewrittenLinks;
};
-SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes)
+SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer::Delegate& delegate, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes)
: MarkupAccumulator(ResolveAllURLs)
- , m_serializer(serializer)
+ , m_delegate(delegate)
, m_document(&document)
, m_nodes(nodes)
{
@@ -137,11 +137,7 @@ void SerializerMarkupAccumulator::appendText(StringBuilder& result, Text& text)
bool SerializerMarkupAccumulator::shouldIgnoreAttribute(const Attribute& attribute)
{
- PageSerializer::Delegate* delegate = m_serializer->delegate();
- if (delegate)
- return delegate->shouldIgnoreAttribute(attribute);
-
- return MarkupAccumulator::shouldIgnoreAttribute(attribute);
+ return m_delegate.shouldIgnoreAttribute(attribute);
}
void SerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
@@ -176,9 +172,8 @@ void SerializerMarkupAccumulator::appendAttribute(
&& attribute.name() == HTMLNames::srcdocAttr;
if (isLinkAttribute || isSrcDocAttribute) {
// Check if the delegate wants to do link rewriting for the element.
- PageSerializer::Delegate* delegate = m_serializer->delegate();
String newLinkForTheElement;
- if (delegate && delegate->rewriteLink(element, newLinkForTheElement)) {
+ if (m_delegate.rewriteLink(element, newLinkForTheElement)) {
if (isLinkAttribute) {
// Rewrite element links.
appendRewrittenAttribute(
@@ -247,7 +242,7 @@ void SerializerMarkupAccumulator::appendRewrittenAttribute(
PageSerializer::PageSerializer(
Vector<SerializedResource>& resources,
- Delegate* delegate)
+ Delegate& delegate)
: m_resources(&resources)
, m_delegate(delegate)
{
@@ -267,7 +262,7 @@ void PageSerializer::serializeFrame(const LocalFrame& frame)
}
WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes;
- SerializerMarkupAccumulator accumulator(this, document, serializedNodes);
+ SerializerMarkupAccumulator accumulator(m_delegate, document, serializedNodes);
String text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode);
CString frameHTML = document.encoding().encode(text, WTF::EntitiesForUnencodables);
@@ -302,7 +297,6 @@ void PageSerializer::serializeFrame(const LocalFrame& frame)
if (CSSStyleSheet* sheet = linkElement.sheet()) {
KURL url = document.completeURL(linkElement.getAttribute(HTMLNames::hrefAttr));
serializeCSSStyleSheet(*sheet, url);
- ASSERT(m_resourceURLs.contains(url));
}
} else if (isHTMLStyleElement(element)) {
HTMLStyleElement& styleElement = toHTMLStyleElement(element);
@@ -332,7 +326,7 @@ void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KUR
serializeCSSRule(rule);
}
- if (url.isValid() && !m_resourceURLs.contains(url)) {
+ if (shouldAddURL(url)) {
WTF::TextEncoding textEncoding(styleSheet.contents()->charset());
ASSERT(textEncoding.isValid());
String textString = cssText.toString();
@@ -392,7 +386,8 @@ void PageSerializer::serializeCSSRule(CSSRule* rule)
bool PageSerializer::shouldAddURL(const KURL& url)
{
- return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData();
+ return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData()
+ && !m_delegate.shouldSkipResource(url);
}
void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
@@ -469,11 +464,6 @@ void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document&
}
}
-PageSerializer::Delegate* PageSerializer::delegate()
-{
- return m_delegate;
-}
-
// Returns MOTW (Mark of the Web) declaration before html tag which is in
// HTML comment, e.g. "<!-- saved from url=(%04d)%s -->"
// See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
diff --git a/third_party/WebKit/Source/core/page/PageSerializer.h b/third_party/WebKit/Source/core/page/PageSerializer.h
index bd99ede..0de4675 100644
--- a/third_party/WebKit/Source/core/page/PageSerializer.h
+++ b/third_party/WebKit/Source/core/page/PageSerializer.h
@@ -68,7 +68,11 @@ class CORE_EXPORT PageSerializer final {
public:
class Delegate {
public:
- virtual bool shouldIgnoreAttribute(const Attribute&) = 0;
+ // Controls whether HTML serialization should skip the given attribute.
+ virtual bool shouldIgnoreAttribute(const Attribute&)
+ {
+ return false;
+ }
// Method allowing the Delegate control which URLs are written into the
// generated html document.
@@ -79,14 +83,24 @@ public:
// (i.e. in place of img.src or iframe.src or object.data).
//
// If no link rewriting is desired, this method should return false.
- virtual bool rewriteLink(const Element&, String& rewrittenLink) = 0;
+ virtual bool rewriteLink(const Element&, String& rewrittenLink)
+ {
+ return false;
+ }
+
+ // Tells whether to skip serialization of a subresource with a given URI.
+ // Used to deduplicate resources across multiple frames.
+ virtual bool shouldSkipResource(const KURL&)
+ {
+ return false;
+ }
};
// Constructs a serializer that will write output to the given vector of
- // SerializedResources and use the optional Delegate for controlling some
- // serialization aspects. Callers need to ensure that the Delegate stays
+ // SerializedResources and uses the Delegate for controlling some
+ // serialization aspects. Callers need to ensure that both arguments stay
// alive until the PageSerializer gets destroyed.
- PageSerializer(Vector<SerializedResource>&, Delegate*);
+ PageSerializer(Vector<SerializedResource>&, Delegate&);
// Initiates the serialization of the frame. All serialized content and
// retrieved resources are added to the Vector passed to the constructor.
@@ -94,8 +108,6 @@ public:
// Subsequent resources are images, css, etc.
void serializeFrame(const LocalFrame&);
- Delegate* delegate();
-
static String markOfTheWebDeclaration(const KURL&);
private:
@@ -118,7 +130,7 @@ private:
Vector<SerializedResource>* m_resources;
ListHashSet<KURL> m_resourceURLs;
- Delegate* m_delegate;
+ Delegate& m_delegate;
};
} // namespace blink
diff --git a/third_party/WebKit/Source/web/WebPageSerializer.cpp b/third_party/WebKit/Source/web/WebPageSerializer.cpp
index afb89bc..bb9d48b 100644
--- a/third_party/WebKit/Source/web/WebPageSerializer.cpp
+++ b/third_party/WebKit/Source/web/WebPageSerializer.cpp
@@ -69,28 +69,21 @@ namespace blink {
namespace {
-using ContentIDMap = WillBeHeapHashMap<RawPtrWillBeMember<Frame>, String>;
-
-class MHTMLPageSerializerDelegate final :
- public NoBaseWillBeGarbageCollected<MHTMLPageSerializerDelegate>,
- public PageSerializer::Delegate {
+class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate);
public:
- MHTMLPageSerializerDelegate(const ContentIDMap& frameToContentID);
+ explicit MHTMLPageSerializerDelegate(WebPageSerializer::MHTMLPartsGenerationDelegate&);
bool shouldIgnoreAttribute(const Attribute&) override;
bool rewriteLink(const Element&, String& rewrittenLink) override;
-
-#if ENABLE(OILPAN)
- void trace(Visitor* visitor) { visitor->trace(m_frameToContentID); }
-#endif
+ bool shouldSkipResource(const KURL&) override;
private:
- const ContentIDMap& m_frameToContentID;
+ WebPageSerializer::MHTMLPartsGenerationDelegate& m_webDelegate;
};
MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate(
- const ContentIDMap& frameToContentID)
- : m_frameToContentID(frameToContentID)
+ WebPageSerializer::MHTMLPartsGenerationDelegate& webDelegate)
+ : m_webDelegate(webDelegate)
{
}
@@ -114,7 +107,8 @@ bool MHTMLPageSerializerDelegate::rewriteLink(
if (!frame)
return false;
- KURL cidURI = MHTMLParser::convertContentIDToURI(m_frameToContentID.get(frame));
+ WebString contentID = m_webDelegate.getContentID(*WebFrame::fromFrame(frame));
+ KURL cidURI = MHTMLParser::convertContentIDToURI(contentID);
ASSERT(cidURI.isValid());
if (isHTMLFrameElementBase(&element)) {
@@ -135,20 +129,9 @@ bool MHTMLPageSerializerDelegate::rewriteLink(
return false;
}
-ContentIDMap createFrameToContentIDMap(
- const WebVector<std::pair<WebFrame*, WebString>>& webFrameToContentID)
+bool MHTMLPageSerializerDelegate::shouldSkipResource(const KURL& url)
{
- ContentIDMap result;
- for (const auto& it : webFrameToContentID) {
- WebFrame* webFrame = it.first;
- const WebString& webContentID = it.second;
-
- Frame* frame = webFrame->toImplBase()->frame();
- String contentID(webContentID);
-
- result.add(frame, contentID);
- }
- return result;
+ return m_webDelegate.shouldSkipResource(url);
}
} // namespace
@@ -167,28 +150,34 @@ WebData WebPageSerializer::generateMHTMLHeader(
WebData WebPageSerializer::generateMHTMLParts(
const WebString& boundary, WebLocalFrame* webFrame, bool useBinaryEncoding,
- const WebVector<std::pair<WebFrame*, WebString>>& webFrameToContentID)
+ MHTMLPartsGenerationDelegate* webDelegate)
{
+ ASSERT(webFrame);
+ ASSERT(webDelegate);
+
// Translate arguments from public to internal blink APIs.
LocalFrame* frame = toWebLocalFrameImpl(webFrame)->frame();
MHTMLArchive::EncodingPolicy encodingPolicy = useBinaryEncoding
? MHTMLArchive::EncodingPolicy::UseBinaryEncoding
: MHTMLArchive::EncodingPolicy::UseDefaultEncoding;
- ContentIDMap frameToContentID = createFrameToContentIDMap(webFrameToContentID);
// Serialize.
Vector<SerializedResource> resources;
- MHTMLPageSerializerDelegate delegate(frameToContentID);
- PageSerializer serializer(resources, &delegate);
+ MHTMLPageSerializerDelegate coreDelegate(*webDelegate);
+ PageSerializer serializer(resources, coreDelegate);
serializer.serializeFrame(*frame);
+ // Get Content-ID for the frame being serialized.
+ String frameContentID = webDelegate->getContentID(*webFrame);
+ ASSERT(!frameContentID.isEmpty());
+
// Encode serializer's output as MHTML.
RefPtr<SharedBuffer> output = SharedBuffer::create();
bool isFirstResource = true;
for (const SerializedResource& resource : resources) {
// Frame is the 1st resource (see PageSerializer::serializeFrame doc
- // comment). Frames need a Content-ID header.
- String contentID = isFirstResource ? frameToContentID.get(frame) : String();
+ // comment). Frames get a Content-ID header.
+ String contentID = isFirstResource ? frameContentID : String();
MHTMLArchive::generateMHTMLPart(
boundary, contentID, encodingPolicy, resource, *output);
diff --git a/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp b/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp
index a52501c..f333ef5 100644
--- a/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp
+++ b/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp
@@ -119,7 +119,7 @@ protected:
void serialize(const char* url)
{
FrameTestHelpers::loadFrame(m_helper.webView()->mainFrame(), KURL(m_baseUrl, url).string().utf8().data());
- PageSerializer serializer(m_resources, this);
+ PageSerializer serializer(m_resources, *this);
Frame* frame = m_helper.webViewImpl()->mainFrameImpl()->frame();
for (; frame; frame = frame->tree().traverseNext()) {
// This is safe, because tests do not do cross-site navigation
@@ -168,12 +168,6 @@ private:
}
// PageSerializer::Delegate implementation.
- bool shouldIgnoreAttribute(const Attribute&) override
- {
- return false;
- }
-
- // PageSerializer::Delegate implementation.
bool rewriteLink(const Element& element, String& rewrittenLink)
{
String completeURL;
diff --git a/third_party/WebKit/public/web/WebPageSerializer.h b/third_party/WebKit/public/web/WebPageSerializer.h
index 7a59b6a..44d5f94 100644
--- a/third_party/WebKit/public/web/WebPageSerializer.h
+++ b/third_party/WebKit/public/web/WebPageSerializer.h
@@ -63,21 +63,29 @@ public:
BLINK_EXPORT static WebData generateMHTMLHeader(
const WebString& boundary, WebLocalFrame*);
- // Generates and returns MHTML parts for the given frame and all the
+ // Delegate for controlling the behavior of the generateMHTMLParts method.
+ class MHTMLPartsGenerationDelegate {
+ public:
+ // Tells whether to skip serialization of a subresource with a given URI.
+ // Used to deduplicate resources across multiple frames.
+ virtual bool shouldSkipResource(const WebURL&) = 0;
+
+ // Returns a Content-ID to be used for the given frame.
+ // See rfc2557 - section 8.3 - "Use of the Content-ID header and CID URLs".
+ // Format note - the returned string should be of the form "<foo@bar.com>"
+ // (i.e. the strings should include the angle brackets).
+ virtual WebString getContentID(const WebFrame&) = 0;
+ };
+
+ // Generates and returns MHTML parts for the given frame and the
// savable resources underneath.
//
// Same |boundary| needs to used for all generateMHTMLHeader and
// generateMHTMLParts and generateMHTMLFooter calls that belong to the same
// MHTML document (see also rfc1341, section 7.2.1, "boundary" description).
- //
- // |frameToContentID| is used for 1) emitting cid: scheme uri links for
- // subframes and 2) emitting MIME Content-ID headers.
- // See rfc2557 - section 8.3 - "Use of the Content-ID header and CID URLs".
- // Format note - |frameToContentID| should contain strings of the form
- // "<foo@bar.com>" (i.e. the strings should include the angle brackets).
BLINK_EXPORT static WebData generateMHTMLParts(
const WebString& boundary, WebLocalFrame*, bool useBinaryEncoding,
- const WebVector<std::pair<WebFrame*, WebString>>& frameToContentID);
+ MHTMLPartsGenerationDelegate*);
// Generates and returns an MHTML footer.
//
@@ -92,7 +100,7 @@ public:
// This function will serialize the specified frame to HTML data.
// We have a data buffer to temporary saving generated html data. We will
- // sequentially call WebPageSeriazlierClient once the data buffer is full.
+ // sequentially call WebPageSerializerClient once the data buffer is full.
//
// Return false means if no data has been serialized (i.e. because
// the target frame didn't have a valid url).