diff options
author | bengr@chromium.org <bengr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-11-13 22:05:30 +0000 |
---|---|---|
committer | bengr@chromium.org <bengr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-11-13 22:05:30 +0000 |
commit | c0f759b1863f24bbcdd5679623a33380874cbb69 (patch) | |
tree | a089cc82b6174d38da1c4d982fd684167da887c9 /components | |
parent | f55913a8897cee5df58696c7ba52a10aeded377d (diff) | |
download | chromium_src-c0f759b1863f24bbcdd5679623a33380874cbb69.zip chromium_src-c0f759b1863f24bbcdd5679623a33380874cbb69.tar.gz chromium_src-c0f759b1863f24bbcdd5679623a33380874cbb69.tar.bz2 |
Page distiller core
This change adds code to load a page in a WebContents, execute JavaScript in that context, parse the result of executing the JS, fetch images referenced in the result, and store the images and other details of the result in a proto.
BUG=288015, 283846
Review URL: https://codereview.chromium.org/26358008
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@234921 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components')
18 files changed, 920 insertions, 24 deletions
diff --git a/components/components_tests.gyp b/components/components_tests.gyp index eaa08b7..21319a2 100644 --- a/components/components_tests.gyp +++ b/components/components_tests.gyp @@ -23,6 +23,7 @@ 'autofill/core/common/password_form_fill_data_unittest.cc', 'browser_context_keyed_service/browser_context_dependency_manager_unittest.cc', 'browser_context_keyed_service/dependency_graph_unittest.cc', + 'dom_distiller/core/distiller_unittest.cc', 'dom_distiller/core/distiller_url_fetcher_unittest.cc', 'dom_distiller/core/dom_distiller_database_unittest.cc', 'dom_distiller/core/dom_distiller_model_unittest.cc', @@ -222,5 +223,74 @@ }], ], }], + ['OS!="ios"', { + 'targets': [ + { + 'target_name': 'components_browsertests', + 'type': '<(gtest_target_type)', + 'defines!': ['CONTENT_IMPLEMENTATION'], + 'dependencies': [ + '../content/content_shell_and_tests.gyp:content_browser_test_support', + '../content/content_shell_and_tests.gyp:test_support_content', + '../skia/skia.gyp:skia', + '../testing/gtest.gyp:gtest', + 'components.gyp:dom_distiller_content', + 'components.gyp:dom_distiller_core', + ], + 'include_dirs': [ + '..', + ], + 'defines': [ + 'HAS_OUT_OF_PROC_TEST_RUNNER', + ], + 'sources': [ + '../content/test/content_test_launcher.cc', + 'dom_distiller/content/distiller_page_web_contents_browsertest.cc', + ], + 'conditions': [ + ['OS=="win"', { + 'resource_include_dirs': [ + '<(SHARED_INTERMEDIATE_DIR)/webkit', + ], + 'sources': [ + '../content/shell/app/resource.h', + '../content/shell/app/shell.rc', + # TODO: It would be nice to have these pulled in + # automatically from direct_dependent_settings in + # their various targets (net.gyp:net_resources, etc.), + # but that causes errors in other targets when + # resulting .res files get referenced multiple times. 
+ '<(SHARED_INTERMEDIATE_DIR)/net/net_resources.rc', + '<(SHARED_INTERMEDIATE_DIR)/webkit/blink_resources.rc', + '<(SHARED_INTERMEDIATE_DIR)/webkit/webkit_strings_en-US.rc', + ], + 'dependencies': [ + '<(DEPTH)/net/net.gyp:net_resources', + '<(DEPTH)/third_party/iaccessible2/iaccessible2.gyp:iaccessible2', + '<(DEPTH)/third_party/isimpledom/isimpledom.gyp:isimpledom', + '<(DEPTH)/webkit/webkit_resources.gyp:webkit_strings', + '<(DEPTH)/webkit/webkit_resources.gyp:webkit_resources', + ], + 'configurations': { + 'Debug_Base': { + 'msvs_settings': { + 'VCLinkerTool': { + 'LinkIncremental': '<(msvs_large_module_debug_link_mode)', + }, + }, + }, + }, + # TODO(jschuh): crbug.com/167187 fix size_t to int truncations. + 'msvs_disabled_warnings': [ 4267, ], + }], + ['OS=="win" and win_use_allocator_shim==1', { + 'dependencies': [ + '../base/allocator/allocator.gyp:allocator', + ], + }], + ], + }, + ], + }], ], } diff --git a/components/dom_distiller.gypi b/components/dom_distiller.gypi index abece3f..74a9b1d 100644 --- a/components/dom_distiller.gypi +++ b/components/dom_distiller.gypi @@ -32,12 +32,15 @@ 'type': 'static_library', 'dependencies': [ 'dom_distiller_core', + '../skia/skia.gyp:skia', '../sync/sync.gyp:sync', ], 'include_dirs': [ '..', ], 'sources': [ + 'dom_distiller/content/distiller_page_web_contents.h', + 'dom_distiller/content/distiller_page_web_contents.cc', 'dom_distiller/content/dom_distiller_service_factory.h', 'dom_distiller/content/dom_distiller_service_factory.cc', ], @@ -64,8 +67,9 @@ 'type': 'static_library', 'dependencies': [ 'distilled_page_proto', + 'dom_distiller_resources', '../base/base.gyp:base', - #'../skia/skia.gyp:skia', + '../skia/skia.gyp:skia', '../sync/sync.gyp:sync', '../third_party/protobuf/protobuf.gyp:protobuf_lite', '../third_party/leveldatabase/leveldatabase.gyp:leveldatabase', @@ -76,6 +80,10 @@ 'sources': [ 'dom_distiller/core/article_entry.cc', 'dom_distiller/core/article_entry.h', + 'dom_distiller/core/distiller.cc', + 
'dom_distiller/core/distiller.h', + 'dom_distiller/core/distiller_page.cc', + 'dom_distiller/core/distiller_page.h', 'dom_distiller/core/distiller_url_fetcher.cc', 'dom_distiller/core/distiller_url_fetcher.h', 'dom_distiller/core/dom_distiller_constants.cc', diff --git a/components/dom_distiller/DEPS b/components/dom_distiller/DEPS index 99f902a..1c12a78 100644 --- a/components/dom_distiller/DEPS +++ b/components/dom_distiller/DEPS @@ -5,6 +5,9 @@ include_rules = [ "+third_party/leveldatabase/src/include", "+net/http", "+net/url_request", + "+ui/base/android", + "+ui/base/resource", + "+ui/gfx/android", # The dom distiller is a layered component; subdirectories must explicitly # introduce the ability to use the content layer as appropriate. diff --git a/components/dom_distiller/content/DEPS b/components/dom_distiller/content/DEPS index f3ade60..6be787a 100644 --- a/components/dom_distiller/content/DEPS +++ b/components/dom_distiller/content/DEPS @@ -1,4 +1,7 @@ include_rules = [ "+components/browser_context_keyed_service", - "+content/public/browser", + "+content/public", + "+content/shell", + "+content/test", + "+net/test", ] diff --git a/components/dom_distiller/content/distiller_page_web_contents.cc b/components/dom_distiller/content/distiller_page_web_contents.cc new file mode 100644 index 0000000..0138d37 --- /dev/null +++ b/components/dom_distiller/content/distiller_page_web_contents.cc @@ -0,0 +1,78 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/dom_distiller/content/distiller_page_web_contents.h" + +#include "base/callback.h" +#include "base/memory/scoped_ptr.h" +#include "base/strings/utf_string_conversions.h" +#include "components/dom_distiller/core/distiller_page.h" +#include "content/public/browser/browser_context.h" +#include "content/public/browser/navigation_controller.h" +#include "content/public/browser/render_view_host.h" +#include "content/public/browser/web_contents.h" +#include "content/public/browser/web_contents_observer.h" +#include "url/gurl.h" + +namespace dom_distiller { + +scoped_ptr<DistillerPage> DistillerPageWebContentsFactory::CreateDistillerPage( + DistillerPage::Delegate* delegate) const { + DCHECK(browser_context_); + return scoped_ptr<DistillerPage>( + new DistillerPageWebContents(delegate, browser_context_)); +} + +DistillerPageWebContents::DistillerPageWebContents( + DistillerPage::Delegate* delegate, + content::BrowserContext* browser_context) + : DistillerPage(delegate), + browser_context_(browser_context) {} + +DistillerPageWebContents::~DistillerPageWebContents() { +} + +void DistillerPageWebContents::InitImpl() { + DCHECK(browser_context_); + web_contents_.reset( + content::WebContents::Create( + content::WebContents::CreateParams(browser_context_))); +} + +void DistillerPageWebContents::LoadURLImpl(const GURL& gurl) { + DCHECK(web_contents_.get()); + content::WebContentsObserver::Observe(web_contents_.get()); + content::NavigationController::LoadURLParams params(gurl); + web_contents_->GetController().LoadURLWithParams(params); +} + +void DistillerPageWebContents::ExecuteJavaScriptImpl( + const std::string& script) { + content::RenderViewHost* host = web_contents_->GetRenderViewHost(); + DCHECK(host); + host->ExecuteJavascriptInWebFrameCallbackResult( + string16(), // frame_xpath + UTF8ToUTF16(script), + base::Bind(&DistillerPage::OnExecuteJavaScriptDone, + base::Unretained(this))); +} + +void DistillerPageWebContents::DidFinishLoad(int64 
frame_id, + const GURL& validated_url, + bool is_main_frame, + RenderViewHost* render_view_host) { + content::WebContentsObserver::Observe(NULL); + OnLoadURLDone(); +} +void DistillerPageWebContents::DidFailLoad(int64 frame_id, + const GURL& validated_url, + bool is_main_frame, + int error_code, + const string16& error_description, + RenderViewHost* render_view_host) { + content::WebContentsObserver::Observe(NULL); + OnLoadURLFailed(); +} + +} // namespace dom_distiller diff --git a/components/dom_distiller/content/distiller_page_web_contents.h b/components/dom_distiller/content/distiller_page_web_contents.h new file mode 100644 index 0000000..65694ff --- /dev/null +++ b/components/dom_distiller/content/distiller_page_web_contents.h @@ -0,0 +1,71 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_DOM_DISTILLER_CONTENT_DISTILLER_PAGE_WEB_CONTENTS_H_ +#define COMPONENTS_DOM_DISTILLER_CONTENT_DISTILLER_PAGE_WEB_CONTENTS_H_ + +#include "base/memory/scoped_ptr.h" +#include "components/dom_distiller/core/distiller_page.h" +#include "content/public/browser/web_contents.h" +#include "content/public/browser/web_contents_observer.h" +#include "url/gurl.h" + +namespace content { +class RenderViewHost; +} + +using content::RenderViewHost; + +namespace dom_distiller { + +class DistillerContext; + +class DistillerPageWebContentsFactory : public DistillerPageFactory { + public: + DistillerPageWebContentsFactory(content::BrowserContext* browser_context) + : DistillerPageFactory(), browser_context_(browser_context) {} + virtual ~DistillerPageWebContentsFactory() {} + + virtual scoped_ptr<DistillerPage> CreateDistillerPage( + DistillerPage::Delegate* delegate) const OVERRIDE; + + private: + content::BrowserContext* browser_context_; +}; + + +class DistillerPageWebContents : public DistillerPage, + public content::WebContentsObserver { + 
public: + DistillerPageWebContents(DistillerPage::Delegate* delegate, + content::BrowserContext* browser_context); + virtual ~DistillerPageWebContents(); + + // content::WebContentsObserver implementation. + virtual void DidFinishLoad(int64 frame_id, + const GURL& validated_url, + bool is_main_frame, + RenderViewHost* render_view_host) OVERRIDE; + + virtual void DidFailLoad(int64 frame_id, + const GURL& validated_url, + bool is_main_frame, + int error_code, + const string16& error_description, + RenderViewHost* render_view_host) OVERRIDE; + + protected: + virtual void InitImpl() OVERRIDE; + virtual void LoadURLImpl(const GURL& gurl) OVERRIDE; + virtual void ExecuteJavaScriptImpl(const std::string& script) OVERRIDE; + + private: + scoped_ptr<content::WebContents> web_contents_; + content::BrowserContext* browser_context_; + DISALLOW_COPY_AND_ASSIGN(DistillerPageWebContents); +}; + +} // namespace dom_distiller + +#endif // COMPONENTS_DOM_DISTILLER_CONTENT_DISTILLER_PAGE_WEB_CONTENTS_H_ diff --git a/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc b/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc new file mode 100644 index 0000000..13f6a49 --- /dev/null +++ b/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc @@ -0,0 +1,86 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/run_loop.h" +#include "base/values.h" +#include "components/dom_distiller/content/distiller_page_web_contents.h" +#include "components/dom_distiller/core/distiller_page.h" +#include "content/public/browser/browser_context.h" +#include "content/shell/browser/shell.h" +#include "content/test/content_browser_test.h" +#include "net/test/embedded_test_server/embedded_test_server.h" + +using content::ContentBrowserTest; + +namespace { + // TODO(bengr): Once JavaScript has landed to extract article content from + // a loaded page, test the interaction of that script with + // DistillerPageWebContents. + static const char kTitle[] = "Test Page Title"; + static const char kHtml[] = + "<body>T<img src='http://t.com/t.jpg' id='0'></body>"; + static const char kImageUrl[] = "http://t.com/t.jpg"; + + static const char kScript[] = + " (function () {" + " var result = new Array(3);" + " result[0] = \"Test Page Title\";" + " result[1] = \"<body>T<img src='http://t.com/t.jpg' id='0'></body>\";" + " result[2] = \"http://t.com/t.jpg\";" + " return result;" + " }())"; +} + +namespace dom_distiller { + +class DistillerPageWebContentsTest + : public ContentBrowserTest, + public DistillerPage::Delegate { + public: + void DistillPage(const base::Closure& quit_closure, const std::string& url) { + quit_closure_ = quit_closure; + distiller_page_->LoadURL( + embedded_test_server()->GetURL(url)); + } + + virtual void OnLoadURLDone() OVERRIDE { + distiller_page_->ExecuteJavaScript(kScript); + } + + virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE { + value_ = value->DeepCopy(); + quit_closure_.Run(); + } + + protected: + DistillerPageWebContents* distiller_page_; + base::Closure quit_closure_; + const base::Value* value_; +}; + +IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, LoadPage) { + ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady()); + DistillerPageWebContents distiller_page( + this, 
shell()->web_contents()->GetBrowserContext()); + distiller_page_ = &distiller_page; + distiller_page.Init(); + base::RunLoop run_loop; + DistillPage(run_loop.QuitClosure(), "/simple_page.html"); + run_loop.Run(); + + const base::ListValue* result_list = NULL; + ASSERT_TRUE(value_->GetAsList(&result_list)); + ASSERT_EQ(3u, result_list->GetSize()); + std::string title; + result_list->GetString(0, &title); + ASSERT_EQ(kTitle, title); + std::string html; + result_list->GetString(1, &html); + ASSERT_EQ(kHtml, html); + std::string image_url; + result_list->GetString(2, &image_url); + ASSERT_EQ(kImageUrl, image_url); +} + +} // namespace dom_distiller diff --git a/components/dom_distiller/content/dom_distiller_service_factory.cc b/components/dom_distiller/content/dom_distiller_service_factory.cc index 956451f..1d030e6 100644 --- a/components/dom_distiller/content/dom_distiller_service_factory.cc +++ b/components/dom_distiller/content/dom_distiller_service_factory.cc @@ -5,8 +5,10 @@ #include "components/dom_distiller/content/dom_distiller_service_factory.h" #include "components/browser_context_keyed_service/browser_context_dependency_manager.h" +#include "components/dom_distiller/content/distiller_page_web_contents.h" #include "components/dom_distiller/core/distiller.h" #include "components/dom_distiller/core/dom_distiller_store.h" +#include "content/public/browser/browser_context.h" namespace dom_distiller { @@ -38,7 +40,13 @@ DomDistillerServiceFactory::~DomDistillerServiceFactory() {} BrowserContextKeyedService* DomDistillerServiceFactory::BuildServiceInstanceFor( content::BrowserContext* profile) const { scoped_ptr<DomDistillerStoreInterface> dom_distiller_store; - scoped_ptr<DistillerFactory> distiller_factory; + scoped_ptr<DistillerPageFactory> distiller_page_factory( + new DistillerPageWebContentsFactory(profile)); + scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( + new DistillerURLFetcherFactory(profile->GetRequestContext())); + 
scoped_ptr<DistillerFactory> distiller_factory( + new DistillerFactory(distiller_page_factory.Pass(), + distiller_url_fetcher_factory.Pass())); return new DomDistillerContextKeyedService(dom_distiller_store.Pass(), distiller_factory.Pass()); } diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc new file mode 100644 index 0000000..11189e5 --- /dev/null +++ b/components/dom_distiller/core/distiller.cc @@ -0,0 +1,131 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/dom_distiller/core/distiller.h" + +#include <map> + +#include "base/bind.h" +#include "base/callback.h" +#include "base/strings/stringprintf.h" +#include "base/strings/utf_string_conversions.h" +#include "base/values.h" +#include "components/dom_distiller/core/distiller_page.h" +#include "components/dom_distiller/core/distiller_url_fetcher.h" +#include "components/dom_distiller/core/proto/distilled_page.pb.h" +#include "grit/dom_distiller_resources.h" +#include "net/url_request/url_request_context_getter.h" +#include "ui/base/resource/resource_bundle.h" +#include "url/gurl.h" + +namespace dom_distiller { + +DistillerFactory::DistillerFactory( + scoped_ptr<DistillerPageFactory> distiller_page_factory, + scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) + : distiller_page_factory_(distiller_page_factory.Pass()), + distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} + +DistillerFactory::~DistillerFactory() {} + +Distiller* DistillerFactory::CreateDistiller() { + return new DistillerImpl(*distiller_page_factory_, + *distiller_url_fetcher_factory_); +} + +DistillerImpl::DistillerImpl( + const DistillerPageFactory& distiller_page_factory, + const DistillerURLFetcherFactory& distiller_url_fetcher_factory) + : distiller_page_factory_(distiller_page_factory), + 
distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { + distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); +} + +DistillerImpl::~DistillerImpl() { +} + +void DistillerImpl::Init() { + distiller_page_->Init(); +} + +void DistillerImpl::DistillPage(const GURL& url, + const DistillerCallback& distillation_cb) { + distillation_cb_ = distillation_cb; + proto_.reset(new DistilledPageProto()); + proto_->set_url(url.spec()); + LoadURL(url); +} + +void DistillerImpl::LoadURL(const GURL& url) { + distiller_page_->LoadURL(url); +} + +void DistillerImpl::OnLoadURLDone() { + GetDistilledContent(); +} + +void DistillerImpl::GetDistilledContent() { + std::string script = + ResourceBundle::GetSharedInstance().GetRawDataResource( + IDR_DISTILLER_JS).as_string(); + distiller_page_->ExecuteJavaScript(script); +} + +void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { + std::string result; + const base::ListValue* result_list = NULL; + if (!value->GetAsList(&result_list)) { + DCHECK(proto_); + distillation_cb_.Run(proto_.Pass()); + return; + } + int i = 0; + for (base::ListValue::const_iterator iter = result_list->begin(); + iter != result_list->end(); ++iter, ++i) { + std::string item; + (*iter)->GetAsString(&item); + // The JavaScript returns an array where the first element is the title, + // the second element is the article content HTML, and the remaining + // elements are image URLs referenced in the HTML. 
+ switch (i) { + case 0: + proto_->set_title(item); + break; + case 1: + proto_->set_html(item); + break; + default: + int image_number = i - 2; + std::string image_id = base::StringPrintf("%d", image_number); + FetchImage(image_id, item); + } + } +} + +void DistillerImpl::FetchImage(const std::string& image_id, + const std::string& item) { + DistillerURLFetcher* fetcher = + distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); + image_fetchers_[image_id] = fetcher; + fetcher->FetchURL(item, + base::Bind(&DistillerImpl::OnFetchImageDone, + base::Unretained(this), image_id)); +} + +void DistillerImpl::OnFetchImageDone(const std::string& id, + const std::string& response) { + DCHECK(proto_); + DistilledPageProto_Image* image = proto_->add_image(); + image->set_name(id); + image->set_data(response); + DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); + delete image_fetchers_[id]; + int result = image_fetchers_.erase(id); + DCHECK_EQ(1, result); + if (image_fetchers_.empty()) { + distillation_cb_.Run(proto_.Pass()); + } +} + +} // namespace dom_distiller diff --git a/components/dom_distiller/core/distiller.h b/components/dom_distiller/core/distiller.h index b4c4ea4..6fe8228 100644 --- a/components/dom_distiller/core/distiller.h +++ b/components/dom_distiller/core/distiller.h @@ -8,28 +8,87 @@ #include <map> #include "base/callback.h" +#include "base/gtest_prod_util.h" +#include "base/memory/ref_counted.h" #include "base/values.h" +#include "components/dom_distiller/core/distiller_page.h" +#include "components/dom_distiller/core/distiller_url_fetcher.h" +#include "components/dom_distiller/core/proto/distilled_page.pb.h" +#include "net/url_request/url_request_context_getter.h" #include "url/gurl.h" namespace dom_distiller { -class DistilledPageProto; +class DistillerImpl; -class DistillerInterface { +class Distiller { public: - typedef base::Callback<void(DistilledPageProto*)> DistillerCallback; - virtual ~DistillerInterface() {} + typedef 
base::Callback<void( + scoped_ptr<DistilledPageProto>)> DistillerCallback; + virtual ~Distiller() {} - // Distills a page, and asynchronously returns the article HTML to the + // Distills a page, and asynchronously returns the article HTML to the // supplied callback. virtual void DistillPage(const GURL& url, const DistillerCallback& callback) = 0; }; + +// Factory for creating a Distiller. class DistillerFactory { public: - virtual ~DistillerFactory() {}; - virtual scoped_ptr<DistillerInterface> CreateDistiller() = 0; + DistillerFactory( + scoped_ptr<DistillerPageFactory> distiller_page_factory, + scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); + virtual ~DistillerFactory(); + virtual Distiller* CreateDistiller(); + + private: + scoped_ptr<DistillerPageFactory> distiller_page_factory_; + scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; +}; + + +// Distills an article from a page and associated pages. +class DistillerImpl : public Distiller, + public DistillerPage::Delegate { + public: + DistillerImpl( + const DistillerPageFactory& distiller_page_factory, + const DistillerURLFetcherFactory& distiller_url_fetcher_factory); + virtual ~DistillerImpl(); + + // Creates an execution context. This must be called once before any calls are + // made to distill the page. + virtual void Init(); + + virtual void DistillPage(const GURL& url, + const DistillerCallback& callback) OVERRIDE; + + // DistillerPage::Delegate + virtual void OnLoadURLDone() OVERRIDE; + virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; + + void OnFetchImageDone(const std::string& id, const std::string& response); + + private: + virtual void LoadURL(const GURL& url); + virtual void FetchImage(const std::string& image_id, const std::string& item); + + // Injects JavaScript to distill a loaded page down to its important content, + // e.g., extracting a news article from its surrounding boilerplate. 
+ void GetDistilledContent(); + + const DistillerPageFactory& distiller_page_factory_; + const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; + scoped_ptr<DistillerPage> distiller_page_; + DistillerCallback distillation_cb_; + + std::map<std::string, DistillerURLFetcher* > image_fetchers_; + + scoped_ptr<DistilledPageProto> proto_; + + DISALLOW_COPY_AND_ASSIGN(DistillerImpl); }; } // namespace dom_distiller diff --git a/components/dom_distiller/core/distiller_page.cc b/components/dom_distiller/core/distiller_page.cc new file mode 100644 index 0000000..cc486c7 --- /dev/null +++ b/components/dom_distiller/core/distiller_page.cc @@ -0,0 +1,67 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/dom_distiller/core/distiller_page.h" + +#include "base/logging.h" +#include "url/gurl.h" + +namespace dom_distiller { + +DistillerPageFactory::~DistillerPageFactory() {} + +DistillerPage::DistillerPage( + DistillerPage::Delegate* delegate) + : state_(NO_CONTEXT), delegate_(delegate) { +} + +DistillerPage::~DistillerPage() { +} + +void DistillerPage::Init() { + DCHECK_EQ(NO_CONTEXT, state_); + InitImpl(); + state_ = IDLE; +} + +void DistillerPage::LoadURL(const GURL& gurl) { + DCHECK(state_ == IDLE || + state_ == PAGE_AVAILABLE || + state_ == PAGELOAD_FAILED); + state_ = LOADING_PAGE; + LoadURLImpl(gurl); +} + +void DistillerPage::ExecuteJavaScript(const std::string& script) { + DCHECK_EQ(PAGE_AVAILABLE, state_); + state_ = EXECUTING_JAVASCRIPT; + ExecuteJavaScriptImpl(script); +} + +void DistillerPage::OnLoadURLDone() { + DCHECK_EQ(LOADING_PAGE, state_); + state_ = PAGE_AVAILABLE; + if (!delegate_) + return; + delegate_->OnLoadURLDone(); +} + +void DistillerPage::OnLoadURLFailed() { + state_ = PAGELOAD_FAILED; + scoped_ptr<base::Value> empty(base::Value::CreateNullValue()); + if (!delegate_) + return; + 
delegate_->OnExecuteJavaScriptDone(empty.get()); +} + +void DistillerPage::OnExecuteJavaScriptDone( + const base::Value* value) { + DCHECK_EQ(EXECUTING_JAVASCRIPT, state_); + state_ = PAGE_AVAILABLE; + if (!delegate_) + return; + delegate_->OnExecuteJavaScriptDone(value); +} + +} // namespace dom_distiller diff --git a/components/dom_distiller/core/distiller_page.h b/components/dom_distiller/core/distiller_page.h new file mode 100644 index 0000000..8769a0e --- /dev/null +++ b/components/dom_distiller/core/distiller_page.h @@ -0,0 +1,100 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_ +#define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_ + +#include "base/memory/scoped_ptr.h" +#include "base/values.h" +#include "url/gurl.h" + +namespace dom_distiller { + +// Injects JavaScript into a page, and uses it to extract and return long-form +// content. The class can be reused to load and distill multiple pages, +// following the state transitions described along with the class's states. +class DistillerPage { + public: + class Delegate { + public: + virtual ~Delegate() {} + virtual void OnLoadURLDone() {} + virtual void OnExecuteJavaScriptDone(const base::Value* value) {} + }; + + DistillerPage(Delegate* delegate); + + virtual ~DistillerPage(); + + + // Initializes a |DistillerPage|. It must be called before any + // other functions, and must only be called once. + void Init(); + + // Loads a URL. |OnLoadURLDone| is called when the load completes, and + // |OnLoadURLFailed| is called when it fails. May be called when the + // distiller is idle, a page is available, or a previous load has failed. + void LoadURL(const GURL& url); + virtual void OnLoadURLDone(); + virtual void OnLoadURLFailed(); + + // Injects and executes JavaScript in the context of a loaded page. |LoadURL| + // must complete before this function is called. 
May be called only when + // a page is available. + void ExecuteJavaScript(const std::string& script); + + // Called when the JavaScript execution completes. |value| contains data + // returned by the script. + virtual void OnExecuteJavaScriptDone(const base::Value* value); + + protected: + enum State { + // No context has yet been set in which to load or distill a page. + NO_CONTEXT, + // The page distiller has been initialized and is idle. + IDLE, + // A page is currently loading. + LOADING_PAGE, + // A page has loaded within the specified context. + PAGE_AVAILABLE, + // There was an error processing the page. + PAGELOAD_FAILED, + // JavaScript is executing within the context of the page. When the + // JavaScript completes, the state will be returned to |PAGE_AVAILABLE|. + EXECUTING_JAVASCRIPT + }; + + // Called by |Init| to do platform-specific initialization work to set up an + // environment in which a page can be loaded. + virtual void InitImpl() = 0; + + // Called by |LoadURL| to carry out platform-specific instructions to load a + // page. + virtual void LoadURLImpl(const GURL& gurl) = 0; + + // Called by |ExecuteJavaScript| to carry out platform-specific instructions + // to inject and execute JavaScript within the context of the loaded page. + virtual void ExecuteJavaScriptImpl(const std::string& script) = 0; + + + + // The current state of the |DistillerPage|, initially |NO_CONTEXT|. + State state_; + + private: + Delegate* delegate_; + DISALLOW_COPY_AND_ASSIGN(DistillerPage); +}; + +// Factory for generating a |DistillerPage|. 
+class DistillerPageFactory { + public: + virtual ~DistillerPageFactory(); + + virtual scoped_ptr<DistillerPage> CreateDistillerPage( + DistillerPage::Delegate* delegate) const = 0; +}; + +} // namespace dom_distiller + +#endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_ diff --git a/components/dom_distiller/core/distiller_unittest.cc b/components/dom_distiller/core/distiller_unittest.cc new file mode 100644 index 0000000..144b41e --- /dev/null +++ b/components/dom_distiller/core/distiller_unittest.cc @@ -0,0 +1,169 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <map> + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/location.h" +#include "base/memory/scoped_ptr.h" +#include "base/message_loop/message_loop.h" +#include "base/values.h" +#include "components/dom_distiller/core/distiller.h" +#include "components/dom_distiller/core/distiller_page.h" +#include "components/dom_distiller/core/proto/distilled_page.pb.h" +#include "net/url_request/url_request_context_getter.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "ui/base/resource/resource_bundle.h" + +#if defined(OS_ANDROID) +#include "base/android/jni_android.h" +#include "ui/base/android/ui_jni_registrar.h" +#include "ui/gfx/android/gfx_jni_registrar.h" +#endif + +using::testing::Invoke; +using::testing::Return; +using::testing::_; + +namespace { + const char kTitle[] = "Title"; + const char kContent[] = "Content"; + const char kURL[] = "http://a.com/"; + const char kId0[] = "0"; + const char kId1[] = "1"; + const char kImageURL0[] = "http://a.com/img1.jpg"; + const char kImageURL1[] = "http://a.com/img2.jpg"; + const char kImageData0[] = { 'a', 'b', 'c', 'd', 'e', 0 }; + const char kImageData1[] = { '1', '2', '3', '4', '5', 0 }; +} + +namespace dom_distiller { + +class 
TestDistillerURLFetcher : public DistillerURLFetcher { + public: + + TestDistillerURLFetcher() : DistillerURLFetcher(NULL) { + responses_[kImageURL0] = std::string(kImageData0); + responses_[kImageURL1] = std::string(kImageData1); + } + + void CallCallback(std::string url, const URLFetcherCallback& callback) { + callback.Run(responses_[url]); + } + + virtual void FetchURL(const std::string& url, + const URLFetcherCallback& callback) OVERRIDE { + ASSERT_TRUE(base::MessageLoop::current()); + base::MessageLoop::current()->PostTask( + FROM_HERE, + base::Bind(&TestDistillerURLFetcher::CallCallback, + base::Unretained(this), url, callback)); + } + + std::map<std::string, std::string> responses_; +}; + + +class TestDistillerURLFetcherFactory : public DistillerURLFetcherFactory { + public: + TestDistillerURLFetcherFactory() : DistillerURLFetcherFactory(NULL) {} + virtual ~TestDistillerURLFetcherFactory() {} + virtual DistillerURLFetcher* CreateDistillerURLFetcher() const OVERRIDE { + return new TestDistillerURLFetcher(); + } +}; + + +class MockDistillerPage : public DistillerPage { + public: + MOCK_METHOD0(InitImpl, void()); + MOCK_METHOD1(LoadURLImpl, void(const GURL& gurl)); + MOCK_METHOD1(ExecuteJavaScriptImpl, void(const std::string& script)); + + MockDistillerPage(DistillerPage::Delegate* delegate) + : DistillerPage(delegate) { + } +}; + + +class MockDistillerPageFactory : public DistillerPageFactory { + public: + MOCK_CONST_METHOD1( + CreateDistillerPageMock, + DistillerPage*(DistillerPage::Delegate* delegate)); + + virtual scoped_ptr<DistillerPage> CreateDistillerPage( + DistillerPage::Delegate* delegate) const OVERRIDE { + return scoped_ptr<DistillerPage>(CreateDistillerPageMock(delegate)); + } +}; + + +class DistillerTest : public testing::Test { + public: + virtual ~DistillerTest() {} + void OnDistillPageDone(scoped_ptr<DistilledPageProto> proto) { + proto_ = proto.Pass(); + } + + protected: + scoped_ptr<DistillerImpl> distiller_; + scoped_ptr<DistilledPageProto> 
proto_; + MockDistillerPageFactory page_factory_; + TestDistillerURLFetcherFactory url_fetcher_factory_; +}; + +ACTION_P2(DistillerPageOnExecuteJavaScriptDone, distiller_page, list) { + distiller_page->OnExecuteJavaScriptDone(list); +} + +ACTION_P2(CreateMockDistillerPage, list, kurl) { + DistillerPage::Delegate* delegate = arg0; + MockDistillerPage* distiller_page = new MockDistillerPage(delegate); + EXPECT_CALL(*distiller_page, InitImpl()); + EXPECT_CALL(*distiller_page, LoadURLImpl(kurl)) + .WillOnce(testing::InvokeWithoutArgs(distiller_page, + &DistillerPage::OnLoadURLDone)); + EXPECT_CALL(*distiller_page, ExecuteJavaScriptImpl(_)) + .WillOnce(DistillerPageOnExecuteJavaScriptDone(distiller_page, list)); + return distiller_page; +} + +TEST_F(DistillerTest, DistillPage) { +#if defined(OS_ANDROID) + // Register JNI bindings for android. + JNIEnv* env = base::android::AttachCurrentThread(); + gfx::android::RegisterJni(env); + ui::android::RegisterJni(env); +#endif + + ui::ResourceBundle::InitSharedInstanceWithLocale("en-US", NULL); + base::MessageLoop loop(base::MessageLoop::TYPE_UI); + scoped_ptr<base::ListValue> list(new base::ListValue()); + list->AppendString(kTitle); + list->AppendString(kContent); + list->AppendString(kImageURL0); + list->AppendString(kImageURL1); + EXPECT_CALL(page_factory_, + CreateDistillerPageMock(_)).WillOnce( + CreateMockDistillerPage(list.get(), GURL(kURL))); + distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); + distiller_->Init(); + distiller_->DistillPage( + GURL(kURL), + base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this))); + base::MessageLoop::current()->RunUntilIdle(); + EXPECT_EQ(kTitle, proto_->title()); + EXPECT_EQ(kContent, proto_->html()); + EXPECT_EQ(kURL, proto_->url()); + EXPECT_EQ(2, proto_->image_size()); + EXPECT_EQ(kImageData0, proto_->image(0).data()); + EXPECT_EQ(kId0, proto_->image(0).name()); + EXPECT_EQ(kImageData1, proto_->image(1).data()); + EXPECT_EQ(kId1, 
proto_->image(1).name()); +} + +} // namespace dom_distiller diff --git a/components/dom_distiller/core/distiller_url_fetcher.cc b/components/dom_distiller/core/distiller_url_fetcher.cc index 4472768..854a34b 100644 --- a/components/dom_distiller/core/distiller_url_fetcher.cc +++ b/components/dom_distiller/core/distiller_url_fetcher.cc @@ -15,20 +15,31 @@ using net::URLFetcher; namespace dom_distiller { -DistillerURLFetcher::DistillerURLFetcher() { +DistillerURLFetcherFactory::DistillerURLFetcherFactory( + net::URLRequestContextGetter* context_getter) + : context_getter_(context_getter) { +} + +DistillerURLFetcher* +DistillerURLFetcherFactory::CreateDistillerURLFetcher() const { + return new DistillerURLFetcher(context_getter_); +} + + +DistillerURLFetcher::DistillerURLFetcher( + net::URLRequestContextGetter* context_getter) + : context_getter_(context_getter) { } DistillerURLFetcher::~DistillerURLFetcher() { } -void DistillerURLFetcher::FetchURL( - net::URLRequestContextGetter* context_getter, - const std::string& url, - const URLFetcherCallback& callback) { +void DistillerURLFetcher::FetchURL(const std::string& url, + const URLFetcherCallback& callback) { // Don't allow a fetch if one is pending. DCHECK(!url_fetcher_ || !url_fetcher_->GetStatus().is_io_pending()); callback_ = callback; - url_fetcher_.reset(CreateURLFetcher(context_getter, url)); + url_fetcher_.reset(CreateURLFetcher(context_getter_, url)); url_fetcher_->Start(); } diff --git a/components/dom_distiller/core/distiller_url_fetcher.h b/components/dom_distiller/core/distiller_url_fetcher.h index c6c654b..cb1dad7 100644 --- a/components/dom_distiller/core/distiller_url_fetcher.h +++ b/components/dom_distiller/core/distiller_url_fetcher.h @@ -12,20 +12,32 @@ namespace dom_distiller { +class DistillerURLFetcher; + +// Class for creating a DistillerURLFetcher. 
+class DistillerURLFetcherFactory { + public: + DistillerURLFetcherFactory(net::URLRequestContextGetter* context_getter); + virtual ~DistillerURLFetcherFactory() {} + virtual DistillerURLFetcher* CreateDistillerURLFetcher() const; + + private: + net::URLRequestContextGetter* context_getter_; +}; + // This class fetches a URL, and notifies the caller when the operation // completes or fails. If the request fails, an empty string will be returned. class DistillerURLFetcher : public net::URLFetcherDelegate { public: - DistillerURLFetcher(); + DistillerURLFetcher(net::URLRequestContextGetter* context_getter); virtual ~DistillerURLFetcher(); // Indicates when a fetch is done. typedef base::Callback<void(const std::string& data)> URLFetcherCallback; // Fetches a |url|. Notifies when the fetch is done via |callback|. - void FetchURL(net::URLRequestContextGetter* context_getter, - const std::string& url, - const URLFetcherCallback& callback); + virtual void FetchURL(const std::string& url, + const URLFetcherCallback& callback); protected: virtual net::URLFetcher* CreateURLFetcher( @@ -38,6 +50,7 @@ class DistillerURLFetcher : public net::URLFetcherDelegate { scoped_ptr<net::URLFetcher> url_fetcher_; URLFetcherCallback callback_; + net::URLRequestContextGetter* context_getter_; DISALLOW_COPY_AND_ASSIGN(DistillerURLFetcher); }; diff --git a/components/dom_distiller/core/distiller_url_fetcher_unittest.cc b/components/dom_distiller/core/distiller_url_fetcher_unittest.cc index 9f4a806..b8e6367 100644 --- a/components/dom_distiller/core/distiller_url_fetcher_unittest.cc +++ b/components/dom_distiller/core/distiller_url_fetcher_unittest.cc @@ -29,23 +29,25 @@ public: protected: // testing::Test implementation: virtual void SetUp() OVERRIDE { - url_fetcher_.reset(new dom_distiller::DistillerURLFetcher()); + url_fetcher_.reset(new dom_distiller::DistillerURLFetcher(NULL)); factory_.reset(new net::FakeURLFetcherFactory(NULL)); factory_->SetFakeResponse( GURL(kTestPageA), 
std::string(kTestPageAResponse, sizeof(kTestPageAResponse)), - net::HTTP_OK, net::URLRequestStatus::SUCCESS); + net::HTTP_OK, + net::URLRequestStatus::SUCCESS); factory_->SetFakeResponse( GURL(kTestPageB), std::string(kTestPageBResponse, sizeof(kTestPageBResponse)), - net::HTTP_INTERNAL_SERVER_ERROR, net::URLRequestStatus::FAILED); + net::HTTP_INTERNAL_SERVER_ERROR, + net::URLRequestStatus::SUCCESS); } void Fetch(const std::string& url, const std::string& expected_response) { base::MessageLoop loop(base::MessageLoop::TYPE_UI); url_fetcher_->FetchURL( - NULL, url, + url, base::Bind(&DistillerURLFetcherTest::FetcherCallback, base::Unretained(this))); loop.RunUntilIdle(); diff --git a/components/dom_distiller/core/resources/distiller.js b/components/dom_distiller/core/resources/distiller.js new file mode 100644 index 0000000..4088cc7 --- /dev/null +++ b/components/dom_distiller/core/resources/distiller.js @@ -0,0 +1,16 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +(function () { + // TODO(bengr): Replace this JavaScript, which is only a placeholder, with + // code that will extract article content from a loaded page. + var result = new Array(4); + result[0] = "Rain in Seattle"; + result[1] = "<i>Seattle</i> It is raining. 
" + + "<img src='http://www.a.com/img0.jpg' id='0'>" + + "<img src='http://www.a.com/img1.jpg' id='1'>"; + result[2] = "http://www.a.com/img0.jpg"; + result[3] = "http://www.a.com/img1.jpg"; + return result; +}()) diff --git a/components/dom_distiller_resources.grd b/components/dom_distiller_resources.grd index dd2a5f5..8713589 100644 --- a/components/dom_distiller_resources.grd +++ b/components/dom_distiller_resources.grd @@ -12,6 +12,7 @@ <include name="IDR_ABOUT_DOM_DISTILLER_HTML" file="dom_distiller/webui/resources/about_dom_distiller.html" flattenhtml="true" allowexternalscript="true" type="BINDATA" /> <include name="IDR_ABOUT_DOM_DISTILLER_CSS" file="dom_distiller/webui/resources/about_dom_distiller.css" type="BINDATA" /> <include name="IDR_ABOUT_DOM_DISTILLER_JS" file="dom_distiller/webui/resources/about_dom_distiller.js" type="BINDATA" /> + <include name="IDR_DISTILLER_JS" file="dom_distiller/core/resources/distiller.js" type="BINDATA" /> </includes> </release> </grit> |