From b4c49028da217cbf78d1876e69cbf7af07aa385d Mon Sep 17 00:00:00 2001 From: "nyquist@chromium.org" Date: Sat, 12 Apr 2014 01:04:17 +0000 Subject: Remove PageDistiller from DOM Distiller. The PageDistiller adds an extra layer between the Distiller and the underlying code which in fact distills the content. This CL simplifies the distillation process by removing this class and the delegate used for communcation. More of the logic is now moved into the DistillerPage class. BUG=361939 Review URL: https://codereview.chromium.org/231933005 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@263434 0039d316-1c4b-4281-b951-d872f2087c98 --- components/dom_distiller.gypi | 2 - .../content/distiller_page_web_contents.cc | 12 +-- .../content/distiller_page_web_contents.h | 7 +- .../distiller_page_web_contents_browsertest.cc | 70 +++++-------- components/dom_distiller/core/distiller.cc | 6 +- components/dom_distiller/core/distiller.h | 4 +- components/dom_distiller/core/distiller_page.cc | 86 ++++++++++++---- components/dom_distiller/core/distiller_page.h | 41 ++++---- .../dom_distiller/core/distiller_unittest.cc | 52 +++++----- components/dom_distiller/core/page_distiller.cc | 109 --------------------- components/dom_distiller/core/page_distiller.h | 74 -------------- 11 files changed, 146 insertions(+), 317 deletions(-) delete mode 100644 components/dom_distiller/core/page_distiller.cc delete mode 100644 components/dom_distiller/core/page_distiller.h diff --git a/components/dom_distiller.gypi b/components/dom_distiller.gypi index b930bb3..1c5a47a 100644 --- a/components/dom_distiller.gypi +++ b/components/dom_distiller.gypi @@ -73,8 +73,6 @@ 'dom_distiller/core/dom_distiller_store.h', 'dom_distiller/core/feedback_reporter.cc', 'dom_distiller/core/feedback_reporter.h', - 'dom_distiller/core/page_distiller.cc', - 'dom_distiller/core/page_distiller.h', 'dom_distiller/core/task_tracker.cc', 'dom_distiller/core/task_tracker.h', 'dom_distiller/core/url_constants.cc', diff --git a/components/dom_distiller/content/distiller_page_web_contents.cc b/components/dom_distiller/content/distiller_page_web_contents.cc index 9cf8388..861c776 100644 --- a/components/dom_distiller/content/distiller_page_web_contents.cc +++ b/components/dom_distiller/content/distiller_page_web_contents.cc @@ -18,20 +18,18 @@ namespace dom_distiller { -scoped_ptr DistillerPageWebContentsFactory::CreateDistillerPage( - const base::WeakPtr& delegate) const { +scoped_ptr DistillerPageWebContentsFactory::CreateDistillerPage() + const { DCHECK(browser_context_); return scoped_ptr( - new DistillerPageWebContents(delegate, browser_context_)); + new DistillerPageWebContents(browser_context_)); } DistillerPageWebContents::DistillerPageWebContents( - const base::WeakPtr& delegate, content::BrowserContext* browser_context) - : DistillerPage(delegate), browser_context_(browser_context) {} + : browser_context_(browser_context) {} -DistillerPageWebContents::~DistillerPageWebContents() { -} +DistillerPageWebContents::~DistillerPageWebContents() {} void DistillerPageWebContents::InitImpl() { DCHECK(browser_context_); diff --git a/components/dom_distiller/content/distiller_page_web_contents.h b/components/dom_distiller/content/distiller_page_web_contents.h index c6b94fc..cefbec3 100644 --- a/components/dom_distiller/content/distiller_page_web_contents.h +++ b/components/dom_distiller/content/distiller_page_web_contents.h @@ -30,19 +30,16 @@ class DistillerPageWebContentsFactory : public DistillerPageFactory { : DistillerPageFactory(), browser_context_(browser_context) {} virtual ~DistillerPageWebContentsFactory() {} - virtual scoped_ptr CreateDistillerPage( - const base::WeakPtr& delegate) const OVERRIDE; + virtual scoped_ptr CreateDistillerPage() const OVERRIDE; private: content::BrowserContext* browser_context_; }; - class DistillerPageWebContents : public DistillerPage, public content::WebContentsObserver { public: - DistillerPageWebContents(const base::WeakPtr& delegate, - content::BrowserContext* browser_context); + DistillerPageWebContents(content::BrowserContext* browser_context); virtual ~DistillerPageWebContents(); // content::WebContentsObserver implementation. diff --git a/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc b/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc index 2aa7f0e..c7faf1d 100644 --- a/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc +++ b/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc @@ -23,9 +23,7 @@ using testing::Not; namespace dom_distiller { -class DistillerPageWebContentsTest - : public ContentBrowserTest, - public DistillerPage::Delegate { +class DistillerPageWebContentsTest : public ContentBrowserTest { public: // ContentBrowserTest: virtual void SetUpOnMainThread() OVERRIDE { @@ -36,18 +34,16 @@ class DistillerPageWebContentsTest void DistillPage(const base::Closure& quit_closure, const std::string& url) { quit_closure_ = quit_closure; - distiller_page_->LoadURL(embedded_test_server()->GetURL(url)); + distiller_page_->DistillPage( + embedded_test_server()->GetURL(url), + base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished, + this)); } - virtual void OnLoadURLDone() OVERRIDE { - std::string script = ResourceBundle::GetSharedInstance(). - GetRawDataResource(IDR_DISTILLER_JS).as_string(); - distiller_page_->ExecuteJavaScript(script); - } - - virtual void OnExecuteJavaScriptDone(const GURL& page_url, - const base::Value* value) OVERRIDE { - value_ = value->DeepCopy(); + void OnPageDistillationFinished( + scoped_ptr distilled_page, + bool distillation_successful) { + page_info_ = distilled_page.Pass(); quit_closure_.Run(); } @@ -72,13 +68,12 @@ class DistillerPageWebContentsTest protected: DistillerPageWebContents* distiller_page_; base::Closure quit_closure_; - const base::Value* value_; + scoped_ptr page_info_; }; IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) { - base::WeakPtrFactory weak_factory(this); DistillerPageWebContents distiller_page( - weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext()); + shell()->web_contents()->GetBrowserContext()); distiller_page_ = &distiller_page; distiller_page.Init(); @@ -86,28 +81,16 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) { DistillPage(run_loop.QuitClosure(), "/simple_article.html"); run_loop.Run(); - const base::ListValue* result_list = NULL; - ASSERT_TRUE(value_->GetAsList(&result_list)); - ASSERT_EQ(4u, result_list->GetSize()); - std::string title; - result_list->GetString(0, &title); - EXPECT_EQ("Test Page Title", title); - std::string html; - result_list->GetString(1, &html); - EXPECT_THAT(html, HasSubstr("Lorem ipsum")); - EXPECT_THAT(html, Not(HasSubstr("questionable content"))); - std::string next_page_url; - result_list->GetString(2, &next_page_url); - EXPECT_EQ("", next_page_url); - std::string unused; - result_list->GetString(3, &unused); - EXPECT_EQ("", unused); + EXPECT_EQ("Test Page Title", page_info_.get()->title); + EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum")); + EXPECT_THAT(page_info_.get()->html, Not(HasSubstr("questionable content"))); + EXPECT_EQ("", page_info_.get()->next_page_url); + EXPECT_EQ("", page_info_.get()->prev_page_url); } IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) { - base::WeakPtrFactory weak_factory(this); DistillerPageWebContents distiller_page( - weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext()); + shell()->web_contents()->GetBrowserContext()); distiller_page_ = &distiller_page; distiller_page.Init(); @@ -115,21 +98,16 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) { DistillPage(run_loop.QuitClosure(), "/simple_article.html"); run_loop.Run(); - const base::ListValue* result_list = NULL; - ASSERT_TRUE(value_->GetAsList(&result_list)); - std::string html; - result_list->GetString(1, &html); // A relative link should've been updated. - EXPECT_THAT(html, + EXPECT_THAT(page_info_.get()->html, ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\"")); - EXPECT_THAT(html, + EXPECT_THAT(page_info_.get()->html, HasSubstr("href=\"http://www.google.com/absolutelink.html\"")); } IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) { - base::WeakPtrFactory weak_factory(this); DistillerPageWebContents distiller_page( - weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext()); + shell()->web_contents()->GetBrowserContext()); distiller_page_ = &distiller_page; distiller_page.Init(); @@ -137,14 +115,10 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) { DistillPage(run_loop.QuitClosure(), "/simple_article.html"); run_loop.Run(); - const base::ListValue* result_list = NULL; - ASSERT_TRUE(value_->GetAsList(&result_list)); - std::string html; - result_list->GetString(1, &html); // A relative link should've been updated. - EXPECT_THAT(html, + EXPECT_THAT(page_info_.get()->html, ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\"")); - EXPECT_THAT(html, + EXPECT_THAT(page_info_.get()->html, HasSubstr("src=\"http://www.google.com/absoluteimage.png\"")); } diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc index 259bb53..acbb68e 100644 --- a/components/dom_distiller/core/distiller.cc +++ b/components/dom_distiller/core/distiller.cc @@ -54,7 +54,7 @@ DistillerImpl::DistillerImpl( max_pages_in_article_(kMaxPagesInArticle), destruction_allowed_(true), weak_factory_(this) { - page_distiller_.reset(new PageDistiller(distiller_page_factory)); + distiller_page_ = distiller_page_factory.CreateDistillerPage().Pass(); } DistillerImpl::~DistillerImpl() { @@ -63,7 +63,7 @@ DistillerImpl::~DistillerImpl() { void DistillerImpl::Init() { DCHECK(AreAllPagesFinished()); - page_distiller_->Init(); + distiller_page_->Init(); } void DistillerImpl::SetMaxNumPagesInArticle(size_t max_num_pages) { @@ -125,7 +125,7 @@ void DistillerImpl::DistillNextPage() { seen_urls_.insert(url.spec()); pages_.push_back(new DistilledPageData()); started_pages_index_[page_num] = pages_.size() - 1; - page_distiller_->DistillPage( + distiller_page_->DistillPage( url, base::Bind(&DistillerImpl::OnPageDistillationFinished, weak_factory_.GetWeakPtr(), diff --git a/components/dom_distiller/core/distiller.h b/components/dom_distiller/core/distiller.h index e6b6529..249a1b3 100644 --- a/components/dom_distiller/core/distiller.h +++ b/components/dom_distiller/core/distiller.h @@ -15,8 +15,8 @@ #include "base/memory/scoped_vector.h" #include "base/memory/weak_ptr.h" #include "components/dom_distiller/core/article_distillation_update.h" +#include "components/dom_distiller/core/distiller_page.h" #include "components/dom_distiller/core/distiller_url_fetcher.h" -#include "components/dom_distiller/core/page_distiller.h" #include "components/dom_distiller/core/proto/distilled_article.pb.h" #include "net/url_request/url_request_context_getter.h" #include "url/gurl.h" @@ -152,7 +152,7 @@ class DistillerImpl : public Distiller { const ArticleDistillationUpdate CreateDistillationUpdate() const; const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; - scoped_ptr page_distiller_; + scoped_ptr distiller_page_; DistillationFinishedCallback finished_cb_; DistillationUpdateCallback update_cb_; diff --git a/components/dom_distiller/core/distiller_page.cc b/components/dom_distiller/core/distiller_page.cc index 7f188ea..eed6c2c 100644 --- a/components/dom_distiller/core/distiller_page.cc +++ b/components/dom_distiller/core/distiller_page.cc @@ -4,21 +4,24 @@ #include "components/dom_distiller/core/distiller_page.h" +#include "base/bind.h" #include "base/logging.h" +#include "base/message_loop/message_loop.h" +#include "grit/component_resources.h" +#include "ui/base/resource/resource_bundle.h" #include "url/gurl.h" namespace dom_distiller { +DistilledPageInfo::DistilledPageInfo() {} + +DistilledPageInfo::~DistilledPageInfo() {} + DistillerPageFactory::~DistillerPageFactory() {} -DistillerPage::DistillerPage( - const base::WeakPtr& delegate) - : state_(NO_CONTEXT), delegate_(delegate) { - DCHECK(delegate_.get()); -} +DistillerPage::DistillerPage() : state_(NO_CONTEXT) {} -DistillerPage::~DistillerPage() { -} +DistillerPage::~DistillerPage() {} void DistillerPage::Init() { DCHECK_EQ(NO_CONTEXT, state_); @@ -26,40 +29,87 @@ void DistillerPage::Init() { state_ = IDLE; } -void DistillerPage::LoadURL(const GURL& gurl) { - DCHECK(state_ == IDLE || - state_ == PAGE_AVAILABLE || +void DistillerPage::DistillPage(const GURL& gurl, + const DistillerPageCallback& callback) { + DCHECK(state_ == IDLE || state_ == PAGE_AVAILABLE || state_ == PAGELOAD_FAILED); state_ = LOADING_PAGE; + distiller_page_callback_ = callback; LoadURLImpl(gurl); } -void DistillerPage::ExecuteJavaScript(const std::string& script) { +void DistillerPage::ExecuteJavaScript() { DCHECK_EQ(PAGE_AVAILABLE, state_); state_ = EXECUTING_JAVASCRIPT; + DVLOG(1) << "Beginning distillation"; + std::string script = ResourceBundle::GetSharedInstance() + .GetRawDataResource(IDR_DISTILLER_JS) + .as_string(); ExecuteJavaScriptImpl(script); } void DistillerPage::OnLoadURLDone() { DCHECK_EQ(LOADING_PAGE, state_); state_ = PAGE_AVAILABLE; - DCHECK(delegate_.get()); - delegate_->OnLoadURLDone(); + ExecuteJavaScript(); } void DistillerPage::OnLoadURLFailed() { state_ = PAGELOAD_FAILED; - scoped_ptr empty(base::Value::CreateNullValue()); - DCHECK(delegate_.get()); - delegate_->OnExecuteJavaScriptDone(GURL(), empty.get()); + base::MessageLoop::current()->PostTask( + FROM_HERE, + base::Bind(distiller_page_callback_, + base::Passed(scoped_ptr()), + false)); } void DistillerPage::OnExecuteJavaScriptDone(const GURL& page_url, const base::Value* value) { DCHECK_EQ(EXECUTING_JAVASCRIPT, state_); state_ = PAGE_AVAILABLE; - DCHECK(delegate_.get()); - delegate_->OnExecuteJavaScriptDone(page_url, value); + scoped_ptr page_info(new DistilledPageInfo()); + std::string result; + const base::ListValue* result_list = NULL; + bool found_content = false; + if (!value->GetAsList(&result_list)) { + base::MessageLoop::current()->PostTask( + FROM_HERE, + base::Bind(distiller_page_callback_, base::Passed(&page_info), false)); + } else { + int i = 0; + for (base::ListValue::const_iterator iter = result_list->begin(); + iter != result_list->end(); + ++iter, ++i) { + std::string item; + (*iter)->GetAsString(&item); + // The JavaScript returns an array where the first element is the title, + // the second element is the article content HTML, and the remaining + // elements are image URLs referenced in the HTML. + switch (i) { + case 0: + page_info->title = item; + break; + case 1: + page_info->html = item; + found_content = true; + break; + case 2: + page_info->next_page_url = item; + break; + case 3: + page_info->prev_page_url = item; + break; + default: + if (GURL(item).is_valid()) { + page_info->image_urls.push_back(item); + } + } + } + base::MessageLoop::current()->PostTask( + FROM_HERE, + base::Bind( + distiller_page_callback_, base::Passed(&page_info), found_content)); + } } } // namespace dom_distiller diff --git a/components/dom_distiller/core/distiller_page.h b/components/dom_distiller/core/distiller_page.h index 94ad51e..a5c06cb 100644 --- a/components/dom_distiller/core/distiller_page.h +++ b/components/dom_distiller/core/distiller_page.h @@ -7,6 +7,7 @@ #include +#include "base/callback.h" #include "base/memory/scoped_ptr.h" #include "base/memory/weak_ptr.h" #include "base/values.h" @@ -14,39 +15,45 @@ namespace dom_distiller { +struct DistilledPageInfo { + std::string title; + std::string html; + std::string next_page_url; + std::string prev_page_url; + std::vector image_urls; + DistilledPageInfo(); + ~DistilledPageInfo(); + + private: + DISALLOW_COPY_AND_ASSIGN(DistilledPageInfo); +}; + // Injects JavaScript into a page, and uses it to extract and return long-form // content. The class can be reused to load and distill multiple pages, // following the state transitions described along with the class's states. class DistillerPage { public: - class Delegate { - public: - virtual ~Delegate() {} - virtual void OnLoadURLDone() {} - virtual void OnExecuteJavaScriptDone(const GURL& page_url, - const base::Value* value) {} - }; - - // Specifies the Delegate that owns this distiller page. - explicit DistillerPage(const base::WeakPtr& delegate); + typedef base::Callback distilled_page, + bool distillation_successful)> + DistillerPageCallback; + DistillerPage(); virtual ~DistillerPage(); - // Initializes a |DistillerPage|. It must be called before any // other functions, and must only be called once. void Init(); // Loads a URL. |OnLoadURLDone| is called when the load completes or fails. // May be called when the distiller is idle or a page is available. - void LoadURL(const GURL& url); + void DistillPage(const GURL& url, const DistillerPageCallback& callback); virtual void OnLoadURLDone(); virtual void OnLoadURLFailed(); // Injects and executes JavaScript in the context of a loaded page. |LoadURL| // must complete before this function is called. May be called only when // a page is available. - void ExecuteJavaScript(const std::string& script); + void ExecuteJavaScript(); // Called when the JavaScript execution completes. |page_url| is the url of // the distilled page. |value| contains data returned by the script. @@ -82,14 +89,11 @@ class DistillerPage { // to inject and execute JavaScript within the context of the loaded page. virtual void ExecuteJavaScriptImpl(const std::string& script) = 0; - - // The current state of the |DistillerPage|, initially |NO_CONTEXT|. State state_; private: - // The pointer to the delegate that owns this distiller page. - base::WeakPtr delegate_; + DistillerPageCallback distiller_page_callback_; DISALLOW_COPY_AND_ASSIGN(DistillerPage); }; @@ -98,8 +102,7 @@ class DistillerPageFactory { public: virtual ~DistillerPageFactory(); - virtual scoped_ptr CreateDistillerPage( - const base::WeakPtr& delegate) const = 0; + virtual scoped_ptr CreateDistillerPage() const = 0; }; } // namespace dom_distiller diff --git a/components/dom_distiller/core/distiller_unittest.cc b/components/dom_distiller/core/distiller_unittest.cc index 471f91f..d4449d1 100644 --- a/components/dom_distiller/core/distiller_unittest.cc +++ b/components/dom_distiller/core/distiller_unittest.cc @@ -224,7 +224,7 @@ class TestDistillerURLFetcherFactory : public DistillerURLFetcherFactory { class MockDistillerURLFetcherFactory : public DistillerURLFetcherFactory { public: - explicit MockDistillerURLFetcherFactory() + MockDistillerURLFetcherFactory() : DistillerURLFetcherFactory(NULL) {} virtual ~MockDistillerURLFetcherFactory() {} @@ -237,20 +237,15 @@ class MockDistillerPage : public DistillerPage { MOCK_METHOD1(LoadURLImpl, void(const GURL& gurl)); MOCK_METHOD1(ExecuteJavaScriptImpl, void(const string& script)); - explicit MockDistillerPage( - const base::WeakPtr& delegate) - : DistillerPage(delegate) {} + MockDistillerPage() {} }; class MockDistillerPageFactory : public DistillerPageFactory { public: - MOCK_CONST_METHOD1( - CreateDistillerPageMock, - DistillerPage*(const base::WeakPtr& delegate)); + MOCK_CONST_METHOD0(CreateDistillerPageMock, DistillerPage*()); - virtual scoped_ptr CreateDistillerPage( - const base::WeakPtr& delegate) const OVERRIDE { - return scoped_ptr(CreateDistillerPageMock(delegate)); + virtual scoped_ptr CreateDistillerPage() const OVERRIDE { + return scoped_ptr(CreateDistillerPageMock()); } }; @@ -286,8 +281,7 @@ ACTION_P3(DistillerPageOnExecuteJavaScriptDone, distiller_page, url, list) { } ACTION_P2(CreateMockDistillerPage, list, kurl) { - const base::WeakPtr& delegate = arg0; - MockDistillerPage* distiller_page = new MockDistillerPage(delegate); + MockDistillerPage* distiller_page = new MockDistillerPage(); EXPECT_CALL(*distiller_page, InitImpl()); EXPECT_CALL(*distiller_page, LoadURLImpl(kurl)) .WillOnce(testing::InvokeWithoutArgs(distiller_page, @@ -300,8 +294,7 @@ ACTION_P2(CreateMockDistillerPage, list, kurl) { ACTION_P2(CreateMockDistillerPageWithPendingJSCallback, distiller_page_ptr, kurl) { - const base::WeakPtr& delegate = arg0; - MockDistillerPage* distiller_page = new MockDistillerPage(delegate); + MockDistillerPage* distiller_page = new MockDistillerPage(); *distiller_page_ptr = distiller_page; EXPECT_CALL(*distiller_page, InitImpl()); EXPECT_CALL(*distiller_page, LoadURLImpl(kurl)) @@ -315,8 +308,7 @@ ACTION_P3(CreateMockDistillerPages, distiller_data, pages_size, start_page_num) { - const base::WeakPtr& delegate = arg0; - MockDistillerPage* distiller_page = new MockDistillerPage(delegate); + MockDistillerPage* distiller_page = new MockDistillerPage(); EXPECT_CALL(*distiller_page, InitImpl()); { testing::InSequence s; @@ -339,7 +331,7 @@ TEST_F(DistillerTest, DistillPage) { base::MessageLoopForUI loop; scoped_ptr list = CreateDistilledValueReturnedFromJS(kTitle, kContent, vector(), ""); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPage(list.get(), GURL(kURL))); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); distiller_->Init(); @@ -359,7 +351,7 @@ TEST_F(DistillerTest, DistillPageWithImages) { image_indices.push_back(1); scoped_ptr list = CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, ""); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPage(list.get(), GURL(kURL))); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); distiller_->Init(); @@ -396,7 +388,7 @@ TEST_F(DistillerTest, DistillMultiplePages) { distiller_data->image_ids.push_back(image_indices); } - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPages(distiller_data.get(), kNumPages, 0)); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); @@ -413,7 +405,7 @@ TEST_F(DistillerTest, DistillLinkLoop) { // happen if javascript misparses a next page link. scoped_ptr list = CreateDistilledValueReturnedFromJS(kTitle, kContent, vector(), kURL); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPage(list.get(), GURL(kURL))); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); distiller_->Init(); @@ -443,7 +435,7 @@ TEST_F(DistillerTest, CheckMaxPageLimitExtraPage) { distiller_data->distilled_values.pop_back(); distiller_data->distilled_values.push_back(last_page_data.release()); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)).WillOnce( + EXPECT_CALL(page_factory_, CreateDistillerPageMock()).WillOnce( CreateMockDistillerPages(distiller_data.get(), kMaxPagesInArticle, 0)); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); @@ -463,7 +455,7 @@ TEST_F(DistillerTest, CheckMaxPageLimitExactLimit) { scoped_ptr distiller_data = CreateMultipageDistillerDataWithoutImages(kMaxPagesInArticle); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)).WillOnce( + EXPECT_CALL(page_factory_, CreateDistillerPageMock()).WillOnce( CreateMockDistillerPages(distiller_data.get(), kMaxPagesInArticle, 0)); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); @@ -483,7 +475,7 @@ TEST_F(DistillerTest, SinglePageDistillationFailure) { base::MessageLoopForUI loop; // To simulate failure return a null value. scoped_ptr nullValue(base::Value::CreateNullValue()); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPage(nullValue.get(), GURL(kURL))); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); distiller_->Init(); @@ -508,7 +500,7 @@ TEST_F(DistillerTest, MultiplePagesDistillationFailure) { distiller_data->distilled_values.begin() + failed_page_num, base::Value::CreateNullValue()); // Expect only calls till the failed page number. - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)).WillOnce( + EXPECT_CALL(page_factory_, CreateDistillerPageMock()).WillOnce( CreateMockDistillerPages(distiller_data.get(), failed_page_num + 1, 0)); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); @@ -529,7 +521,7 @@ TEST_F(DistillerTest, DistillPreviousPage) { scoped_ptr distiller_data = CreateMultipageDistillerDataWithoutImages(kNumPages); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num)); @@ -550,7 +542,7 @@ TEST_F(DistillerTest, IncrementalUpdates) { scoped_ptr distiller_data = CreateMultipageDistillerDataWithoutImages(kNumPages); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num)); @@ -573,7 +565,7 @@ TEST_F(DistillerTest, IncrementalUpdatesDoNotDeleteFinalArticle) { scoped_ptr distiller_data = CreateMultipageDistillerDataWithoutImages(kNumPages); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num)); @@ -598,7 +590,7 @@ TEST_F(DistillerTest, DeletingArticleDoesNotInterfereWithUpdates) { // The page number of the article on which distillation starts. int start_page_num = 3; - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num)); @@ -622,7 +614,7 @@ TEST_F(DistillerTest, CancelWithDelayedImageFetchCallback) { image_indices.push_back(0); scoped_ptr distilled_value = CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, ""); - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPage(distilled_value.get(), GURL(kURL))); TestDistillerURLFetcher* delayed_fetcher = new TestDistillerURLFetcher(true); @@ -646,7 +638,7 @@ TEST_F(DistillerTest, CancelWithDelayedJSCallback) { scoped_ptr distilled_value = CreateDistilledValueReturnedFromJS(kTitle, kContent, vector(), ""); MockDistillerPage* distiller_page = NULL; - EXPECT_CALL(page_factory_, CreateDistillerPageMock(_)) + EXPECT_CALL(page_factory_, CreateDistillerPageMock()) .WillOnce(CreateMockDistillerPageWithPendingJSCallback(&distiller_page, GURL(kURL))); distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_)); diff --git a/components/dom_distiller/core/page_distiller.cc b/components/dom_distiller/core/page_distiller.cc deleted file mode 100644 index 308e358..0000000 --- a/components/dom_distiller/core/page_distiller.cc +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "components/dom_distiller/core/page_distiller.h" - -#include - -#include "base/location.h" -#include "base/message_loop/message_loop.h" -#include "base/strings/utf_string_conversions.h" -#include "base/values.h" -#include "components/dom_distiller/core/distiller_page.h" -#include "components/dom_distiller/core/distiller_url_fetcher.h" -#include "grit/component_resources.h" -#include "net/url_request/url_request_context_getter.h" -#include "ui/base/resource/resource_bundle.h" -#include "url/gurl.h" - -namespace dom_distiller { - -DistilledPageInfo::DistilledPageInfo() {} - -DistilledPageInfo::~DistilledPageInfo() {} - -PageDistiller::PageDistiller(const DistillerPageFactory& distiller_page_factory) - : weak_factory_(this) { - distiller_page_ = - distiller_page_factory.CreateDistillerPage(weak_factory_.GetWeakPtr()) - .Pass(); -} - -PageDistiller::~PageDistiller() {} - -void PageDistiller::Init() { distiller_page_->Init(); } - -void PageDistiller::DistillPage(const GURL& url, - const PageDistillerCallback& callback) { - page_distiller_callback_ = callback; - LoadURL(url); -} - -void PageDistiller::LoadURL(const GURL& url) { - DVLOG(1) << "Loading for distillation: " << url.spec(); - distiller_page_->LoadURL(url); -} - -void PageDistiller::OnLoadURLDone() { GetDistilledContent(); } - -void PageDistiller::GetDistilledContent() { - DVLOG(1) << "Beginning distillation"; - std::string script = ResourceBundle::GetSharedInstance() - .GetRawDataResource(IDR_DISTILLER_JS) - .as_string(); - distiller_page_->ExecuteJavaScript(script); -} - -void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url, - const base::Value* value) { - DVLOG(1) << "Distillation complete; extracting resources for " - << page_url.spec(); - - scoped_ptr page_info(new DistilledPageInfo()); - std::string result; - const base::ListValue* result_list = NULL; - bool found_content = false; - if (!value->GetAsList(&result_list)) { - base::MessageLoop::current()->PostTask( - FROM_HERE, - base::Bind(page_distiller_callback_, base::Passed(&page_info), false)); - } else { - int i = 0; - for (base::ListValue::const_iterator iter = result_list->begin(); - iter != result_list->end(); - ++iter, ++i) { - std::string item; - (*iter)->GetAsString(&item); - // The JavaScript returns an array where the first element is the title, - // the second element is the article content HTML, and the remaining - // elements are image URLs referenced in the HTML. - switch (i) { - case 0: - page_info->title = item; - break; - case 1: - page_info->html = item; - found_content = true; - break; - case 2: - page_info->next_page_url = item; - break; - case 3: - page_info->prev_page_url = item; - break; - default: - if (GURL(item).is_valid()) { - page_info->image_urls.push_back(item); - } - } - } - base::MessageLoop::current()->PostTask( - FROM_HERE, - base::Bind(page_distiller_callback_, - base::Passed(&page_info), - found_content)); - } -} - -} // namespace dom_distiller diff --git a/components/dom_distiller/core/page_distiller.h b/components/dom_distiller/core/page_distiller.h deleted file mode 100644 index a1bc042..0000000 --- a/components/dom_distiller/core/page_distiller.h +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_DISTILLER_H_ -#define COMPONENTS_DOM_DISTILLER_CORE_PAGE_DISTILLER_H_ - -#include -#include - -#include "base/callback.h" -#include "base/memory/scoped_ptr.h" -#include "base/memory/weak_ptr.h" -#include "base/values.h" -#include "components/dom_distiller/core/distiller_page.h" -#include "url/gurl.h" - -namespace dom_distiller { - -class DistillerImpl; - -struct DistilledPageInfo { - std::string title; - std::string html; - std::string next_page_url; - std::string prev_page_url; - std::vector image_urls; - DistilledPageInfo(); - ~DistilledPageInfo(); - - private: - DISALLOW_COPY_AND_ASSIGN(DistilledPageInfo); -}; - -// Distills a single page of an article. -class PageDistiller : public DistillerPage::Delegate { - public: - typedef base::Callback distilled_page, - bool distillation_successful)> - PageDistillerCallback; - explicit PageDistiller(const DistillerPageFactory& distiller_page_factory); - virtual ~PageDistiller(); - - // Creates an execution context. This must be called once before any calls are - // made to distill the page. - virtual void Init(); - - // Distills the |url| and posts the |callback| with results. - virtual void DistillPage(const GURL& url, - const PageDistillerCallback& callback); - - // DistillerPage::Delegate - virtual void OnLoadURLDone() OVERRIDE; - virtual void OnExecuteJavaScriptDone(const GURL& page_url, - const base::Value* value) OVERRIDE; - - private: - virtual void LoadURL(const GURL& url); - - // Injects JavaScript to distill a loaded page down to its important content, - // e.g., extracting a news article from its surrounding boilerplate. - void GetDistilledContent(); - - scoped_ptr distiller_page_; - PageDistillerCallback page_distiller_callback_; - - base::WeakPtrFactory weak_factory_; - - DISALLOW_COPY_AND_ASSIGN(PageDistiller); -}; - -} // namespace dom_distiller - -#endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_DISTILLER_H_ -- cgit v1.1