diff options
author | cjhopman@chromium.org <cjhopman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-05-23 07:27:31 +0000 |
---|---|---|
committer | cjhopman@chromium.org <cjhopman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-05-23 07:27:31 +0000 |
commit | 07e92a5667bfa8b18e3681edcbbdb7df2f7ae454 (patch) | |
tree | dbb7d48392feafefd199869172a7c5e3006d96d8 | |
parent | 634869e4df15db6589184aee37a4929857d14b8c (diff) | |
download | chromium_src-07e92a5667bfa8b18e3681edcbbdb7df2f7ae454.zip chromium_src-07e92a5667bfa8b18e3681edcbbdb7df2f7ae454.tar.gz chromium_src-07e92a5667bfa8b18e3681edcbbdb7df2f7ae454.tar.bz2 |
Pull DomDistillerOptions up to the DistillerFactory
This allows the creator of the DistillerFactory to set the
DomDistillerOptions for all pages distilled by Distillers from that
factory.
Add an option to the content_extractor to extract just the text from the
page.
DEPENDSON= https://codereview.chromium.org/270663005/ https://codereview.chromium.org/286453002/
Review URL: https://codereview.chromium.org/286583002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@272431 0039d316-1c4b-4281-b951-d872f2087c98
10 files changed, 92 insertions, 34 deletions
diff --git a/chrome/browser/dom_distiller/dom_distiller_service_factory.cc b/chrome/browser/dom_distiller/dom_distiller_service_factory.cc index 035b8f4..92ae35c 100644 --- a/chrome/browser/dom_distiller/dom_distiller_service_factory.cc +++ b/chrome/browser/dom_distiller/dom_distiller_service_factory.cc @@ -63,7 +63,8 @@ KeyedService* DomDistillerServiceFactory::BuildServiceInstanceFor( scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( new DistillerURLFetcherFactory(profile->GetRequestContext())); scoped_ptr<DistillerFactory> distiller_factory( - new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass())); + new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass(), + dom_distiller::proto::DomDistillerOptions())); DomDistillerContextKeyedService* service = new DomDistillerContextKeyedService( diff --git a/components/dom_distiller.gypi b/components/dom_distiller.gypi index a86746f..092a26a 100644 --- a/components/dom_distiller.gypi +++ b/components/dom_distiller.gypi @@ -48,6 +48,7 @@ ], 'export_dependent_settings': [ 'distilled_page_proto', + '../third_party/dom_distiller_js/dom_distiller_js.gyp:dom_distiller_js_proto', ], 'sources': [ 'dom_distiller/android/component_jni_registrar.cc', diff --git a/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc b/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc index 72b572f..5d1cd2a 100644 --- a/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc +++ b/components/dom_distiller/content/distiller_page_web_contents_browsertest.cc @@ -36,6 +36,7 @@ class DistillerPageWebContentsTest : public ContentBrowserTest { quit_closure_ = quit_closure; distiller_page_->DistillPage( embedded_test_server()->GetURL(url), + dom_distiller::proto::DomDistillerOptions(), base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished, this)); } diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc index 070e681..472b453 100644 --- a/components/dom_distiller/core/distiller.cc +++ b/components/dom_distiller/core/distiller.cc @@ -29,15 +29,17 @@ const size_t kMaxPagesInArticle = 32; namespace dom_distiller { DistillerFactoryImpl::DistillerFactoryImpl( - scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) - : distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) { + scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory, + const dom_distiller::proto::DomDistillerOptions& dom_distiller_options) + : distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()), + dom_distiller_options_(dom_distiller_options) { } DistillerFactoryImpl::~DistillerFactoryImpl() {} scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { - scoped_ptr<DistillerImpl> distiller( - new DistillerImpl(*distiller_url_fetcher_factory_)); + scoped_ptr<DistillerImpl> distiller(new DistillerImpl( + *distiller_url_fetcher_factory_, dom_distiller_options_)); return distiller.PassAs<Distiller>(); } @@ -46,8 +48,10 @@ DistillerImpl::DistilledPageData::DistilledPageData() {} DistillerImpl::DistilledPageData::~DistilledPageData() {} DistillerImpl::DistillerImpl( - const DistillerURLFetcherFactory& distiller_url_fetcher_factory) + const DistillerURLFetcherFactory& distiller_url_fetcher_factory, + const dom_distiller::proto::DomDistillerOptions& dom_distiller_options) : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), + dom_distiller_options_(dom_distiller_options), max_pages_in_article_(kMaxPagesInArticle), destruction_allowed_(true), weak_factory_(this) { @@ -120,6 +124,7 @@ void DistillerImpl::DistillNextPage() { started_pages_index_[page_num] = pages_.size() - 1; distiller_page_->DistillPage( url, + dom_distiller_options_, base::Bind(&DistillerImpl::OnPageDistillationFinished, weak_factory_.GetWeakPtr(), page_num, diff --git a/components/dom_distiller/core/distiller.h b/components/dom_distiller/core/distiller.h index 106967b..648949e 100644 --- a/components/dom_distiller/core/distiller.h +++ b/components/dom_distiller/core/distiller.h @@ -56,19 +56,22 @@ class DistillerFactory { class DistillerFactoryImpl : public DistillerFactory { public: DistillerFactoryImpl( - scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); + scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory, + const dom_distiller::proto::DomDistillerOptions& dom_distiller_options); virtual ~DistillerFactoryImpl(); virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; private: scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; + dom_distiller::proto::DomDistillerOptions dom_distiller_options_; }; // Distills a article from a page and associated pages. class DistillerImpl : public Distiller { public: DistillerImpl( - const DistillerURLFetcherFactory& distiller_url_fetcher_factory); + const DistillerURLFetcherFactory& distiller_url_fetcher_factory, + const dom_distiller::proto::DomDistillerOptions& dom_distiller_options); virtual ~DistillerImpl(); virtual void DistillPage( @@ -147,6 +150,8 @@ class DistillerImpl : public Distiller { const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; scoped_ptr<DistillerPage> distiller_page_; + + dom_distiller::proto::DomDistillerOptions dom_distiller_options_; DistillationFinishedCallback finished_cb_; DistillationUpdateCallback update_cb_; diff --git a/components/dom_distiller/core/distiller_page.cc b/components/dom_distiller/core/distiller_page.cc index ceee2a8..a44531c 100644 --- a/components/dom_distiller/core/distiller_page.cc +++ b/components/dom_distiller/core/distiller_page.cc @@ -58,14 +58,15 @@ DistillerPage::DistillerPage() : ready_(true) {} DistillerPage::~DistillerPage() {} -void DistillerPage::DistillPage(const GURL& gurl, - const DistillerPageCallback& callback) { +void DistillerPage::DistillPage( + const GURL& gurl, + const dom_distiller::proto::DomDistillerOptions options, + const DistillerPageCallback& callback) { DCHECK(ready_); // It is only possible to distill one page at a time. |ready_| is reset when // the callback to OnDistillationDone happens. ready_ = false; distiller_page_callback_ = callback; - dom_distiller::proto::DomDistillerOptions options; DistillPageImpl(gurl, GetDistillerScriptWithOptions(options)); } diff --git a/components/dom_distiller/core/distiller_page.h b/components/dom_distiller/core/distiller_page.h index a30d5e7..429a676 100644 --- a/components/dom_distiller/core/distiller_page.h +++ b/components/dom_distiller/core/distiller_page.h @@ -11,6 +11,7 @@ #include "base/memory/scoped_ptr.h" #include "base/memory/weak_ptr.h" #include "base/values.h" +#include "third_party/dom_distiller_js/dom_distiller.pb.h" #include "url/gurl.h" namespace dom_distiller { @@ -44,9 +45,11 @@ class DistillerPage { // Loads a URL. |OnDistillationDone| is called when the load completes or // fails. May be called when the distiller is idle. Callers can assume that, - // for a given |url|, any DistillerPage implementation will extract the same - // content. - void DistillPage(const GURL& url, const DistillerPageCallback& callback); + // for a given |url| and |options|, any DistillerPage implementation will + // extract the same content. + void DistillPage(const GURL& url, + const dom_distiller::proto::DomDistillerOptions options, + const DistillerPageCallback& callback); // Called when the JavaScript execution completes. |page_url| is the url of // the distilled page. |value| contains data returned by the script. diff --git a/components/dom_distiller/core/distiller_unittest.cc b/components/dom_distiller/core/distiller_unittest.cc index 7157e71..109912f 100644 --- a/components/dom_distiller/core/distiller_unittest.cc +++ b/components/dom_distiller/core/distiller_unittest.cc @@ -12,6 +12,7 @@ #include "base/location.h" #include "base/memory/scoped_ptr.h" #include "base/message_loop/message_loop.h" +#include "base/path_service.h" #include "base/strings/string_number_conversions.h" #include "base/values.h" #include "components/dom_distiller/core/article_distillation_update.h" @@ -25,6 +26,7 @@ #include "testing/gtest/include/gtest/gtest.h" #include "third_party/dom_distiller_js/dom_distiller.pb.h" #include "third_party/dom_distiller_js/dom_distiller_json_converter.h" +#include "ui/base/resource/resource_bundle.h" using std::vector; using std::string; @@ -32,6 +34,9 @@ using ::testing::Invoke; using ::testing::Return; using ::testing::_; +using dom_distiller::proto::DomDistillerOptions; +using dom_distiller::proto::DomDistillerResult; + namespace { const char kTitle[] = "Title"; const char kContent[] = "Content"; @@ -51,7 +56,7 @@ scoped_ptr<base::Value> CreateDistilledValueReturnedFromJS( const vector<int>& image_indices, const string& next_page_url, const string& prev_page_url = "") { - dom_distiller::proto::DomDistillerResult result; + DomDistillerResult result; result.set_title(title); result.mutable_distilled_content()->set_html(content); result.mutable_pagination_info()->set_next_page(next_page_url); @@ -181,6 +186,15 @@ void VerifyArticleProtoMatchesMultipageData( } } +void AddComponentsResources() { + base::FilePath pak_file; + base::FilePath pak_dir; + PathService::Get(base::DIR_MODULE, &pak_dir); + pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak")); + ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath( + pak_file, ui::SCALE_FACTOR_NONE); +} + } // namespace namespace dom_distiller { @@ -198,7 +212,7 @@ class TestDistillerURLFetcher : public DistillerURLFetcher { virtual void FetchURL(const string& url, const URLFetcherCallback& callback) OVERRIDE { - DCHECK(callback_.is_null()); + ASSERT_FALSE(callback.is_null()); url_ = url; callback_ = callback; if (!delay_fetch_) { @@ -208,6 +222,7 @@ class TestDistillerURLFetcher : public DistillerURLFetcher { void PostCallbackTask() { ASSERT_TRUE(base::MessageLoop::current()); + ASSERT_FALSE(callback_.is_null()); base::MessageLoop::current()->PostTask( FROM_HERE, base::Bind(callback_, responses_[url_])); } @@ -241,6 +256,10 @@ class DistillerTest : public testing::Test { public: virtual ~DistillerTest() {} + virtual void SetUp() OVERRIDE { + AddComponentsResources(); + } + void OnDistillArticleDone(scoped_ptr<DistilledArticleProto> proto) { article_proto_ = proto.Pass(); } @@ -311,7 +330,8 @@ TEST_F(DistillerTest, DistillPage) { base::MessageLoopForUI loop; scoped_ptr<base::Value> result = CreateDistilledValueReturnedFromJS(kTitle, kContent, vector<int>(), ""); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL)).Pass()); base::MessageLoop::current()->RunUntilIdle(); EXPECT_EQ(kTitle, article_proto_->title()); @@ -328,7 +348,8 @@ TEST_F(DistillerTest, DistillPageWithImages) { image_indices.push_back(1); scoped_ptr<base::Value> result = CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, ""); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL)).Pass()); base::MessageLoop::current()->RunUntilIdle(); EXPECT_EQ(kTitle, article_proto_->title()); @@ -362,7 +383,8 @@ TEST_F(DistillerTest, DistillMultiplePages) { distiller_data->image_ids.push_back(image_indices); } - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage( distiller_data->page_urls[0], CreateMockDistillerPages(distiller_data.get(), kNumPages, 0).Pass()); @@ -377,7 +399,8 @@ TEST_F(DistillerTest, DistillLinkLoop) { // happen if javascript misparses a next page link. scoped_ptr<base::Value> result = CreateDistilledValueReturnedFromJS(kTitle, kContent, vector<int>(), kURL); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL)).Pass()); base::MessageLoop::current()->RunUntilIdle(); EXPECT_EQ(kTitle, article_proto_->title()); @@ -404,7 +427,8 @@ TEST_F(DistillerTest, CheckMaxPageLimitExtraPage) { distiller_data->distilled_values.pop_back(); distiller_data->distilled_values.push_back(last_page_data.release()); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle); @@ -423,7 +447,8 @@ TEST_F(DistillerTest, CheckMaxPageLimitExactLimit) { scoped_ptr<MultipageDistillerData> distiller_data = CreateMultipageDistillerDataWithoutImages(kMaxPagesInArticle); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); // Check if distilling an article with exactly the page limit works. distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle); @@ -441,7 +466,8 @@ TEST_F(DistillerTest, SinglePageDistillationFailure) { base::MessageLoopForUI loop; // To simulate failure return a null value. scoped_ptr<base::Value> nullValue(base::Value::CreateNullValue()); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(kURL, CreateMockDistillerPage(nullValue.get(), GURL(kURL)).Pass()); base::MessageLoop::current()->RunUntilIdle(); @@ -464,7 +490,8 @@ TEST_F(DistillerTest, MultiplePagesDistillationFailure) { distiller_data->distilled_values.begin() + failed_page_num, base::Value::CreateNullValue()); // Expect only calls till the failed page number. - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(distiller_data->page_urls[0], CreateMockDistillerPages( distiller_data.get(), failed_page_num + 1, 0).Pass()); @@ -483,7 +510,8 @@ TEST_F(DistillerTest, DistillPreviousPage) { scoped_ptr<MultipageDistillerData> distiller_data = CreateMultipageDistillerDataWithoutImages(kNumPages); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(distiller_data->page_urls[start_page_num], CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num).Pass()); @@ -501,7 +529,8 @@ TEST_F(DistillerTest, IncrementalUpdates) { scoped_ptr<MultipageDistillerData> distiller_data = CreateMultipageDistillerDataWithoutImages(kNumPages); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(distiller_data->page_urls[start_page_num], CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num).Pass()); @@ -521,7 +550,8 @@ TEST_F(DistillerTest, IncrementalUpdatesDoNotDeleteFinalArticle) { scoped_ptr<MultipageDistillerData> distiller_data = CreateMultipageDistillerDataWithoutImages(kNumPages); - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(distiller_data->page_urls[start_page_num], CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num).Pass()); @@ -543,7 +573,8 @@ TEST_F(DistillerTest, DeletingArticleDoesNotInterfereWithUpdates) { // The page number of the article on which distillation starts. int start_page_num = 3; - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(distiller_data->page_urls[start_page_num], CreateMockDistillerPages( distiller_data.get(), kNumPages, start_page_num).Pass()); @@ -565,10 +596,11 @@ TEST_F(DistillerTest, CancelWithDelayedImageFetchCallback) { scoped_ptr<base::Value> distilled_value = CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, ""); TestDistillerURLFetcher* delayed_fetcher = new TestDistillerURLFetcher(true); - MockDistillerURLFetcherFactory url_fetcher_factory; - EXPECT_CALL(url_fetcher_factory, CreateDistillerURLFetcher()) + MockDistillerURLFetcherFactory mock_url_fetcher_factory; + EXPECT_CALL(mock_url_fetcher_factory, CreateDistillerURLFetcher()) .WillOnce(Return(delayed_fetcher)); - distiller_.reset(new DistillerImpl(url_fetcher_factory)); + distiller_.reset( + new DistillerImpl(mock_url_fetcher_factory, DomDistillerOptions())); DistillPage( kURL, CreateMockDistillerPage(distilled_value.get(), GURL(kURL)).Pass()); base::MessageLoop::current()->RunUntilIdle(); @@ -585,7 +617,8 @@ TEST_F(DistillerTest, CancelWithDelayedJSCallback) { scoped_ptr<base::Value> distilled_value = CreateDistilledValueReturnedFromJS(kTitle, kContent, vector<int>(), ""); MockDistillerPage* distiller_page = NULL; - distiller_.reset(new DistillerImpl(url_fetcher_factory_)); + distiller_.reset( + new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); DistillPage(kURL, CreateMockDistillerPageWithPendingJSCallback(&distiller_page, GURL(kURL))); diff --git a/components/dom_distiller/standalone/content_extractor.cc b/components/dom_distiller/standalone/content_extractor.cc index 760b165..dd69495 100644 --- a/components/dom_distiller/standalone/content_extractor.cc +++ b/components/dom_distiller/standalone/content_extractor.cc @@ -22,6 +22,7 @@ #include "content/public/test/content_browser_test.h" #include "content/shell/browser/shell.h" #include "net/dns/mock_host_resolver.h" +#include "third_party/dom_distiller_js/dom_distiller.pb.h" #include "ui/base/resource/resource_bundle.h" using content::ContentBrowserTest; @@ -43,6 +44,8 @@ const char* kOutputFile = "output-file"; // output. const char* kShouldOutputBinary = "output-binary"; +const char* kExtractTextOnly = "extract-text-only"; + scoped_ptr<DomDistillerService> CreateDomDistillerService( content::BrowserContext* context, const base::FilePath& db_path) { @@ -61,8 +64,13 @@ scoped_ptr<DomDistillerService> CreateDomDistillerService( new DistillerPageWebContentsFactory(context)); scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( new DistillerURLFetcherFactory(context->GetRequestContext())); + + dom_distiller::proto::DomDistillerOptions options; + if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) { + options.set_extract_text_only(true); + } scoped_ptr<DistillerFactory> distiller_factory( - new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass())); + new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass(), options)); return scoped_ptr<DomDistillerService>(new DomDistillerService( dom_distiller_store.PassAs<DomDistillerStoreInterface>(), diff --git a/third_party/dom_distiller_js/dom_distiller_js.gyp b/third_party/dom_distiller_js/dom_distiller_js.gyp index 1e6641b..aaa96ed 100644 --- a/third_party/dom_distiller_js/dom_distiller_js.gyp +++ b/third_party/dom_distiller_js/dom_distiller_js.gyp @@ -11,7 +11,7 @@ 'proto_in_dir': 'package/proto', 'proto_out_dir': 'third_party/dom_distiller_js', }, - 'all_dependent_settings': { + 'direct_dependent_settings': { 'include_dirs': ['package/proto_gen'], }, 'includes': [ '../../build/protoc.gypi', ], |