summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--build/ios/grit_whitelist.txt2
-rw-r--r--components/BUILD.gn1
-rw-r--r--components/components_tests.gyp1
-rw-r--r--components/dom_distiller.gypi2
-rw-r--r--components/dom_distiller/content/BUILD.gn2
-rw-r--r--components/dom_distiller/content/distillable_page_utils.cc74
-rw-r--r--components/dom_distiller/content/distillable_page_utils.h25
-rw-r--r--components/dom_distiller/content/distillable_page_utils_browsertest.cc169
-rw-r--r--components/dom_distiller/core/data/distillable_page_model.binbin0 -> 2204 bytes
-rw-r--r--components/dom_distiller/core/distillable_page_detector.cc20
-rw-r--r--components/dom_distiller/core/distillable_page_detector.h1
-rw-r--r--components/dom_distiller/core/distillable_page_detector_unittest.cc12
-rw-r--r--components/dom_distiller/core/javascript/extract_features.js28
-rw-r--r--components/dom_distiller/core/page_features.cc16
-rw-r--r--components/dom_distiller/core/page_features.h6
-rw-r--r--components/dom_distiller/core/page_features_unittest.cc9
-rw-r--r--components/resources/dom_distiller_resources.grdp2
-rw-r--r--components/test/data/dom_distiller/non_og_article.html8
-rw-r--r--components/test/data/dom_distiller/og_article.html9
19 files changed, 383 insertions, 4 deletions
diff --git a/build/ios/grit_whitelist.txt b/build/ios/grit_whitelist.txt
index d1cea6d..20bd369 100644
--- a/build/ios/grit_whitelist.txt
+++ b/build/ios/grit_whitelist.txt
@@ -17,10 +17,12 @@ IDR_DEFAULT_FAVICON
IDR_DEFAULT_FAVICON_32
IDR_DEFAULT_FAVICON_64
IDR_DIR_HEADER_HTML
+IDR_DISTILLABLE_PAGE_SERIALIZED_MODEL
IDR_DISTILLER_CSS
IDR_DISTILLER_JS
IDR_DOM_DISTILLER_VIEWER_HTML
IDR_DOM_DISTILLER_VIEWER_JS
+IDR_EXTRACT_PAGE_FEATURES_JS
IDR_FLAGS_FAVICON
IDR_FLAGS_HTML
IDR_FLAGS_JS
diff --git a/components/BUILD.gn b/components/BUILD.gn
index 8716abb..93aa824 100644
--- a/components/BUILD.gn
+++ b/components/BUILD.gn
@@ -314,6 +314,7 @@ test("components_browsertests") {
sources = [
"autofill/content/browser/risk/fingerprint_browsertest.cc",
"autofill/content/renderer/password_form_conversion_utils_browsertest.cc",
+ "dom_distiller/content/distillable_page_utils_browsertest.cc",
"dom_distiller/content/distiller_page_web_contents_browsertest.cc",
"password_manager/content/renderer/credential_manager_client_browsertest.cc",
]
diff --git a/components/components_tests.gyp b/components/components_tests.gyp
index bbd0e76..46fa9b5 100644
--- a/components/components_tests.gyp
+++ b/components/components_tests.gyp
@@ -1155,6 +1155,7 @@
'sources': [
'autofill/content/browser/risk/fingerprint_browsertest.cc',
'autofill/content/renderer/password_form_conversion_utils_browsertest.cc',
+ 'dom_distiller/content/distillable_page_utils_browsertest.cc',
'dom_distiller/content/distiller_page_web_contents_browsertest.cc',
'password_manager/content/renderer/credential_manager_client_browsertest.cc',
],
diff --git a/components/dom_distiller.gypi b/components/dom_distiller.gypi
index 9f64e30..759fe080 100644
--- a/components/dom_distiller.gypi
+++ b/components/dom_distiller.gypi
@@ -176,6 +176,8 @@
'..',
],
'sources': [
+ 'dom_distiller/content/distillable_page_utils.cc',
+ 'dom_distiller/content/distillable_page_utils.h',
'dom_distiller/content/distiller_page_web_contents.cc',
'dom_distiller/content/distiller_page_web_contents.h',
'dom_distiller/content/dom_distiller_viewer_source.cc',
diff --git a/components/dom_distiller/content/BUILD.gn b/components/dom_distiller/content/BUILD.gn
index 35af9dd..61dd8e6 100644
--- a/components/dom_distiller/content/BUILD.gn
+++ b/components/dom_distiller/content/BUILD.gn
@@ -6,6 +6,8 @@ if (!is_ios) {
# GYP version: components/dom_distiller.gypi:dom_distiller_content
static_library("content") {
sources = [
+ "distillable_page_utils.cc",
+ "distillable_page_utils.h",
"distiller_page_web_contents.cc",
"distiller_page_web_contents.h",
"dom_distiller_viewer_source.cc",
diff --git a/components/dom_distiller/content/distillable_page_utils.cc b/components/dom_distiller/content/distillable_page_utils.cc
new file mode 100644
index 0000000..7f1b3cc
--- /dev/null
+++ b/components/dom_distiller/content/distillable_page_utils.cc
@@ -0,0 +1,74 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/dom_distiller/content/distillable_page_utils.h"
+
+#include "base/bind.h"
+#include "base/message_loop/message_loop.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/values.h"
+#include "components/dom_distiller/core/distillable_page_detector.h"
+#include "components/dom_distiller/core/page_features.h"
+#include "content/public/browser/render_frame_host.h"
+#include "grit/components_resources.h"
+#include "ui/base/resource/resource_bundle.h"
+
+namespace dom_distiller {
+namespace {
+// Forwards the outcome of the OpenGraph-article script to |callback|. The
+// reported value starts out false and is only overwritten by GetAsBoolean()
+// when the script produced a result, so a null |result| reports false.
+void OnOGArticleJsResult(base::Callback<void(bool)> callback,
+                         const base::Value* result) {
+  bool is_article = false;
+  if (result != nullptr)
+    result->GetAsBoolean(&is_article);
+  callback.Run(is_article);
+}
+
+// Classifies the page from the feature-extraction script's result and reports
+// the verdict through |callback|. |detector| is borrowed and must still be
+// alive when this runs.
+void OnExtractFeaturesJsResult(const DistillablePageDetector* detector,
+                               base::Callback<void(bool)> callback,
+                               const base::Value* result) {
+  // CalculateDerivedFeaturesFromJSON() dereferences its argument, so a missing
+  // script result has to be caught here. Report "not distillable", the same
+  // verdict that is posted when there is no main frame to run the script in.
+  if (!result) {
+    callback.Run(false);
+    return;
+  }
+  callback.Run(detector->Classify(CalculateDerivedFeaturesFromJSON(result)));
+}
+} // namespace
+
+// Runs the IDR_IS_DISTILLABLE_JS script in the main frame of |web_contents|
+// and hands its boolean outcome to |callback|. When there is no main frame,
+// a task that runs |callback| with false is posted to the current message
+// loop instead.
+void IsOpenGraphArticle(content::WebContents* web_contents,
+                        base::Callback<void(bool)> callback) {
+  content::RenderFrameHost* frame = web_contents->GetMainFrame();
+  if (frame) {
+    const std::string script = ResourceBundle::GetSharedInstance()
+                                   .GetRawDataResource(IDR_IS_DISTILLABLE_JS)
+                                   .as_string();
+    frame->ExecuteJavaScript(base::UTF8ToUTF16(script),
+                             base::Bind(OnOGArticleJsResult, callback));
+    return;
+  }
+
+  // Nothing to execute the script in: answer asynchronously with false.
+  base::MessageLoop::current()->PostTask(FROM_HERE,
+                                         base::Bind(callback, false));
+}
+
+// Convenience wrapper: classifies the page in |web_contents| with the detector
+// returned by DistillablePageDetector::GetDefault().
+void IsDistillablePage(content::WebContents* web_contents,
+                       base::Callback<void(bool)> callback) {
+  const DistillablePageDetector* default_detector =
+      DistillablePageDetector::GetDefault();
+  IsDistillablePageForDetector(web_contents, default_detector, callback);
+}
+
+// Runs the IDR_EXTRACT_PAGE_FEATURES_JS script in the main frame of
+// |web_contents|; the script's result is classified with |detector| and the
+// verdict is passed to |callback|. |detector| is bound as a raw pointer, so
+// it has to stay alive until the callback has run. When there is no main
+// frame, a task that runs |callback| with false is posted instead.
+void IsDistillablePageForDetector(content::WebContents* web_contents,
+                                  const DistillablePageDetector* detector,
+                                  base::Callback<void(bool)> callback) {
+  content::RenderFrameHost* frame = web_contents->GetMainFrame();
+  if (frame) {
+    const std::string script =
+        ResourceBundle::GetSharedInstance()
+            .GetRawDataResource(IDR_EXTRACT_PAGE_FEATURES_JS)
+            .as_string();
+    frame->ExecuteJavaScript(
+        base::UTF8ToUTF16(script),
+        base::Bind(OnExtractFeaturesJsResult, detector, callback));
+    return;
+  }
+
+  // Nothing to execute the script in: answer asynchronously with false.
+  base::MessageLoop::current()->PostTask(FROM_HERE,
+                                         base::Bind(callback, false));
+}
+
+} // namespace dom_distiller
diff --git a/components/dom_distiller/content/distillable_page_utils.h b/components/dom_distiller/content/distillable_page_utils.h
new file mode 100644
index 0000000..95216bd
--- /dev/null
+++ b/components/dom_distiller/content/distillable_page_utils.h
@@ -0,0 +1,25 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_DOM_DISTILLER_CONTENT_DISTILLABLE_PAGE_UTILS_H_
+#define COMPONENTS_DOM_DISTILLER_CONTENT_DISTILLABLE_PAGE_UTILS_H_
+
+#include "base/callback.h"
+#include "content/public/browser/web_contents.h"
+
+namespace dom_distiller {
+
+class DistillablePageDetector;
+
+// Runs the IDR_IS_DISTILLABLE_JS script in the main frame of |web_contents|
+// and passes its boolean result to |callback|. |callback| receives false when
+// there is no main frame or the script yields no result.
+void IsOpenGraphArticle(content::WebContents* web_contents,
+ base::Callback<void(bool)> callback);
+// Same as IsDistillablePageForDetector(), using the detector returned by
+// DistillablePageDetector::GetDefault().
+void IsDistillablePage(content::WebContents* web_contents,
+ base::Callback<void(bool)> callback);
+// Runs the feature-extraction script in the main frame of |web_contents|,
+// classifies the extracted features with |detector| and passes the verdict to
+// |callback|. |callback| receives false when there is no main frame.
+// The passed detector must be alive until after the callback is called.
+void IsDistillablePageForDetector(content::WebContents* web_contents,
+ const DistillablePageDetector* detector,
+ base::Callback<void(bool)> callback);
+}  // namespace dom_distiller
+
+#endif // COMPONENTS_DOM_DISTILLER_CONTENT_DISTILLABLE_PAGE_UTILS_H_
diff --git a/components/dom_distiller/content/distillable_page_utils_browsertest.cc b/components/dom_distiller/content/distillable_page_utils_browsertest.cc
new file mode 100644
index 0000000..9525982
--- /dev/null
+++ b/components/dom_distiller/content/distillable_page_utils_browsertest.cc
@@ -0,0 +1,169 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/bind.h"
+#include "base/path_service.h"
+#include "base/run_loop.h"
+#include "components/dom_distiller/content/distillable_page_utils.h"
+#include "components/dom_distiller/core/distillable_page_detector.h"
+#include "components/dom_distiller/core/page_features.h"
+#include "content/public/browser/browser_context.h"
+#include "content/public/browser/render_frame_host.h"
+#include "content/public/browser/web_contents_observer.h"
+#include "content/public/test/content_browser_test.h"
+#include "content/shell/browser/shell.h"
+#include "net/test/embedded_test_server/embedded_test_server.h"
+#include "ui/base/resource/resource_bundle.h"
+
+namespace dom_distiller {
+namespace {
+
+// Request paths of the fixture pages, resolved against the embedded test
+// server by LoadURL(). Declared as arrays so the names themselves cannot be
+// re-pointed at another string.
+const char kArticlePath[] = "/og_article.html";
+const char kNonArticlePath[] = "/non_og_article.html";
+
+// Browser-test fixture that registers the components test resource pack,
+// serves the dom_distiller fixture pages from the embedded test server, and
+// can block until the main frame's document has loaded.
+class DomDistillerDistillablePageUtilsTest : public content::ContentBrowserTest,
+                                             content::WebContentsObserver {
+ public:
+  void SetUpOnMainThread() override {
+    AddComponentsResources();
+    SetUpTestServer();
+    ContentBrowserTest::SetUpOnMainThread();
+  }
+
+  // Navigates the shell's WebContents to |url| (a path on the embedded test
+  // server) and spins a RunLoop until DocumentLoadedInFrame() fires for the
+  // main frame. Observation is limited to the duration of the call.
+  void LoadURL(const std::string& url) {
+    content::WebContents* contents = shell()->web_contents();
+    Observe(contents);
+
+    base::RunLoop load_waiter;
+    main_frame_loaded_callback_ = load_waiter.QuitClosure();
+    contents->GetController().LoadURL(embedded_test_server()->GetURL(url),
+                                      content::Referrer(),
+                                      ui::PAGE_TRANSITION_TYPED,
+                                      std::string());
+    load_waiter.Run();
+
+    // Drop the quit closure and stop observing before returning.
+    main_frame_loaded_callback_ = base::Closure();
+    Observe(nullptr);
+  }
+
+ private:
+  // Adds components_tests_resources.pak, looked up next to the running
+  // module, to the shared ResourceBundle.
+  void AddComponentsResources() {
+    base::FilePath module_dir;
+    PathService::Get(base::DIR_MODULE, &module_dir);
+    base::FilePath pak_path = module_dir.Append(
+        FILE_PATH_LITERAL("components_tests_resources.pak"));
+    ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
+        pak_path, ui::SCALE_FACTOR_NONE);
+  }
+
+  // Points the embedded test server at the dom_distiller test data directory
+  // and starts it.
+  void SetUpTestServer() {
+    base::FilePath source_root;
+    PathService::Get(base::DIR_SOURCE_ROOT, &source_root);
+    base::FilePath data_dir =
+        source_root.AppendASCII("components/test/data/dom_distiller");
+    embedded_test_server()->ServeFilesFromDirectory(data_dir);
+    ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
+  }
+
+  // content::WebContentsObserver: only a frame without a parent (the main
+  // frame) releases the waiting RunLoop.
+  void DocumentLoadedInFrame(
+      content::RenderFrameHost* render_frame_host) override {
+    if (render_frame_host->GetParent())
+      return;
+    main_frame_loaded_callback_.Run();
+  }
+
+  base::Closure main_frame_loaded_callback_;
+};
+
+// Captures the boolean delivered to an asynchronous callback and then runs a
+// closure (typically a RunLoop quit closure) so the waiting test can resume.
+class ResultHolder {
+ public:
+  // |callback| is run once a result has been stored. The stored result
+  // defaults to false so GetResult() is well defined even if no result has
+  // been delivered yet.
+  explicit ResultHolder(base::Closure callback)
+      : callback_(callback), result_(false) {}
+
+  // Stores |result| and signals the waiter.
+  void OnResult(bool result) {
+    result_ = result;
+    callback_.Run();
+  }
+
+  // Returns the most recently stored result (false if none was stored).
+  bool GetResult() const {
+    return result_;
+  }
+
+  // Returns a callback bound to this object. The binding is unretained, so
+  // the holder must outlive every invocation of the returned callback.
+  base::Callback<void(bool)> GetCallback() {
+    return base::Bind(&ResultHolder::OnResult, base::Unretained(this));
+  }
+
+ private:
+  base::Closure callback_;
+  bool result_;
+};
+
+} // namespace
+
+// A page carrying an og:type "article" meta tag is reported as an OpenGraph
+// article.
+IN_PROC_BROWSER_TEST_F(DomDistillerDistillablePageUtilsTest, TestIsOGArticle) {
+  LoadURL(kArticlePath);
+
+  base::RunLoop result_waiter;
+  ResultHolder og_result(result_waiter.QuitClosure());
+  IsOpenGraphArticle(shell()->web_contents(), og_result.GetCallback());
+  result_waiter.Run();
+
+  ASSERT_TRUE(og_result.GetResult());
+}
+
+// A page without an og:type meta tag is not reported as an OpenGraph article.
+IN_PROC_BROWSER_TEST_F(DomDistillerDistillablePageUtilsTest,
+                       TestIsNotOGArticle) {
+  LoadURL(kNonArticlePath);
+
+  base::RunLoop result_waiter;
+  ResultHolder og_result(result_waiter.QuitClosure());
+  IsOpenGraphArticle(shell()->web_contents(), og_result.GetCallback());
+  result_waiter.Run();
+
+  ASSERT_FALSE(og_result.GetResult());
+}
+
+// A single-stump model with positive weight classifies the loaded page as
+// distillable.
+IN_PROC_BROWSER_TEST_F(DomDistillerDistillablePageUtilsTest,
+                       TestIsDistillablePage) {
+  scoped_ptr<AdaBoostProto> model(new AdaBoostProto);
+  model->set_num_features(kDerivedFeaturesCount);
+  model->set_num_stumps(1);
+
+  StumpProto* only_stump = model->add_stump();
+  only_stump->set_feature_number(0);
+  only_stump->set_weight(1);
+  only_stump->set_split(-1);
+
+  scoped_ptr<DistillablePageDetector> detector(
+      new DistillablePageDetector(model.Pass()));
+  EXPECT_DOUBLE_EQ(0.5, detector->GetThreshold());
+
+  // The first value of the first feature is either 0 or 1. Since the stump's
+  // split is -1, the stump weight will be applied to any set of derived
+  // features.
+  LoadURL(kArticlePath);
+
+  base::RunLoop result_waiter;
+  ResultHolder verdict(result_waiter.QuitClosure());
+  IsDistillablePageForDetector(shell()->web_contents(), detector.get(),
+                               verdict.GetCallback());
+  result_waiter.Run();
+
+  ASSERT_TRUE(verdict.GetResult());
+}
+
+// A single-stump model with negative weight classifies the loaded page as not
+// distillable.
+IN_PROC_BROWSER_TEST_F(DomDistillerDistillablePageUtilsTest,
+                       TestIsNotDistillablePage) {
+  scoped_ptr<AdaBoostProto> model(new AdaBoostProto);
+  model->set_num_features(kDerivedFeaturesCount);
+  model->set_num_stumps(1);
+
+  StumpProto* only_stump = model->add_stump();
+  only_stump->set_feature_number(0);
+  only_stump->set_weight(-1);
+  only_stump->set_split(-1);
+
+  scoped_ptr<DistillablePageDetector> detector(
+      new DistillablePageDetector(model.Pass()));
+  EXPECT_DOUBLE_EQ(-0.5, detector->GetThreshold());
+
+  // The first value of the first feature is either 0 or 1. Since the stump's
+  // split is -1, the stump weight will be applied to any set of derived
+  // features.
+  LoadURL(kArticlePath);
+
+  base::RunLoop result_waiter;
+  ResultHolder verdict(result_waiter.QuitClosure());
+  IsDistillablePageForDetector(shell()->web_contents(), detector.get(),
+                               verdict.GetCallback());
+  result_waiter.Run();
+
+  ASSERT_FALSE(verdict.GetResult());
+}
+
+} // namespace dom_distiller
diff --git a/components/dom_distiller/core/data/distillable_page_model.bin b/components/dom_distiller/core/data/distillable_page_model.bin
new file mode 100644
index 0000000..39d02eb
--- /dev/null
+++ b/components/dom_distiller/core/data/distillable_page_model.bin
Binary files differ
diff --git a/components/dom_distiller/core/distillable_page_detector.cc b/components/dom_distiller/core/distillable_page_detector.cc
index d11e042..aa7ddcf 100644
--- a/components/dom_distiller/core/distillable_page_detector.cc
+++ b/components/dom_distiller/core/distillable_page_detector.cc
@@ -5,9 +5,25 @@
#include "components/dom_distiller/core/distillable_page_detector.h"
#include "base/logging.h"
+#include "grit/components_resources.h"
+#include "ui/base/resource/resource_bundle.h"
namespace dom_distiller {
+// Returns the detector built from the serialized model bundled as
+// IDR_DISTILLABLE_PAGE_SERIALIZED_MODEL. It is created on the first call and
+// cached in a function-local static pointer; this function never deletes it.
+const DistillablePageDetector* DistillablePageDetector::GetDefault() {
+  static DistillablePageDetector* default_detector = nullptr;
+  if (default_detector)
+    return default_detector;
+
+  const std::string model_bytes =
+      ResourceBundle::GetSharedInstance()
+          .GetRawDataResource(IDR_DISTILLABLE_PAGE_SERIALIZED_MODEL)
+          .as_string();
+  scoped_ptr<AdaBoostProto> model(new AdaBoostProto);
+  // A bundled model that does not parse is treated as fatal.
+  CHECK(model->ParseFromString(model_bytes));
+  default_detector = new DistillablePageDetector(model.Pass());
+  return default_detector;
+}
+
DistillablePageDetector::DistillablePageDetector(
scoped_ptr<AdaBoostProto> proto)
: proto_(proto.Pass()), threshold_(0.0) {
@@ -30,7 +46,9 @@ bool DistillablePageDetector::Classify(
double DistillablePageDetector::Score(
const std::vector<double>& features) const {
- CHECK(features.size() == size_t(proto_->num_features()));
+ if (features.size() != size_t(proto_->num_features())) {
+ return 0.0;
+ }
double score = 0.0;
for (int i = 0; i < proto_->num_stumps(); ++i) {
const StumpProto& stump = proto_->stump(i);
diff --git a/components/dom_distiller/core/distillable_page_detector.h b/components/dom_distiller/core/distillable_page_detector.h
index 220d8b0..fbe36bf 100644
--- a/components/dom_distiller/core/distillable_page_detector.h
+++ b/components/dom_distiller/core/distillable_page_detector.h
@@ -18,6 +18,7 @@ namespace dom_distiller {
// model.
class DistillablePageDetector {
public:
+ static const DistillablePageDetector* GetDefault();
explicit DistillablePageDetector(scoped_ptr<AdaBoostProto> proto);
~DistillablePageDetector();
diff --git a/components/dom_distiller/core/distillable_page_detector_unittest.cc b/components/dom_distiller/core/distillable_page_detector_unittest.cc
index 6580fcd..835f8ea 100644
--- a/components/dom_distiller/core/distillable_page_detector_unittest.cc
+++ b/components/dom_distiller/core/distillable_page_detector_unittest.cc
@@ -91,6 +91,18 @@ TEST(DomDistillerDistillablePageDetectorTest, TestScoreAndClassify) {
EXPECT_TRUE(detector->Classify(features));
}
+// Score() reports 0.0 for the two feature vectors exercised here: an empty
+// one and a two-element one.
+TEST(DomDistillerDistillablePageDetectorTest, TestScoreWrongNumberFeatures) {
+  scoped_ptr<DistillablePageDetector> detector =
+      Builder().Stump(0, 1.0, 1.0).Stump(0, 1.4, 2.0).Build();
+  EXPECT_DOUBLE_EQ(1.5, detector->GetThreshold());
+
+  // Empty vector.
+  std::vector<double> mismatched;
+  EXPECT_DOUBLE_EQ(0.0, detector->Score(mismatched));
+
+  // Two entries.
+  mismatched.push_back(-3.0);
+  mismatched.push_back(1.0);
+  EXPECT_DOUBLE_EQ(0.0, detector->Score(mismatched));
+}
+
}
diff --git a/components/dom_distiller/core/javascript/extract_features.js b/components/dom_distiller/core/javascript/extract_features.js
new file mode 100644
index 0000000..031254c
--- /dev/null
+++ b/components/dom_distiller/core/javascript/extract_features.js
@@ -0,0 +1,28 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+(function() {
+ function hasOGArticle() {
+ var elems = document.head.querySelectorAll(
+ 'meta[property="og:type"],meta[name="og:type"]');
+ for (var i in elems) {
+ if (elems[i].content && elems[i].content.toUpperCase() == 'ARTICLE') {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ var body = document.body;
+ return JSON.stringify({
+ 'opengraph': hasOGArticle(),
+ 'url': document.location.href,
+ 'numElements': body.querySelectorAll('*').length,
+ 'numAnchors': body.querySelectorAll('a').length,
+ 'numForms': body.querySelectorAll('form').length,
+ 'innerText': body.innerText,
+ 'textContent': body.textContent,
+ 'innerHTML': body.innerHTML,
+ });
+})()
diff --git a/components/dom_distiller/core/page_features.cc b/components/dom_distiller/core/page_features.cc
index 057adbf..0540588 100644
--- a/components/dom_distiller/core/page_features.cc
+++ b/components/dom_distiller/core/page_features.cc
@@ -6,6 +6,7 @@
#include <string>
+#include "base/json/json_reader.h"
#include "third_party/re2/re2/re2.h"
namespace dom_distiller {
@@ -47,6 +48,8 @@ bool EndsWith(const std::string& t, const std::string& s) {
}
}
+// Length of the derived features vector (declared extern in page_features.h).
+// NOTE(review): presumably this has to match the number of values that
+// CalculateDerivedFeatures() produces; nothing visible here enforces that.
+int kDerivedFeaturesCount = 29;
+
std::vector<double> CalculateDerivedFeatures(bool isOGArticle,
const GURL& url,
double numElements,
@@ -132,7 +135,18 @@ std::vector<double> CalculateDerivedFeatures(bool isOGArticle,
return features;
}
-std::vector<double> CalculateDerivedFeaturesFromJSON(const base::Value* json) {
+std::vector<double> CalculateDerivedFeaturesFromJSON(
+ const base::Value* stringified_json) {
+ std::string stringified;
+ if (!stringified_json->GetAsString(&stringified)) {
+ return std::vector<double>();
+ }
+
+ scoped_ptr<base::Value> json(base::JSONReader::Read(stringified));
+ if (!json) {
+ return std::vector<double>();
+ }
+
const base::DictionaryValue* dict;
if (!json->GetAsDictionary(&dict)) {
return std::vector<double>();
diff --git a/components/dom_distiller/core/page_features.h b/components/dom_distiller/core/page_features.h
index 919a90a1..236796b 100644
--- a/components/dom_distiller/core/page_features.h
+++ b/components/dom_distiller/core/page_features.h
@@ -12,6 +12,9 @@
namespace dom_distiller {
+// The length of the derived features vector.
+extern int kDerivedFeaturesCount;
+
// The distillable page detector is a model trained on a list of numeric
// features derived from core more complex features of a webpage (like the
// body's .textContent). This derives the numeric features for a set of core
@@ -31,7 +34,8 @@ std::vector<double> CalculateDerivedFeatures(bool isOGArticle,
// Calculates the derived features from the JSON value as returned by the
// javascript core feature extraction.
-std::vector<double> CalculateDerivedFeaturesFromJSON(const base::Value* json);
+std::vector<double> CalculateDerivedFeaturesFromJSON(
+ const base::Value* stringified_json);
} // namespace dom_distiller
diff --git a/components/dom_distiller/core/page_features_unittest.cc b/components/dom_distiller/core/page_features_unittest.cc
index a863afc..413c55f 100644
--- a/components/dom_distiller/core/page_features_unittest.cc
+++ b/components/dom_distiller/core/page_features_unittest.cc
@@ -9,6 +9,7 @@
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
+#include "base/json/json_writer.h"
#include "base/memory/scoped_ptr.h"
#include "base/path_service.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -66,8 +67,14 @@ TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) {
base::DictionaryValue* core_features;
ASSERT_TRUE(input_entries->GetDictionary(i, &entry));
ASSERT_TRUE(entry->GetDictionary("features", &core_features));
+ // CalculateDerivedFeaturesFromJSON expects a base::Value of the stringified
+ // JSON (and not a base::Value of the JSON itself)
+ std::string stringified_json;
+ ASSERT_TRUE(base::JSONWriter::Write(core_features, &stringified_json));
+ scoped_ptr<base::Value> stringified_value(
+ new base::StringValue(stringified_json));
std::vector<double> derived(
- CalculateDerivedFeaturesFromJSON(core_features));
+ CalculateDerivedFeaturesFromJSON(stringified_value.get()));
ASSERT_EQ(labels.size(), derived.size());
ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry));
diff --git a/components/resources/dom_distiller_resources.grdp b/components/resources/dom_distiller_resources.grdp
index 186bd5e..9e81af2 100644
--- a/components/resources/dom_distiller_resources.grdp
+++ b/components/resources/dom_distiller_resources.grdp
@@ -8,4 +8,6 @@
<include name="IDR_DISTILLER_JS" file="../dom_distiller/core/javascript/domdistiller.js" flattenhtml="true" type="BINDATA" />
<include name="IDR_DISTILLER_CSS" file="../dom_distiller/core/css/distilledpage.css" type="BINDATA" />
<include name="IDR_IS_DISTILLABLE_JS" file="../dom_distiller/core/javascript/is_distillable_trigger.js" type="BINDATA" />
+ <include name="IDR_EXTRACT_PAGE_FEATURES_JS" file="../dom_distiller/core/javascript/extract_features.js" type="BINDATA" />
+ <include name="IDR_DISTILLABLE_PAGE_SERIALIZED_MODEL" file="../dom_distiller/core/data/distillable_page_model.bin" type="BINDATA" />
</grit-part>
diff --git a/components/test/data/dom_distiller/non_og_article.html b/components/test/data/dom_distiller/non_og_article.html
new file mode 100644
index 0000000..928a566
--- /dev/null
+++ b/components/test/data/dom_distiller/non_og_article.html
@@ -0,0 +1,8 @@
+<html>
+<head>
+ <title>Non-opengraph Article Title</title>
+</head>
+<body>
+Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
+</body>
+</html>
diff --git a/components/test/data/dom_distiller/og_article.html b/components/test/data/dom_distiller/og_article.html
new file mode 100644
index 0000000..9a6fbf7
--- /dev/null
+++ b/components/test/data/dom_distiller/og_article.html
@@ -0,0 +1,9 @@
+<html>
+<head prefix="og: http://ogp.me/ns#">
+ <title>Opengraph Article Title</title>
+ <meta property="og:type" content="article"/>
+</head>
+<body>
+Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
+</body>
+</html>