summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwychen <wychen@chromium.org>2016-02-18 13:51:31 -0800
committerCommit bot <commit-bot@chromium.org>2016-02-18 21:53:45 +0000
commite8ef9f9ef375bd1a0cbe43ee0327a20c122ca781 (patch)
tree848ec1cae8bfb6c33a830f16a8fd202f5b86a37a
parent4300f52e922e14a8f94f59ac55e86e640e9e7799 (diff)
downloadchromium_src-e8ef9f9ef375bd1a0cbe43ee0327a20c122ca781.zip
chromium_src-e8ef9f9ef375bd1a0cbe43ee0327a20c122ca781.tar.gz
chromium_src-e8ef9f9ef375bd1a0cbe43ee0327a20c122ca781.tar.bz2
Support blacklisting domains for Reader Mode
The contents of some domains are almost certainly not suitable for the DOM distiller to extract, and the triggering logic sometimes produces false positives. BUG=587974 Review URL: https://codereview.chromium.org/1705323004 Cr-Commit-Position: refs/heads/master@{#376281}
-rw-r--r--components/dom_distiller/content/renderer/distillability_agent.cc17
1 files changed, 16 insertions, 1 deletions
diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
index 99592ef..660985e 100644
--- a/components/dom_distiller/content/renderer/distillability_agent.cc
+++ b/components/dom_distiller/content/renderer/distillability_agent.cc
@@ -23,6 +23,10 @@ using namespace blink;
namespace {
+const char* const kBlacklist[] = {
+ "www.reddit.com"
+};
+
// Returns whether it is necessary to send updates back to the browser.
// The number of updates can be from 0 to 2. See the tests in
// "distillable_page_utils_browsertest.cc".
@@ -50,6 +54,15 @@ bool IsLast(bool is_loaded) {
return true;
}
+bool IsBlacklisted(const GURL& url) {
+ for (size_t i = 0; i < arraysize(kBlacklist); ++i) {
+ if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool IsDistillablePageAdaboost(WebDocument& doc,
const DistillablePageDetector* detector,
bool is_last) {
@@ -68,6 +81,7 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
features.mozScoreAllSqrt,
features.mozScoreAllLinear
));
+ bool blacklisted = IsBlacklisted(parsed_url);
int bucket = static_cast<unsigned>(features.isMobileFriendly) |
(static_cast<unsigned>(distillable) << 1);
@@ -78,7 +92,8 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing",
bucket, 4);
}
- return distillable && (!features.isMobileFriendly);
+
+ return distillable && (!features.isMobileFriendly) && (!blacklisted);
}
bool IsDistillablePage(WebDocument& doc, bool is_last) {