diff options
| author | wychen <wychen@chromium.org> | 2016-02-18 13:51:31 -0800 |
|---|---|---|
| committer | Commit bot <commit-bot@chromium.org> | 2016-02-18 21:53:45 +0000 |
| commit | e8ef9f9ef375bd1a0cbe43ee0327a20c122ca781 (patch) | |
| tree | 848ec1cae8bfb6c33a830f16a8fd202f5b86a37a | |
| parent | 4300f52e922e14a8f94f59ac55e86e640e9e7799 (diff) | |
| download | chromium_src-e8ef9f9ef375bd1a0cbe43ee0327a20c122ca781.zip chromium_src-e8ef9f9ef375bd1a0cbe43ee0327a20c122ca781.tar.gz chromium_src-e8ef9f9ef375bd1a0cbe43ee0327a20c122ca781.tar.bz2 | |
Support blacklisting domains for Reader Mode
The content on some domains is almost certainly not suitable for the DOM
distiller to extract, and the triggering logic sometimes produces false
positives.
BUG=587974
Review URL: https://codereview.chromium.org/1705323004
Cr-Commit-Position: refs/heads/master@{#376281}
| -rw-r--r-- | components/dom_distiller/content/renderer/distillability_agent.cc | 17 |
1 files changed, 16 insertions, 1 deletions
```diff
diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
index 99592ef..660985e 100644
--- a/components/dom_distiller/content/renderer/distillability_agent.cc
+++ b/components/dom_distiller/content/renderer/distillability_agent.cc
@@ -23,6 +23,10 @@ using namespace blink;
 
 namespace {
 
+const char* const kBlacklist[] = {
+  "www.reddit.com"
+};
+
 // Returns whether it is necessary to send updates back to the browser.
 // The number of updates can be from 0 to 2. See the tests in
 // "distillable_page_utils_browsertest.cc".
@@ -50,6 +54,15 @@ bool IsLast(bool is_loaded) {
   return true;
 }
 
+bool IsBlacklisted(const GURL& url) {
+  for (size_t i = 0; i < arraysize(kBlacklist); ++i) {
+    if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool IsDistillablePageAdaboost(WebDocument& doc,
                                const DistillablePageDetector* detector,
                                bool is_last) {
@@ -68,6 +81,7 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
     features.mozScoreAllSqrt,
     features.mozScoreAllLinear
   ));
+  bool blacklisted = IsBlacklisted(parsed_url);
 
   int bucket = static_cast<unsigned>(features.isMobileFriendly) |
       (static_cast<unsigned>(distillable) << 1);
@@ -78,7 +92,8 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
     UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing",
         bucket, 4);
   }
-  return distillable && (!features.isMobileFriendly);
+
+  return distillable && (!features.isMobileFriendly) && (!blacklisted);
 }
 
 bool IsDistillablePage(WebDocument& doc, bool is_last) {
```
