// Copyright 2015 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ #include #include "base/values.h" #include "url/gurl.h" namespace dom_distiller { // The length of the derived features vector. extern int kDerivedFeaturesCount; // The distillable page detector is a model trained on a list of numeric // features derived from core more complex features of a webpage (like the // body's .textContent). This derives the numeric features for a set of core // features. // // Note: It is crucial that these features are derived in the same way and are // in the same order as in the training pipeline. See //heuristics/distillable // in the external DomDistillerJs repo. std::vector CalculateDerivedFeatures(bool isOGArticle, const GURL& url, double numElements, double numAnchors, double numForms, const std::string& innerText, const std::string& textContent, const std::string& innerHTML); // Calculates the derived features from the JSON value as returned by the // javascript core feature extraction. std::vector CalculateDerivedFeaturesFromJSON( const base::Value* stringified_json); } // namespace dom_distiller #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_