// Source: chrome/renderer/safe_browsing/scorer.h
// (blob 6a37c4fcfe7600bcdf2b67a621f0ce58740ea2fc)
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This class loads a client-side model and lets you compute a phishing score
// for a set of previously extracted features.  The phishing score corresponds
// to the probability that the features are indicative of a phishing site.
//
// For more details on how the score is actually computed for a given model
// and a given set of features, read the comments in the client_model.proto
// file.
//
// See features.h for a list of features that are currently used.

#ifndef CHROME_RENDERER_SAFE_BROWSING_SCORER_H_
#define CHROME_RENDERER_SAFE_BROWSING_SCORER_H_

#include <stddef.h>
#include <stdint.h>

#include <string>

#include "base/containers/hash_tables.h"
#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "chrome/common/safe_browsing/client_model.pb.h"

namespace safe_browsing {
class FeatureMap;

// Scorer wraps a parsed ClientSideModel and exposes the phishing score
// computation plus the model parameters that the page feature extractor
// needs.  The destructor and ComputeScore() are virtual to simplify mocking
// of this class; the remaining accessors are non-virtual.
class Scorer {
 public:
  virtual ~Scorer();

  // Factory method which creates a new Scorer object by parsing the given
  // model.  If parsing fails this method returns NULL.
  // NOTE(review): the result is a raw pointer; the caller presumably takes
  // ownership of it -- confirm against the implementation in scorer.cc.
  static Scorer* Create(const base::StringPiece& model_str);

  // This method computes the probability that the given features are indicative
  // of phishing.  It returns a score value that falls in the range [0.0,1.0]
  // (range is inclusive on both ends).
  virtual double ComputeScore(const FeatureMap& features) const;

  // Returns the version number of the loaded client model.
  int model_version() const;

  // -- Accessors used by the page feature extractor ---------------------------

  // Returns a set of hashed page terms that appear in the model in binary
  // format.
  const base::hash_set<std::string>& page_terms() const;

  // Returns a set of hashed page words that appear in the model in binary
  // format.
  const base::hash_set<uint32_t>& page_words() const;

  // Returns the maximum number of words per term for the loaded model.
  size_t max_words_per_term() const;

  // Returns the murmurhash3 seed for the loaded model.
  uint32_t murmurhash3_seed() const;

  // Returns the maximum number of unique shingle hashes per page.
  size_t max_shingles_per_page() const;

  // Returns the number of words in a shingle.
  size_t shingle_size() const;

 protected:
  // Most clients should use the factory method.  This constructor is protected
  // (rather than private) to allow for mock implementations that subclass
  // Scorer.
  Scorer();

 private:
  // Grants PhishingScorerTest (presumably the unit-test fixture) access to the
  // private members below.
  friend class PhishingScorerTest;

  // Computes the score for a given rule and feature map.  The score is computed
  // by multiplying the rule weight with the product of feature weights for the
  // given rule.  The feature weights are stored in the feature map.  If a
  // particular feature does not exist in the feature map we set its weight to
  // zero.
  double ComputeRuleScore(const ClientSideModel::Rule& rule,
                          const FeatureMap& features) const;

  // The client-side model held by this scorer (presumably filled in by
  // Create() when parsing succeeds -- confirm in scorer.cc).
  ClientSideModel model_;
  // Hashed page terms; presumably the storage behind page_terms().
  base::hash_set<std::string> page_terms_;
  // Hashed page words; presumably the storage behind page_words().
  base::hash_set<uint32_t> page_words_;

  // Scorer objects are neither copyable nor assignable.
  DISALLOW_COPY_AND_ASSIGN(Scorer);
};
}  // namespace safe_browsing

#endif  // CHROME_RENDERER_SAFE_BROWSING_SCORER_H_