1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/renderer/safe_browsing/scorer.h"
#include <math.h>
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_piece.h"
#include "chrome/common/safe_browsing/client_model.pb.h"
#include "chrome/renderer/safe_browsing/features.h"
namespace {
// Outcome of building a Scorer, reported through RecordScorerCreationStatus().
// The numeric values are spelled out because they are what gets logged; the
// entries marked "Not used anymore" stay in place so that the values of the
// remaining entries do not shift.
enum ScorerCreationStatus {
  SCORER_SUCCESS = 0,
  SCORER_FAIL_MODEL_OPEN_FAIL = 1,       // Not used anymore
  SCORER_FAIL_MODEL_FILE_EMPTY = 2,      // Not used anymore
  SCORER_FAIL_MODEL_FILE_TOO_LARGE = 3,  // Not used anymore
  SCORER_FAIL_MODEL_PARSE_ERROR = 4,
  SCORER_FAIL_MODEL_MISSING_FIELDS = 5,
  SCORER_STATUS_MAX = 6  // Always add new values before this one.
};
// Logs |status| as one sample of the "SBClientPhishing.ScorerCreationStatus"
// enumeration histogram, passing SCORER_STATUS_MAX as the boundary value.
void RecordScorerCreationStatus(ScorerCreationStatus status) {
  UMA_HISTOGRAM_ENUMERATION(
      "SBClientPhishing.ScorerCreationStatus", status, SCORER_STATUS_MAX);
}
} // namespace
namespace safe_browsing {
// Maps log odds onto the matching probability, odds / (odds + 1), which lies
// in the range [0.0, 1.0].
static double LogOdds2Prob(double log_odds) {
  // exp() overflows to +infinity for arguments only slightly above 709, and
  // infinity / (infinity + 1) would be NaN instead of 1. Inputs from 709
  // upwards therefore saturate to 1.0. Very small log odds need no special
  // case, as the odds simply become 0 and so does the result.
  const double kSaturationLogOdds = 709;
  double probability = 1.0;
  if (!(log_odds >= kSaturationLogOdds)) {
    const double odds = exp(log_odds);
    const double odds_plus_one = odds + 1.0;
    probability = odds / odds_plus_one;
  }
  return probability;
}
// Default constructor. It performs no work of its own; Create() fills in the
// model and the page term / page word sets afterwards.
Scorer::Scorer() {
}
// Destructor. No explicit cleanup is performed here.
Scorer::~Scorer() {
}
// Builds a Scorer from |model_str|, which is parsed as a serialized
// ClientSideModel.
//
// Returns NULL, after recording the reason via RecordScorerCreationStatus(),
// when the model cannot be parsed, is missing required fields, or contains a
// page term that refers to a hash that does not exist. On success the status
// SCORER_SUCCESS is recorded and the returned pointer has been released from
// its scoped_ptr, so the caller is responsible for deleting it.
/* static */
Scorer* Scorer::Create(const base::StringPiece& model_str) {
  scoped_ptr<Scorer> scorer(new Scorer());
  ClientSideModel& model = scorer->model_;
  if (!model.ParseFromArray(model_str.data(), model_str.size())) {
    DLOG(ERROR) << "Unable to parse phishing model. This Scorer object is "
                << "invalid.";
    RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR);
    return NULL;
  } else if (!model.IsInitialized()) {
    DLOG(ERROR) << "Unable to parse phishing model. The model is missing "
                << "some required fields. Maybe the .proto file changed?";
    RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS);
    return NULL;
  }

  // Every page term is an index into the model's list of hashes. The index
  // comes straight from the parsed data, so check it before using it instead
  // of reading outside the list for a malformed model.
  const int hash_count = model.hashes_size();
  for (int i = 0; i < model.page_term_size(); ++i) {
    const int hash_index = model.page_term(i);
    if (hash_index < 0 || hash_index >= hash_count) {
      DLOG(ERROR) << "Unable to use phishing model. A page term refers to a "
                  << "hash index that is out of range.";
      RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR);
      return NULL;
    }
    scorer->page_terms_.insert(model.hashes(hash_index));
  }
  for (int i = 0; i < model.page_word_size(); ++i) {
    scorer->page_words_.insert(model.page_word(i));
  }

  // Success is recorded only once the scorer is fully built, so a single call
  // never reports both a success and a failure.
  RecordScorerCreationStatus(SCORER_SUCCESS);
  return scorer.release();
}
// Scores |features| against the model: the scores of all rules are added up,
// in model order, and the sum is treated as log odds and converted to a
// probability with LogOdds2Prob().
double Scorer::ComputeScore(const FeatureMap& features) const {
  const int rule_count = model_.rule_size();
  double total_log_odds = 0.0;
  for (int rule_index = 0; rule_index < rule_count; ++rule_index) {
    const double rule_score =
        ComputeRuleScore(model_.rule(rule_index), features);
    total_log_odds += rule_score;
  }
  return LogOdds2Prob(total_log_odds);
}
// Returns the version() value of the model held by this scorer.
int Scorer::model_version() const {
  const int version = model_.version();
  return version;
}
// Gives read-only access to the set of page term strings that Create()
// collected from the model's hashes.
const base::hash_set<std::string>& Scorer::page_terms() const {
  return this->page_terms_;
}
// Gives read-only access to the set of page word values that Create()
// collected from the model.
const base::hash_set<uint32_t>& Scorer::page_words() const {
  return this->page_words_;
}
// Returns the model's max_words_per_term() value, converted to size_t.
size_t Scorer::max_words_per_term() const {
  return static_cast<size_t>(model_.max_words_per_term());
}
// Returns the model's murmur_hash_seed() value, converted to uint32_t.
uint32_t Scorer::murmurhash3_seed() const {
  return static_cast<uint32_t>(model_.murmur_hash_seed());
}
// Returns the model's max_shingles_per_page() value, converted to size_t.
size_t Scorer::max_shingles_per_page() const {
  return static_cast<size_t>(model_.max_shingles_per_page());
}
// Returns the model's shingle_size() value, converted to size_t.
size_t Scorer::shingle_size() const {
  return static_cast<size_t>(model_.shingle_size());
}
// Computes the contribution of a single |rule| for the given |features|.
//
// Each feature of the rule is an index into the model's hashes; the hash is
// the key that is looked up in the feature map. The rule score is the product
// of the values of all of the rule's features, multiplied by the rule weight.
// The score is 0.0 as soon as one feature is absent from the map, has the
// value zero, or refers to a hash index that does not exist in the model.
double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule,
                                const FeatureMap& features) const {
  const base::hash_map<std::string, double>& feature_map = features.features();
  const int hash_count = model_.hashes_size();
  double rule_score = 1.0;
  for (int i = 0; i < rule.feature_size(); ++i) {
    const int hash_index = rule.feature(i);
    if (hash_index < 0 || hash_index >= hash_count) {
      // The index comes from the parsed model. A feature that points outside
      // the list of hashes cannot match anything, so it is handled like a
      // feature that is absent from the map instead of reading out of range.
      return 0.0;
    }
    base::hash_map<std::string, double>::const_iterator it =
        feature_map.find(model_.hashes(hash_index));
    if (it == feature_map.end() || it->second == 0.0) {
      // If the feature of the rule does not exist in the given feature map the
      // feature weight is considered to be zero. If the feature weight is zero
      // we leave early since we know that the rule score will be zero.
      return 0.0;
    }
    rule_score *= it->second;
  }
  return rule_score * rule.weight();
}
} // namespace safe_browsing
|