blob: 29675db9fde0b978690bfc1f44470b8e7fb805c2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/renderer/safe_browsing/features.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
namespace safe_browsing {
// Upper bound on the number of entries a FeatureMap may hold.  Once the map
// has reached this size, AddRealFeature() refuses to store anything further.
const size_t FeatureMap::kMaxFeatureMapSize = 10 * 1000;
// Default constructor.  Performs no explicit initialization of its own; the
// members are simply default-constructed.
FeatureMap::FeatureMap() {
}
// Destructor.  Has no body of its own; members are destroyed automatically.
FeatureMap::~FeatureMap() {
}
// Adds a boolean feature called |name|.  A boolean feature is stored as a
// real-valued feature whose value is 1.0, so this simply forwards to
// AddRealFeature() and returns whatever that call reports.
bool FeatureMap::AddBooleanFeature(const std::string& name) {
  const double kBooleanFeatureValue = 1.0;
  return AddRealFeature(name, kBooleanFeatureValue);
}
// Stores the real-valued feature |name| with the given |value|, replacing any
// value previously stored under the same name.
//
// Returns true if the feature was stored.  Returns false, after logging an
// error and recording a UMA histogram sample, when either:
//   - the map already holds kMaxFeatureMapSize entries (this check runs
//     first, before |name| is looked up, so it also rejects updates to
//     names that are already present), or
//   - |value| is not inside [0.0, 1.0], which includes NaN.
bool FeatureMap::AddRealFeature(const std::string& name, double value) {
  if (features_.size() >= kMaxFeatureMapSize) {
    // If we hit this case, it indicates that either kMaxFeatureMapSize is
    // too small, or there is a bug causing too many features to be added.
    // In this case, we'll log to a histogram so we can see that this is
    // happening, and make phishing classification fail silently.
    LOG(ERROR) << "Not adding feature: " << name << " because the "
               << "feature map is too large.";
    UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
    return false;
  }

  // We only expect features in the range [0.0, 1.0], so fail if the feature is
  // outside this range.  The test is phrased as a negated "is in range" check
  // because every ordered comparison against NaN is false: the form
  // (value < 0.0 || value > 1.0) would let NaN through, this form rejects it.
  if (!(value >= 0.0 && value <= 1.0)) {
    LOG(ERROR) << "Not adding feature: " << name << " because the value "
               << value << " is not in the range [0.0, 1.0].";
    UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
    return false;
  }

  features_[name] = value;
  return true;
}
// Removes every feature currently stored in the map.
void FeatureMap::Clear() {
  features_.clear();
}
// String names of the features defined in this file, grouped by the part of
// the page or URL they describe.
// NOTE(review): names ending in '=' look like prefixes to which a token is
// appended by the caller -- confirm against the feature extractors.
namespace features {

// ---- URL host features ----
const char kUrlHostIsIpAddress[]           = "UrlHostIsIpAddress";
const char kUrlTldToken[]                  = "UrlTld=";
const char kUrlDomainToken[]               = "UrlDomain=";
const char kUrlOtherHostToken[]            = "UrlOtherHostToken=";

// ---- URL host aggregate features ----
const char kUrlNumOtherHostTokensGTOne[]   = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// ---- URL path features ----
const char kUrlPathToken[]                 = "UrlPathToken=";

// ---- DOM HTML form features ----
const char kPageHasForms[]                 = "PageHasForms";
const char kPageActionOtherDomainFreq[]    = "PageActionOtherDomainFreq";
const char kPageHasTextInputs[]            = "PageHasTextInputs";
const char kPageHasPswdInputs[]            = "PageHasPswdInputs";
const char kPageHasRadioInputs[]           = "PageHasRadioInputs";
const char kPageHasCheckInputs[]           = "PageHasCheckInputs";

// ---- DOM HTML link features ----
const char kPageExternalLinksFreq[]        = "PageExternalLinksFreq";
const char kPageLinkDomain[]               = "PageLinkDomain=";
const char kPageSecureLinksFreq[]          = "PageSecureLinksFreq";

// ---- DOM HTML script features ----
const char kPageNumScriptTagsGTOne[]       = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[]       = "PageNumScriptTags>6";

// ---- Other DOM HTML features ----
const char kPageImgOtherDomainFreq[]       = "PageImgOtherDomainFreq";

// ---- Page term features ----
const char kPageTerm[]                     = "PageTerm=";

}  // namespace features
} // namespace safe_browsing
|