summaryrefslogtreecommitdiffstats
path: root/chrome/renderer/safe_browsing/features.cc
blob: 29675db9fde0b978690bfc1f44470b8e7fb805c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/renderer/safe_browsing/features.h"

#include "base/logging.h"
#include "base/metrics/histogram.h"

namespace safe_browsing {

// Upper bound on the number of entries in the feature map.  AddRealFeature()
// refuses to add a feature once features_.size() has reached this value.
const size_t FeatureMap::kMaxFeatureMapSize = 10000;

// The constructor and destructor perform no explicit work of their own.
FeatureMap::FeatureMap() {}
FeatureMap::~FeatureMap() {}

// Records |name| as a boolean feature by delegating to AddRealFeature() with
// the value 1.0.  The return value is whatever AddRealFeature() reports.
bool FeatureMap::AddBooleanFeature(const std::string& name) {
  const double kFeaturePresent = 1.0;
  return AddRealFeature(name, kFeaturePresent);
}

// Assigns |value| to features_[name].
//
// Returns true if the assignment was made.  Returns false -- after logging an
// error and recording a UMA histogram sample, and without touching the map --
// if either:
//   - features_ already holds kMaxFeatureMapSize or more entries (this check
//     runs first, before |name| is looked at), or
//   - |value| is not within [0.0, 1.0].  NaN counts as out of range.
bool FeatureMap::AddRealFeature(const std::string& name, double value) {
  if (features_.size() >= kMaxFeatureMapSize) {
    // If we hit this case, it indicates that either kMaxFeatureMapSize is
    // too small, or there is a bug causing too many features to be added.
    // In this case, we'll log to a histogram so we can see that this is
    // happening, and make phishing classification fail silently.
    LOG(ERROR) << "Not adding feature: " << name << " because the "
               << "feature map is too large.";
    UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
    return false;
  }
  // We only expect features in the range [0.0, 1.0], so fail if the feature is
  // outside this range.  The test is written as "not inside the range" rather
  // than "below or above it" because every ordered comparison involving NaN
  // is false: a NaN would pass `value < 0.0 || value > 1.0` and be stored.
  if (!(value >= 0.0 && value <= 1.0)) {
    LOG(ERROR) << "Not adding feature: " << name << " because the value "
               << value << " is not in the range [0.0, 1.0].";
    UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
    return false;
  }

  features_[name] = value;
  return true;
}

// Removes every entry from features_ by calling its clear() method.
void FeatureMap::Clear() {
  features_.clear();
}

// Definitions of the feature-name string constants, grouped by the kind of
// page data each one describes.
// NOTE(review): presumably these are passed as the |name| argument of
// FeatureMap::AddBooleanFeature() / AddRealFeature() -- confirm against the
// feature extractors.
// NOTE(review): the names ending in '=' look like prefixes that the caller
// completes with a concrete token (a TLD, a path token, a page term, ...);
// nothing in this file shows how they are combined, so verify at the call
// sites.
namespace features {
// URL host features
const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
const char kUrlTldToken[] = "UrlTld=";
const char kUrlDomainToken[] = "UrlDomain=";
const char kUrlOtherHostToken[] = "UrlOtherHostToken=";

// URL host aggregate features
const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// URL path features
const char kUrlPathToken[] = "UrlPathToken=";

// DOM HTML form features
const char kPageHasForms[] = "PageHasForms";
const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
const char kPageHasTextInputs[] = "PageHasTextInputs";
const char kPageHasPswdInputs[] = "PageHasPswdInputs";
const char kPageHasRadioInputs[] = "PageHasRadioInputs";
const char kPageHasCheckInputs[] = "PageHasCheckInputs";

// DOM HTML link features
const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
const char kPageLinkDomain[] = "PageLinkDomain=";
const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";

// DOM HTML script features
const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";

// Other DOM HTML features
const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";

// Page term features
const char kPageTerm[] = "PageTerm=";

}  // namespace features
}  // namespace safe_browsing