summaryrefslogtreecommitdiffstats
path: root/chrome/browser
diff options
context:
space:
mode:
authorpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-04-17 19:51:40 +0000
committerpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-04-17 19:51:40 +0000
commit682343d9f1b8db68c75ab3c4632af2022dcc5ddf (patch)
treea77d4c6b29a1972b996436ca0b07e02fdf4613b3 /chrome/browser
parent7e40f180b28951bc84ba636d93929d0494747619 (diff)
downloadchromium_src-682343d9f1b8db68c75ab3c4632af2022dcc5ddf.zip
chromium_src-682343d9f1b8db68c75ab3c4632af2022dcc5ddf.tar.gz
chromium_src-682343d9f1b8db68c75ab3c4632af2022dcc5ddf.tar.bz2
Reduce the false positive rate for SafeBrowsing gethash requests.
This CL increases the memory consumption of the bloom filter used by the SafeBrowsing system in order to decrease the number of false positive gethash requests (gethash requests that result in an empty or 204 response from the servers). The filter size in bytes is calculated as: number_of_add_prefixes * bits_per_prefix / 8 From analysis of our histograms, users have between 250k - 330k add prefixes. 'bits_per_prefix' is hard coded to 25, which means that we expect the typical bloom filter to be between 760-1000 kB, compared to the current value of approximately 450 kB. We add histograms to track the filter size, as well as the number of gethash requests that return empty results and non-empty results. BUG=10584 (http://crbug.com/10584) Review URL: http://codereview.chromium.org/67243 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@13958 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser')
-rw-r--r--chrome/browser/safe_browsing/protocol_manager.cc6
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.cc9
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.h3
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_unittest.cc13
4 files changed, 27 insertions, 4 deletions
diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc
index 1dcec49..9f3ca7e 100644
--- a/chrome/browser/safe_browsing/protocol_manager.cc
+++ b/chrome/browser/safe_browsing/protocol_manager.cc
@@ -196,6 +196,12 @@ void SafeBrowsingProtocolManager::OnURLFetchComplete(
std::vector<SBFullHashResult> full_hashes;
bool can_cache = false;
if (response_code == 200 || response_code == 204) {
+ // For tracking our GetHash false positive (204) rate, compared to real
+ // (200) responses.
+ if (response_code == 200)
+ UMA_HISTOGRAM_COUNTS("SB2.GetHash200", 1);
+ else
+ UMA_HISTOGRAM_COUNTS("SB2.GetHash204", 1);
can_cache = true;
gethash_error_count_ = 0;
gethash_back_off_mult_ = 1;
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
index 8e7566a..cc8bfcc 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
@@ -30,9 +30,6 @@ static const int kDatabaseVersion = 6;
// downloading the data and then keep having to rebuild it.
static const int kBloomFilterMinSize = 250000;
-// How many bits to use per item. See the design doc for more information.
-static const int kBloomFilterSizeRatio = 13;
-
// When we awake from a low power state, we try to avoid doing expensive disk
// operations for a few minutes to let the system page itself in and settle
// down.
@@ -1065,8 +1062,11 @@ bool SafeBrowsingDatabaseBloom::WritePrefixes(
return false;
}
+ // Determine the size of the new bloom filter. We will cap the maximum size at
+ // 2 MB to prevent an error from consuming large amounts of memory.
int number_of_keys = std::max(add_count_, kBloomFilterMinSize);
- int filter_size = number_of_keys * kBloomFilterSizeRatio;
+ int filter_size = std::min(number_of_keys * kBloomFilterSizeRatio,
+ 2 * 1024 * 1024 * 8);
BloomFilter* new_filter = new BloomFilter(filter_size);
SBPair* add = adds;
int new_count = 0;
@@ -1357,6 +1357,7 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_);
UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs);
+ UMA_HISTOGRAM_COUNTS("SB2.FilterSize", filter->size());
int64 size_64;
if (file_util::GetFileSize(filename_, &size_64))
UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64));
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
index 82d4937..d8108d7 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
@@ -81,6 +81,9 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
virtual bool NeedToCheckUrl(const GURL& url);
+ // How many bits to use per item. See the design doc for more information.
+ static const int kBloomFilterSizeRatio = 25;
+
private:
// Opens the database.
bool Open();
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
index 2fea292..d49dc66 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
@@ -905,6 +905,19 @@ TEST(SafeBrowsingDatabase, HashCaching) {
// Prefix miss cache should be cleared.
EXPECT_EQ(database->prefix_miss_cache()->size(), 0U);
+ // Cache a GetHash miss for a particular prefix, and even though the prefix is
+ // in the database, it is flagged as a miss so looking up the associated URL
+ // will not succeed.
+ prefixes.clear();
+ full_hashes.clear();
+ prefix_misses.clear();
+ empty_full_hash.clear();
+ prefix_misses.push_back(Sha256Prefix("www.evil.com/phishing.html"));
+ database->CacheHashResults(prefix_misses, empty_full_hash);
+ EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/phishing.html"),
+ &listname, &prefixes,
+ &full_hashes, Time::Now()));
+
lists.clear();
prefixes.clear();
full_hashes.clear();