diff options
author | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-04-17 19:51:40 +0000 |
---|---|---|
committer | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-04-17 19:51:40 +0000 |
commit | 682343d9f1b8db68c75ab3c4632af2022dcc5ddf (patch) | |
tree | a77d4c6b29a1972b996436ca0b07e02fdf4613b3 /chrome/browser | |
parent | 7e40f180b28951bc84ba636d93929d0494747619 (diff) | |
download | chromium_src-682343d9f1b8db68c75ab3c4632af2022dcc5ddf.zip chromium_src-682343d9f1b8db68c75ab3c4632af2022dcc5ddf.tar.gz chromium_src-682343d9f1b8db68c75ab3c4632af2022dcc5ddf.tar.bz2 |
Reduce the false positive rate for SafeBrowsing gethash requests.
This CL increases the memory consumption of the bloom filter used
by the SafeBrowsing system in order to decrease the number of false
positive gethash requests (gethash requests that result in an empty
or 204 response from the servers).
The filter size in bytes is calculated as:
number_of_add_prefixes * bits_per_prefix / 8
From analysis of our histograms, users have between 250k - 330k add
prefixes. 'bits_per_prefix' is hard coded to 25, which means that we
expect the typical bloom filter to be between 760-1000 kB, compared
to the current value of approximately 450 kB.
We add histograms to track the filter size, as well as the number of
gethash requests that return empty results and non-empty results.
BUG=10584 (http://crbug.com/10584)
Review URL: http://codereview.chromium.org/67243
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@13958 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser')
4 files changed, 27 insertions, 4 deletions
diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc index 1dcec49..9f3ca7e 100644 --- a/chrome/browser/safe_browsing/protocol_manager.cc +++ b/chrome/browser/safe_browsing/protocol_manager.cc @@ -196,6 +196,12 @@ void SafeBrowsingProtocolManager::OnURLFetchComplete( std::vector<SBFullHashResult> full_hashes; bool can_cache = false; if (response_code == 200 || response_code == 204) { + // For tracking our GetHash false positive (204) rate, compared to real + // (200) responses. + if (response_code == 200) + UMA_HISTOGRAM_COUNTS("SB2.GetHash200", 1); + else + UMA_HISTOGRAM_COUNTS("SB2.GetHash204", 1); can_cache = true; gethash_error_count_ = 0; gethash_back_off_mult_ = 1; diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc index 8e7566a..cc8bfcc 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc @@ -30,9 +30,6 @@ static const int kDatabaseVersion = 6; // downloading the data and then keep having to rebuild it. static const int kBloomFilterMinSize = 250000; -// How many bits to use per item. See the design doc for more information. -static const int kBloomFilterSizeRatio = 13; - // When we awake from a low power state, we try to avoid doing expensive disk // operations for a few minutes to let the system page itself in and settle // down. @@ -1065,8 +1062,11 @@ bool SafeBrowsingDatabaseBloom::WritePrefixes( return false; } + // Determine the size of the new bloom filter. We will cap the maximum size at + // 2 MB to prevent an error from consuming large amounts of memory. int number_of_keys = std::max(add_count_, kBloomFilterMinSize); - int filter_size = number_of_keys * kBloomFilterSizeRatio; + int filter_size = std::min(number_of_keys * kBloomFilterSizeRatio, + 2 * 1024 * 1024 * 8); BloomFilter* new_filter = new BloomFilter(filter_size); SBPair* add = adds; int new_count = 0; @@ -1357,6 +1357,7 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() { UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_); UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs); + UMA_HISTOGRAM_COUNTS("SB2.FilterSize", filter->size()); int64 size_64; if (file_util::GetFileSize(filename_, &size_64)) UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64)); diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h index 82d4937..d8108d7 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h +++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h @@ -81,6 +81,9 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { virtual bool NeedToCheckUrl(const GURL& url); + // How many bits to use per item. See the design doc for more information. + static const int kBloomFilterSizeRatio = 25; + private: // Opens the database. bool Open(); diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index 2fea292..d49dc66 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -905,6 +905,19 @@ TEST(SafeBrowsingDatabase, HashCaching) { // Prefix miss cache should be cleared. EXPECT_EQ(database->prefix_miss_cache()->size(), 0U); + // Cache a GetHash miss for a particular prefix, and even though the prefix is + // in the database, it is flagged as a miss so looking up the associated URL + // will not succeed. + prefixes.clear(); + full_hashes.clear(); + prefix_misses.clear(); + empty_full_hash.clear(); + prefix_misses.push_back(Sha256Prefix("www.evil.com/phishing.html")); + database->CacheHashResults(prefix_misses, empty_full_hash); + EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &listname, &prefixes, + &full_hashes, Time::Now())); + lists.clear(); prefixes.clear(); full_hashes.clear(); |