diff options
author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
---|---|---|
committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
commit | 09911bf300f1a419907a9412154760efd0b7abc3 (patch) | |
tree | f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/safe_browsing | |
parent | 586acc5fe142f498261f52c66862fa417c3d52d2 (diff) | |
download | chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2 |
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing')
23 files changed, 8338 insertions, 0 deletions
diff --git a/chrome/browser/safe_browsing/bloom_filter.cc b/chrome/browser/safe_browsing/bloom_filter.cc new file mode 100644 index 0000000..a3e8699 --- /dev/null +++ b/chrome/browser/safe_browsing/bloom_filter.cc @@ -0,0 +1,86 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "chrome/browser/safe_browsing/bloom_filter.h" + +#include <windows.h> + + +BloomFilter::BloomFilter(int bit_size) { + byte_size_ = bit_size / 8 + 1; + bit_size_ = byte_size_ * 8; + data_.reset(new char[byte_size_]); + ZeroMemory(data_.get(), byte_size_); +} + +BloomFilter::BloomFilter(char* data, int size) { + byte_size_ = size; + bit_size_ = byte_size_ * 8; + data_.reset(data); +} + +BloomFilter::~BloomFilter() { +} + +void BloomFilter::Insert(int hash_int) { + uint32 hash; + memcpy(&hash, &hash_int, sizeof(hash)); + for (int i = 0; i < 4; ++i) { + hash = RotateLeft(hash); + uint32 index = hash % bit_size_; + + int byte = index / 8; + int bit = index % 8; + data_.get()[byte] |= 1 << bit; + } +} + +bool BloomFilter::Exists(int hash_int) const { + uint32 hash; + memcpy(&hash, &hash_int, sizeof(hash)); + for (int i = 0; i < 4; ++i) { + hash = RotateLeft(hash); + uint32 index = hash % bit_size_; + + int byte = index / 8; + int bit = index % 8; + char data = data_.get()[byte]; + if (!(data & (1 << bit))) + return false; + } + + return true; +} + +uint32 BloomFilter::RotateLeft(uint32 hash) { + uint32 left_byte = hash >> 24; + hash = hash << 8; + hash |= left_byte; + return hash; +} diff --git a/chrome/browser/safe_browsing/bloom_filter.h b/chrome/browser/safe_browsing/bloom_filter.h new file mode 100644 index 0000000..67b05dc --- /dev/null +++ b/chrome/browser/safe_browsing/bloom_filter.h @@ -0,0 +1,58 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// A simple bloom filter. It's currently limited to four hashing functions, +// which are calculated from the item's hash. + +#include "base/scoped_ptr.h" +#include "base/basictypes.h" + +class BloomFilter { + public: + // Constructs an empty filter with the given size. + BloomFilter(int bit_size); + + // Constructs a filter from serialized data. This object owns the memory + // and will delete it on destruction. 
+ BloomFilter(char* data, int size); + ~BloomFilter(); + + void Insert(int hash); + bool Exists(int hash) const; + + const char* data() const { return data_.get(); } + int size() const { return byte_size_; } + + private: + static uint32 RotateLeft(uint32 hash); + + int byte_size_; // size in bytes + int bit_size_; // size in bits + scoped_array<char> data_; +}; diff --git a/chrome/browser/safe_browsing/bloom_filter_unittest.cc b/chrome/browser/safe_browsing/bloom_filter_unittest.cc new file mode 100644 index 0000000..fe0ad7c --- /dev/null +++ b/chrome/browser/safe_browsing/bloom_filter_unittest.cc @@ -0,0 +1,103 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "chrome/browser/safe_browsing/bloom_filter.h" + +#include <set> + +#include "base/logging.h" +#include "base/string_util.h" +#include "base/win_util.h" +#include "chrome/common/rand_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +uint32 GenHash() { + return static_cast<uint32>(rand_util::RandIntSecure(0, kint32max)); +} + +} + +TEST(SafeBrowsing, BloomFilter) { + // rand_util isn't random enough on Win2K, see bug 1076619. + if (win_util::GetWinVersion() == win_util::WINVERSION_2000) + return; + + // Use a small number for unit test so it's not slow. + int count = 1000;//100000; + + // Build up the bloom filter. + BloomFilter filter(count * 10); + + typedef std::set<int> Values; + Values values; + for (int i = 0; i < count; ++i) { + uint32 value = GenHash(); + values.insert(value); + filter.Insert(value); + } + + // Check serialization works. + char* data_copy = new char[filter.size()]; + memcpy(data_copy, filter.data(), filter.size()); + BloomFilter filter_copy(data_copy, filter.size()); + + // Check no false negatives by ensuring that every time we inserted exists. + for (Values::iterator i = values.begin(); i != values.end(); ++i) { + EXPECT_TRUE(filter_copy.Exists(*i)); + } + + // Check false positive error rate by checking the same number of items that + // we inserted, but of different values, and calculating what percentage are + // "found". 
+ uint32 found_count = 0; + uint32 checked = 0; + while (true) { + uint32 value = GenHash(); + if (values.find(value) != values.end()) + continue; + + if (filter_copy.Exists(value)) + found_count++; + + checked ++; + if (checked == count) + break; + } + + // The FP rate should be 1.2%. Keep a large margin of error because we don't + // want to fail this test because we happened to randomly pick a lot of FPs. + double fp_rate = found_count * 100.0 / count; + CHECK(fp_rate < 5.0); + + LOG(INFO) << "For safe browsing bloom filter of size " << count << + ", the FP rate was " << fp_rate << " %"; +} diff --git a/chrome/browser/safe_browsing/chunk_range.cc b/chrome/browser/safe_browsing/chunk_range.cc new file mode 100644 index 0000000..201504f --- /dev/null +++ b/chrome/browser/safe_browsing/chunk_range.cc @@ -0,0 +1,122 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Implementation of ChunkRange class. + +#include "chrome/browser/safe_browsing/chunk_range.h" + +#include "base/logging.h" +#include "base/string_util.h" + +ChunkRange::ChunkRange(int start) : start_(start), stop_(start) { +} + +ChunkRange::ChunkRange(int start, int stop) : start_(start), stop_(stop) { +} + +ChunkRange::ChunkRange(const ChunkRange& rhs) + : start_(rhs.start()), stop_(rhs.stop()) { +} + +// Helper functions ----------------------------------------------------------- + +// Traverse the chunks vector looking for contiguous integers. 
+void ChunksToRanges(const std::vector<int>& chunks, + std::vector<ChunkRange>* ranges) { + DCHECK(ranges); + for (size_t i = 0; i < chunks.size(); ++i) { + int start = static_cast<int>(i); + int next = start + 1; + while (next < static_cast<int>(chunks.size()) && + (chunks[start] == chunks[next] - 1 || + chunks[start] == chunks[next])) { + ++start; + ++next; + } + ranges->push_back(ChunkRange(chunks[i], chunks[start])); + if (next >= static_cast<int>(chunks.size())) + break; + i = start; + } +} + +void RangesToChunks(const std::vector<ChunkRange>& ranges, + std::vector<int>* chunks) { + DCHECK(chunks); + for (size_t i = 0; i < ranges.size(); ++i) { + const ChunkRange& range = ranges[i]; + for (int chunk = range.start(); chunk <= range.stop(); ++chunk) { + chunks->push_back(chunk); + } + } +} + +void RangesToString(const std::vector<ChunkRange>& ranges, + std::string* result) { + DCHECK(result); + result->clear(); + std::vector<ChunkRange>::const_iterator it = ranges.begin(); + for (; it != ranges.end(); ++it) { + if (!result->empty()) + result->append(","); + if (it->start() == it->stop()) { + char num_buf[11]; // One 32 bit positive integer + NULL. + _itoa_s(it->start(), num_buf, sizeof(num_buf), 10); + result->append(num_buf); + } else { + result->append(StringPrintf("%d-%d", it->start(), it->stop())); + } + } +} + +bool StringToRanges(const std::string& input, + std::vector<ChunkRange>* ranges) { + DCHECK(ranges); + + // Crack the string into chunk parts, then crack each part looking for a range. + std::vector<std::string> chunk_parts; + SplitString(input, ',', &chunk_parts); + + for (size_t i = 0; i < chunk_parts.size(); ++i) { + std::vector<std::string> chunk_ranges; + SplitString(chunk_parts[i], '-', &chunk_ranges); + int start = atoi(chunk_ranges[0].c_str()); + int stop = start; + if (chunk_ranges.size() == 2) + stop = atoi(chunk_ranges[1].c_str()); + if (start == 0 || stop == 0) { + // atoi error, since chunk numbers are guaranteed to never be 0. 
+ ranges->clear(); + return false; + } + ranges->push_back(ChunkRange(start, stop)); + } + + return true; +}
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/chunk_range.h b/chrome/browser/safe_browsing/chunk_range.h new file mode 100644 index 0000000..4f4567b --- /dev/null +++ b/chrome/browser/safe_browsing/chunk_range.h @@ -0,0 +1,89 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Class for parsing lists of integers into ranges. 
//
// The anti-phishing and anti-malware protocol sends ASCII strings of numbers
// and ranges of numbers corresponding to chunks of whitelists and blacklists.
// Clients of this protocol need to be able to convert back and forth between
// this representation, and individual integer chunk numbers. The ChunkRange
// class is a simple and compact mechanism for storing a continuous list of
// chunk numbers.

#ifndef CHROME_BROWSER_SAFE_BROWSING_CHUNK_RANGE_H__
#define CHROME_BROWSER_SAFE_BROWSING_CHUNK_RANGE_H__

#include <string>
#include <vector>

// ChunkRange ------------------------------------------------------------------
// An inclusive, continuous span of chunk numbers: [start, stop].

class ChunkRange {
 public:
  ChunkRange(int start);
  ChunkRange(int start, int stop);
  ChunkRange(const ChunkRange& rhs);

  // End points of the range.
  int start() const { return start_; }
  int stop() const { return stop_; }

  // Two ranges are equal when both end points match.
  bool operator==(const ChunkRange& rhs) const {
    return start_ == rhs.start_ && stop_ == rhs.stop_;
  }

 private:
  int start_;
  int stop_;
};


// Helper functions ------------------------------------------------------------

// Compact a series of chunk numbers into ranges.
// The 'chunks' vector must be sorted in ascending order.
void ChunksToRanges(const std::vector<int>& chunks,
                    std::vector<ChunkRange>* ranges);

// Expand a set of ranges into individual chunk numbers.
void RangesToChunks(const std::vector<ChunkRange>& ranges,
                    std::vector<int>* chunks);

// Format a series of chunk ranges as a protocol string.
void RangesToString(const std::vector<ChunkRange>& ranges,
                    std::string* result);

// Returns 'true' if the string was successfully converted to ChunkRanges,
// 'false' if the input was malformed.
// The string must be in the form: "1-100,398,415,1138-2001,2019".
bool StringToRanges(const std::string& input,
                    std::vector<ChunkRange>* ranges);


#endif  // CHROME_BROWSER_SAFE_BROWSING_CHUNK_RANGE_H__
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/chunk_range_unittest.cc b/chrome/browser/safe_browsing/chunk_range_unittest.cc new file mode 100644 index 0000000..675de33 --- /dev/null +++ b/chrome/browser/safe_browsing/chunk_range_unittest.cc @@ -0,0 +1,202 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Test program to convert lists of integers into ranges, and vice versa. 
+ +#include "base/logging.h" +#include "chunk_range.h" +#include "testing/gtest/include/gtest/gtest.h" + +// Test formatting chunks into a string representation. +TEST(SafeBrowsingChunkRangeTest, TestRangesToString) { + std::vector<ChunkRange> ranges; + ranges.push_back(ChunkRange(1, 10)); + ranges.push_back(ChunkRange(15, 17)); + ranges.push_back(ChunkRange(21, 410)); + ranges.push_back(ChunkRange(991, 1000)); + + std::string range_string; + RangesToString(ranges, &range_string); + EXPECT_EQ(range_string, "1-10,15-17,21-410,991-1000"); + ranges.clear(); + + ranges.push_back(ChunkRange(4, 4)); + RangesToString(ranges, &range_string); + EXPECT_EQ(range_string, "4"); + + ranges.push_back(ChunkRange(7)); + ranges.push_back(ChunkRange(9)); + RangesToString(ranges, &range_string); + EXPECT_EQ(range_string, "4,7,9"); + + ranges.push_back(ChunkRange(42, 99)); + RangesToString(ranges, &range_string); + EXPECT_EQ(range_string, "4,7,9,42-99"); +} + + +// Test various configurations of chunk numbers. +TEST(SafeBrowsingChunkRangeTest, TestChunksToRanges) { + std::vector<int> chunks; + std::vector<ChunkRange> ranges; + + // Test one chunk range and one single value. + chunks.push_back(1); + chunks.push_back(2); + chunks.push_back(3); + chunks.push_back(4); + chunks.push_back(7); + ChunksToRanges(chunks, &ranges); + EXPECT_EQ(ranges.size(), 2); + EXPECT_EQ(ranges[0].start(), 1); + EXPECT_EQ(ranges[0].stop(), 4); + EXPECT_EQ(ranges[1].start(), 7); + EXPECT_EQ(ranges[1].stop(), 7); + + chunks.clear(); + ranges.clear(); + + // Test all chunk numbers in one range. + chunks.push_back(3); + chunks.push_back(4); + chunks.push_back(5); + chunks.push_back(6); + chunks.push_back(7); + chunks.push_back(8); + chunks.push_back(9); + chunks.push_back(10); + ChunksToRanges(chunks, &ranges); + EXPECT_EQ(ranges.size(), 1); + EXPECT_EQ(ranges[0].start(), 3); + EXPECT_EQ(ranges[0].stop(), 10); + + chunks.clear(); + ranges.clear(); + + // Test no chunk numbers in contiguous ranges. 
+ chunks.push_back(3); + chunks.push_back(5); + chunks.push_back(7); + chunks.push_back(9); + chunks.push_back(11); + chunks.push_back(13); + chunks.push_back(15); + chunks.push_back(17); + ChunksToRanges(chunks, &ranges); + EXPECT_EQ(ranges.size(), 8); + + chunks.clear(); + ranges.clear(); + + // Test a single chunk number. + chunks.push_back(17); + ChunksToRanges(chunks, &ranges); + EXPECT_EQ(ranges.size(), 1); + EXPECT_EQ(ranges[0].start(), 17); + EXPECT_EQ(ranges[0].stop(), 17); + + chunks.clear(); + ranges.clear(); + + // Test duplicates. + chunks.push_back(1); + chunks.push_back(2); + chunks.push_back(2); + chunks.push_back(2); + chunks.push_back(3); + chunks.push_back(7); + chunks.push_back(7); + chunks.push_back(7); + chunks.push_back(7); + ChunksToRanges(chunks, &ranges); + EXPECT_EQ(ranges.size(), 2); + EXPECT_EQ(ranges[0].start(), 1); + EXPECT_EQ(ranges[0].stop(), 3); + EXPECT_EQ(ranges[1].start(), 7); + EXPECT_EQ(ranges[1].stop(), 7); +} + + +TEST(SafeBrowsingChunkRangeTest, TestStringToRanges) { + std::vector<ChunkRange> ranges; + + std::string input = "1-100,398,415,1138-2001,2019"; + EXPECT_TRUE(StringToRanges(input, &ranges)); + EXPECT_EQ(ranges.size(), 5); + EXPECT_EQ(ranges[0].start(), 1); + EXPECT_EQ(ranges[0].stop(), 100); + EXPECT_EQ(ranges[1].start(), 398); + EXPECT_EQ(ranges[1].stop(), 398); + EXPECT_EQ(ranges[3].start(), 1138); + EXPECT_EQ(ranges[3].stop(), 2001); + + ranges.clear(); + + input = "1,2,3,4,5,6,7"; + EXPECT_TRUE(StringToRanges(input, &ranges)); + EXPECT_EQ(ranges.size(), 7); + + ranges.clear(); + + input = "300-3001"; + EXPECT_TRUE(StringToRanges(input, &ranges)); + EXPECT_EQ(ranges.size(), 1); + EXPECT_EQ(ranges[0].start(), 300); + EXPECT_EQ(ranges[0].stop(), 3001); + + ranges.clear(); + + input = "17"; + EXPECT_TRUE(StringToRanges(input, &ranges)); + EXPECT_EQ(ranges.size(), 1); + EXPECT_EQ(ranges[0].start(), 17); + EXPECT_EQ(ranges[0].stop(), 17); + + ranges.clear(); + + input = "x-y"; + EXPECT_FALSE(StringToRanges(input, 
&ranges)); +} + + +TEST(SafeBrowsingChunkRangeTest, TestRangesToChunks) { + std::vector<ChunkRange> ranges; + ranges.push_back(ChunkRange(1, 4)); + ranges.push_back(ChunkRange(17)); + + std::vector<int> chunks; + RangesToChunks(ranges, &chunks); + + EXPECT_EQ(chunks.size(), 5); + EXPECT_EQ(chunks[0], 1); + EXPECT_EQ(chunks[1], 2); + EXPECT_EQ(chunks[2], 3); + EXPECT_EQ(chunks[3], 4); + EXPECT_EQ(chunks[4], 17); +}
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/database_perftest.cc b/chrome/browser/safe_browsing/database_perftest.cc new file mode 100644 index 0000000..ee61c38 --- /dev/null +++ b/chrome/browser/safe_browsing/database_perftest.cc @@ -0,0 +1,565 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <stdio.h> +#include <stdlib.h> + +#include <set> + +#include "base/file_util.h" +#include "base/logging.h" +#include "base/path_service.h" +#include "base/perftimer.h" +#include "base/string_util.h" +#include "chrome/browser/safe_browsing/safe_browsing_database.h" +#include "chrome/common/chrome_paths.h" +#include "chrome/common/sqlite_compiled_statement.h" +#include "chrome/common/sqlite_utils.h" +#include "chrome/test/test_file_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +// These tests are slow, especially the ones that create databases. So disable +// them by default. +//#define SAFE_BROWSING_DATABASE_TESTS_ENABLED +#ifdef SAFE_BROWSING_DATABASE_TESTS_ENABLED + +namespace { + +// Base class for a safebrowsing database. Derived classes can implement +// different types of tables to compare performance characteristics. +class Database { + public: + Database() : db_(NULL) { + } + + ~Database() { + if (db_) { + statement_cache_.Cleanup(); + sqlite3_close(db_); + db_ = NULL; + } + } + + bool Init(const std::string& name, bool create) { + // get an empty file for the test DB + std::wstring filename; + PathService::Get(base::DIR_TEMP, &filename); + filename.push_back(file_util::kPathSeparator); + filename.append(ASCIIToWide(name)); + + if (create) { + DeleteFile(filename.c_str()); + } else { + DLOG(INFO) << "evicting " << name << " ..."; + file_util::EvictFileFromSystemCache(filename.c_str()); + DLOG(INFO) << "... evicted"; + } + + if (sqlite3_open(WideToUTF8(filename).c_str(), &db_) != SQLITE_OK) + return false; + + statement_cache_.set_db(db_); + + if (!create) + return true; + + return CreateTable(); + } + + virtual bool CreateTable() = 0; + virtual bool Add(int host_key, int* prefixes, int count) = 0; + virtual bool Read(int host_key, int* prefixes, int size, int* count) = 0; + virtual int Count() = 0; + virtual std::string GetDBSuffix() = 0; + + sqlite3* db() { return db_; } + + protected: + // The database connection. 
+ sqlite3* db_; + + // Cache of compiled statements for our database. + SqliteStatementCache statement_cache_; +}; + +class SimpleDatabase : public Database { + public: + virtual bool CreateTable() { + if (DoesSqliteTableExist(db_, "hosts")) + return false; + + return sqlite3_exec(db_, "CREATE TABLE hosts (" + "host INTEGER," + "prefixes BLOB)", + NULL, NULL, NULL) == SQLITE_OK; + } + + virtual bool Add(int host_key, int* prefixes, int count) { + SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, + "INSERT OR REPLACE INTO hosts" + "(host,prefixes)" + "VALUES (?,?)"); + if (!statement.is_valid()) + return false; + + statement->bind_int(0, host_key); + statement->bind_blob(1, prefixes, count*sizeof(int)); + return statement->step() == SQLITE_DONE; + } + + virtual bool Read(int host_key, int* prefixes, int size, int* count) { + SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, + "SELECT host, prefixes FROM hosts WHERE host=?"); + if (!statement.is_valid()) + return false; + + statement->bind_int(0, host_key); + + int rv = statement->step(); + if (rv == SQLITE_DONE) { + // no hostkey found, not an error + *count = -1; + return true; + } + + if (rv != SQLITE_ROW) + return false; + + *count = statement->column_bytes(1); + if (*count > size) + return false; + + memcpy(prefixes, statement->column_blob(0), *count); + return true; + } + + int Count() { + SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, + "SELECT COUNT(*) FROM hosts"); + if (!statement.is_valid()) { + EXPECT_TRUE(false); + return -1; + } + + if (statement->step() != SQLITE_ROW) { + EXPECT_TRUE(false); + return -1; + } + + return statement->column_int(0); + } + + std::string GetDBSuffix() { + return "Simple"; + } +}; + +class IndexedDatabase : public SimpleDatabase { + public: + virtual bool CreateTable() { + return sqlite3_exec(db_, "CREATE TABLE hosts (" + "host INTEGER PRIMARY KEY," + "prefixes BLOB)", + NULL, NULL, NULL) == SQLITE_OK; + } + + std::string GetDBSuffix() { + return "Indexed"; + } +}; 
+ +class IndexedWithIDDatabase : public SimpleDatabase { + public: + virtual bool CreateTable() { + return sqlite3_exec(db_, "CREATE TABLE hosts (" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "host INTEGER UNIQUE," + "prefixes BLOB)", + NULL, NULL, NULL) == SQLITE_OK; + } + + virtual bool Add(int host_key, int* prefixes, int count) { + SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, + "INSERT OR REPLACE INTO hosts" + "(id,host,prefixes)" + "VALUES (NULL,?,?)"); + if (!statement.is_valid()) + return false; + + statement->bind_int(0, host_key); + statement->bind_blob(1, prefixes, count * sizeof(int)); + return statement->step() == SQLITE_DONE; + } + + std::string GetDBSuffix() { + return "IndexedWithID"; + } +}; + +} + +class SafeBrowsing: public testing::Test { + protected: + // Get the test parameters from the test case's name. + virtual void SetUp() { + logging::InitLogging( + NULL, logging::LOG_ONLY_TO_SYSTEM_DEBUG_LOG, + logging::LOCK_LOG_FILE, + logging::DELETE_OLD_LOG_FILE); + + const testing::TestInfo* const test_info = + testing::UnitTest::GetInstance()->current_test_info(); + std::string test_name = test_info->name(); + + TestType type; + if (test_name.find("Write") != std::string::npos) { + type = WRITE; + } else if (test_name.find("Read") != std::string::npos) { + type = READ; + } else { + type = COUNT; + } + + if (test_name.find("IndexedWithID") != std::string::npos) { + db_ = new IndexedWithIDDatabase(); + } else if (test_name.find("Indexed") != std::string::npos) { + db_ = new IndexedDatabase(); + } else { + db_ = new SimpleDatabase(); + } + + + char multiplier_letter = test_name[test_name.size() - 1]; + int multiplier = 0; + if (multiplier_letter == 'K') { + multiplier = 1000; + } else if (multiplier_letter == 'M') { + multiplier = 1000000; + } else { + NOTREACHED(); + } + + size_t index = test_name.size() - 1; + while (index != 0 && test_name[index] != '_') + index--; + + DCHECK(index); + const char* count_start = test_name.c_str() + ++index; + 
int count = atoi(count_start); + int size = count * multiplier; + + db_name_ = StringPrintf("TestSafeBrowsing"); + db_name_.append(count_start); + db_name_.append(db_->GetDBSuffix()); + + ASSERT_TRUE(db_->Init(db_name_, type == WRITE)); + + if (type == WRITE) { + WriteEntries(size); + } else if (type == READ) { + ReadEntries(100); + } else { + CountEntries(); + } + } + + virtual void TearDown() { + delete db_; + } + + // This writes the given number of entries to the database. + void WriteEntries(int count) { + int prefixes[4]; + + SQLTransaction transaction(db_->db()); + transaction.Begin(); + + int inc = kint32max / count; + for (int i = 0; i < count; i++) { + int hostkey; + rand_s((unsigned int*)&hostkey); + ASSERT_TRUE(db_->Add(hostkey, prefixes, 1)); + } + + transaction.Commit(); + } + + // Read the given number of entries from the database. + void ReadEntries(int count) { + int prefixes[4]; + + int64 total_ms = 0; + + for (int i = 0; i < count; ++i) { + int key; + rand_s((unsigned int*)&key); + + PerfTimer timer; + + int read; + ASSERT_TRUE(db_->Read(key, prefixes, sizeof(prefixes), &read)); + + int64 time_ms = timer.Elapsed().InMilliseconds(); + total_ms += time_ms; + DLOG(INFO) << "Read in " << time_ms << " ms."; + } + + DLOG(INFO) << db_name_ << " read " << count << " entries in average of " << + total_ms/count << " ms."; + } + + // Counts how many entries are in the database, which effectively does a full + // table scan. 
+ void CountEntries() { + PerfTimer timer; + + int count = db_->Count(); + + DLOG(INFO) << db_name_ << " counted " << count << " entries in " << + timer.Elapsed().InMilliseconds() << " ms"; + } + + enum TestType { + WRITE, + READ, + COUNT, + }; + + private: + + Database* db_; + std::string db_name_; +}; + +TEST_F(SafeBrowsing, Write_100K) { +} + +TEST_F(SafeBrowsing, Read_100K) { +} + +TEST_F(SafeBrowsing, WriteIndexed_100K) { +} + +TEST_F(SafeBrowsing, ReadIndexed_100K) { +} + +TEST_F(SafeBrowsing, WriteIndexed_250K) { +} + +TEST_F(SafeBrowsing, ReadIndexed_250K) { +} + +TEST_F(SafeBrowsing, WriteIndexed_500K) { +} + +TEST_F(SafeBrowsing, ReadIndexed_500K) { +} + +TEST_F(SafeBrowsing, ReadIndexedWithID_250K) { +} + +TEST_F(SafeBrowsing, WriteIndexedWithID_250K) { +} + +TEST_F(SafeBrowsing, ReadIndexedWithID_500K) { +} + +TEST_F(SafeBrowsing, WriteIndexedWithID_500K) { +} + +TEST_F(SafeBrowsing, CountIndexed_250K) { +} + +TEST_F(SafeBrowsing, CountIndexed_500K) { +} + +TEST_F(SafeBrowsing, CountIndexedWithID_250K) { +} + +TEST_F(SafeBrowsing, CountIndexedWithID_500K) { +} + + +class SafeBrowsingDatabaseTest { + public: + SafeBrowsingDatabaseTest(const std::wstring& name) { + logging::InitLogging( + NULL, logging::LOG_ONLY_TO_SYSTEM_DEBUG_LOG, + logging::LOCK_LOG_FILE, + logging::DELETE_OLD_LOG_FILE); + + PathService::Get(base::DIR_TEMP, &filename_); + filename_.push_back(file_util::kPathSeparator); + filename_.append(name); + } + + void Create(int size) { + DeleteFile(filename_.c_str()); + + SafeBrowsingDatabase database; + database.set_synchronous(); + EXPECT_TRUE(database.Init(filename_)); + + int chunk_id = 0; + int total_host_keys = size; + int host_keys_per_chunk = 100; + + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + + for (int i = 0; i < total_host_keys / host_keys_per_chunk; ++i) { + chunks->push_back(SBChunk()); + chunks->back().chunk_number = ++chunk_id; + + for (int j = 0; j < host_keys_per_chunk; ++j) { + SBChunkHost host; + rand_s((unsigned 
int*)&host.host); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->SetPrefixAt(0, 0x2425525); + host.entry->SetPrefixAt(1, 0x1536366); + + chunks->back().hosts.push_back(host); + } + } + + database.InsertChunks("goog-malware", chunks); + } + + void Read(bool use_bloom_filter) { + int keys_to_read = 500; + file_util::EvictFileFromSystemCache(filename_.c_str()); + + SafeBrowsingDatabase database; + database.set_synchronous(); + EXPECT_TRUE(database.Init(filename_)); + + PerfTimer total_timer; + int64 db_ms = 0; + int keys_from_db = 0; + for (int i = 0; i < keys_to_read; ++i) { + int key; + rand_s((unsigned int*)&key); + + std::string url = StringPrintf("http://www.%d.com/blah.html", key); + + std::string matching_list; + std::vector<SBPrefix> prefix_hits; + GURL gurl(url); + if (!use_bloom_filter || database.NeedToCheckUrl(gurl)) { + PerfTimer timer; + database.ContainsUrl(gurl, &matching_list, &prefix_hits); + + int64 time_ms = timer.Elapsed().InMilliseconds(); + + DLOG(INFO) << "Read from db in " << time_ms << " ms."; + + db_ms += time_ms; + keys_from_db++; + } + } + + int64 total_ms = total_timer.Elapsed().InMilliseconds(); + + DLOG(INFO) << WideToASCII(file_util::GetFilenameFromPath(filename_)) << + " read " << keys_to_read << " entries in " << total_ms << " ms. " << + keys_from_db << " keys were read from the db, with average read taking " << + db_ms / keys_from_db << " ms"; + } + + void BuildBloomFilter() { + file_util::EvictFileFromSystemCache(filename_.c_str()); + file_util::Delete(SafeBrowsingDatabase::BloomFilterFilename(filename_), false); + + PerfTimer total_timer; + + SafeBrowsingDatabase database; + database.set_synchronous(); + EXPECT_TRUE(database.Init(filename_)); + + int64 total_ms = total_timer.Elapsed().InMilliseconds(); + + DLOG(INFO) << WideToASCII(file_util::GetFilenameFromPath(filename_)) << + " built bloom filter in " << total_ms << " ms."; + } + + private: + std::wstring filename_; +}; + +// Adds 100K host records. 
+TEST(SafeBrowsingDatabase, FillUp100K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing100K"); + db.Create(100000); +} + +// Adds 250K host records. +TEST(SafeBrowsingDatabase, FillUp250K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing250K"); + db.Create(250000); +} + +// Adds 500K host records. +TEST(SafeBrowsingDatabase, FillUp500K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing500K"); + db.Create(500000); +} + +// Reads 500 entries and prints the timing. +TEST(SafeBrowsingDatabase, ReadFrom250K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing250K"); + db.Read(false); +} + +TEST(SafeBrowsingDatabase, ReadFrom500K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing500K"); + db.Read(false); +} + +// Read 500 entries with a bloom filter and print the timing. +TEST(SafeBrowsingDatabase, BloomReadFrom250K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing250K"); + db.Read(true); +} + +TEST(SafeBrowsingDatabase, BloomReadFrom500K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing500K"); + db.Read(true); +} + +// Test how long bloom filter creation takes. +TEST(SafeBrowsingDatabase, BuildBloomFilter250K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing250K"); + db.BuildBloomFilter(); +} + +TEST(SafeBrowsingDatabase, BuildBloomFilter500K) { + SafeBrowsingDatabaseTest db(L"SafeBrowsing500K"); + db.BuildBloomFilter(); +} + +#endif
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc new file mode 100644 index 0000000..086ceb9 --- /dev/null +++ b/chrome/browser/safe_browsing/protocol_manager.cc @@ -0,0 +1,607 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "chrome/browser/safe_browsing/protocol_manager.h"

#include "base/histogram.h"
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/string_util.h"
#include "base/task.h"
#include "base/timer.h"
#include "chrome/browser/profile.h"
#include "chrome/browser/safe_browsing/protocol_parser.h"
#include "chrome/browser/safe_browsing/safe_browsing_database.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/env_util.h"
#include "chrome/common/env_vars.h"
#include "chrome/common/rand_util.h"
#include "chrome/common/stl_util-inl.h"
#include "net/base/base64.h"
#include "net/base/load_flags.h"


// Maximum time, in seconds, from start up before we must issue an update query.
// Used as the upper bound of the random delay chosen in the constructor.
static const int kSbTimerStartIntervalSec = 300;

// The three URL templates below are StringPrintf formats. Each takes, in
// order: the client name (%s), the major version (%d) and the minor version
// (%d).

// Update URL for querying about the latest set of chunk updates.
static const char* const kSbUpdateUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=%s&appver=%d.%d&pver=2.1";

// GetHash request URL for retrieving full hashes.
static const char* const kSbGetHashUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/gethash?client=%s&appver=%d.%d&pver=2.1";

// New MAC client key requests URL.
static const char* const kSbNewKeyUrl =
    "https://sb-ssl.google.com/safebrowsing/newkey?client=%s&appver=%d.%d&pver=2.1";

// Client identification substituted into the URL templates above.
// TODO(paulg): Change these values when we get close to launch.
static const char* const kSbClientName = "googleclient";
static const int kSbClientMajorVersion = 1;
static const int kSbClientMinorVersion = 0;

// Maximum back off multiplier.
+static const int kSbMaxBackOff = 8; + + +// Periodic update task -------------------------------------------------------- +class SafeBrowsingProtocolUpdateTask : public Task { + public: + explicit SafeBrowsingProtocolUpdateTask(SafeBrowsingProtocolManager* manager) + : manager_(manager) { + } + + void Run() { + manager_->GetNextUpdate(); + } + + private: + SafeBrowsingProtocolManager* manager_; +}; + + +// SafeBrowsingProtocolManager implementation ---------------------------------- + +SafeBrowsingProtocolManager::SafeBrowsingProtocolManager( + SafeBrowsingService* sb_service, + MessageLoop* notify_loop, + const std::string& client_key, + const std::string& wrapped_key) + : sb_service_(sb_service), + request_type_(NO_REQUEST), + update_error_count_(0), + gethash_error_count_(0), + update_back_off_mult_(1), + gethash_back_off_mult_(1), + next_update_sec_(-1), + update_state_(FIRST_REQUEST), + initial_request_(true), + chunk_pending_to_write_(false), + notify_loop_(notify_loop), + client_key_(client_key), + wrapped_key_(wrapped_key) { + // Set the backoff multiplier fuzz to a random value between 0 and 1. + back_off_fuzz_ = static_cast<float>(rand_util::RandInt(1, INT_MAX)) / INT_MAX; + + // The first update must happen between 0-5 minutes of start up. + next_update_sec_ = rand_util::RandInt(60, kSbTimerStartIntervalSec); +} + +SafeBrowsingProtocolManager::~SafeBrowsingProtocolManager() { + if (update_timer_.get()) + MessageLoop::current()->timer_manager()->StopTimer(update_timer_.get()); + + // Delete in-progress SafeBrowsing requests. + STLDeleteContainerPairFirstPointers(hash_requests_.begin(), + hash_requests_.end()); + hash_requests_.clear(); +} + +// Public API used by the SafeBrowsingService ---------------------------------- + +// We can only have one update or chunk request outstanding, but there may be +// multiple GetHash requests pending since we don't want to serialize them and +// slow down the user. 
+void SafeBrowsingProtocolManager::GetFullHash( + SafeBrowsingService::SafeBrowsingCheck* check, + const std::vector<SBPrefix>& prefixes) { + // If we are in GetHash backoff, we need to check if we're past the next + // allowed time. If we are, we can proceed with the request. If not, we are + // required to return empty results (i.e. treat the page as safe). + if (gethash_error_count_ && Time::Now() <= next_gethash_time_) { + std::vector<SBFullHashResult> full_hashes; + sb_service_->HandleGetHashResults(check, full_hashes); + return; + } + + std::string url = StringPrintf(kSbGetHashUrl, + kSbClientName, + kSbClientMajorVersion, + kSbClientMinorVersion); + if (!client_key_.empty()) { + url.append("&wrkey="); + url.append(wrapped_key_); + } + + GURL gethash_url(url); + URLFetcher* fetcher = new URLFetcher(gethash_url, URLFetcher::POST, this); + hash_requests_[fetcher] = check; + + std::string get_hash; + SafeBrowsingProtocolParser parser; + parser.FormatGetHash(prefixes, &get_hash); + + fetcher->set_load_flags(net::LOAD_DISABLE_CACHE); + fetcher->set_request_context(Profile::GetDefaultRequestContext()); + fetcher->set_upload_data("text/plain", get_hash.data()); + fetcher->Start(); +} + +void SafeBrowsingProtocolManager::GetNextUpdate() { + if (initial_request_) { + if (client_key_.empty() || wrapped_key_.empty()) { + IssueKeyRequest(); + return; + } else { + initial_request_ = false; + } + } + + if (!request_.get()) + IssueUpdateRequest(); +} + +// URLFetcher::Delegate implementation ----------------------------------------- + +// All SafeBrowsing request responses are handled here. +// TODO(paulg): Clarify with the SafeBrowsing team whether a failed parse of a +// chunk should retry the download and parse of that chunk (and +// what back off / how many times to try), and if that effects the +// update back off. For now, a failed parse of the chunk means we +// drop it. This isn't so bad because the next UPDATE_REQUEST we +// do will report all the chunks we have. 
If that chunk is still +// required, the SafeBrowsing servers will tell us to get it again. +void SafeBrowsingProtocolManager::OnURLFetchComplete( + const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + scoped_ptr<const URLFetcher> fetcher; + bool parsed_ok = true; + bool must_back_off = false; // Reduce SafeBrowsing service query frequency. + + HashRequests::iterator it = hash_requests_.find(source); + if (it != hash_requests_.end()) { + // GetHash response. + fetcher.reset(it->first); + SafeBrowsingService::SafeBrowsingCheck* check = it->second; + std::vector<SBFullHashResult> full_hashes; + if (response_code == 200 || response_code == 204) { + gethash_error_count_ = 0; + gethash_back_off_mult_ = 1; + bool re_key = false; + SafeBrowsingProtocolParser parser; + parsed_ok = parser.ParseGetHash(data.data(), + static_cast<int>(data.length()), + client_key_, + &re_key, + &full_hashes); + if (!parsed_ok) { + // If we fail to parse it, we must still inform the SafeBrowsingService + // so that it doesn't hold up the user's request indefinitely. Not sure + // what to do at that point though! + full_hashes.clear(); + } else { + if (re_key) + HandleReKey(); + } + } else if (response_code >= 300) { + HandleGetHashError(); + SB_DLOG(INFO) << "SafeBrowsing GetHash request for: " << source->url() + << ", failed with error: " << response_code; + } + + // Call back the SafeBrowsingService with full_hashes, even if there was a + // parse error or an error response code (in which case full_hashes will be + // empty). We can't block the user regardless of the error status. + sb_service_->HandleGetHashResults(check, full_hashes); + + hash_requests_.erase(it); + } else { + // Update, chunk or key response. + DCHECK(source == request_.get()); + fetcher.reset(request_.release()); + + if (response_code == 200) { + // We have data from the SafeBrowsing service. 
+ parsed_ok = HandleServiceResponse(source->url(), + data.data(), + static_cast<int>(data.length())); + if (!parsed_ok) { + SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url() + << "failed parse."; + } + + if (request_type_ == CHUNK_REQUEST) { + if (parsed_ok) { + chunk_request_urls_.pop_front(); + } else { + chunk_request_urls_.clear(); + } + } else if (request_type_ == GETKEY_REQUEST && initial_request_) { + // This is the first request we've made this session. Now that we have + // the keys, do the regular update request. + initial_request_ = false; + GetNextUpdate(); + return; + } + } else if (response_code >= 300) { + // The SafeBrowsing service error: back off. + must_back_off = true; + if (request_type_ == CHUNK_REQUEST) + chunk_request_urls_.clear(); + SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url() + << ", failed with error: " << response_code; + } + } + + // Schedule a new update request if we've finished retrieving all the chunks + // from the previous update. We treat the update request and the chunk URLs it + // contains as an atomic unit as far as back off is concerned. + if (chunk_request_urls_.empty() && + (request_type_ == CHUNK_REQUEST || request_type_ == UPDATE_REQUEST)) + ScheduleNextUpdate(must_back_off); + + // Get the next chunk if available. 
+ IssueChunkRequest(); +} + +bool SafeBrowsingProtocolManager::HandleServiceResponse(const GURL& url, + const char* data, + int length) { + SafeBrowsingProtocolParser parser; + + switch (request_type_) { + case UPDATE_REQUEST: { + int next_update_sec = -1; + bool re_key = false; + bool reset = false; + std::vector<SBChunkDelete>* chunk_deletes = + new std::vector<SBChunkDelete>; + std::vector<ChunkUrl> chunk_urls; + if (!parser.ParseUpdate(data, length, client_key_, + &next_update_sec, &re_key, + &reset, chunk_deletes, &chunk_urls)) { + delete chunk_deletes; + return false; + } + + last_update_ = Time::Now(); + + if (update_state_ == FIRST_REQUEST) + update_state_ = SECOND_REQUEST; + else if (update_state_ == SECOND_REQUEST) + update_state_ = NORMAL_REQUEST; + + // New time for the next update. + if (next_update_sec > 0) { + next_update_sec_ = next_update_sec; + } else if (update_state_ == SECOND_REQUEST) { + next_update_sec_ = rand_util::RandInt(15, 45) * 60; + } + + // We need to request a new set of keys for MAC. + if (re_key) + HandleReKey(); + + // New chunks to download. + if (!chunk_urls.empty()) { + for (size_t i = 0; i < chunk_urls.size(); ++i) + chunk_request_urls_.push_back(chunk_urls[i]); + } + + // Handle the case were the SafeBrowsing service tells us to dump our + // database. + if (reset) { + sb_service_->ResetDatabase(); + return true; + } + + // Chunks to delete from our storage. + if (!chunk_deletes->empty()) + sb_service_->HandleChunkDelete(chunk_deletes); + + break; + } + case CHUNK_REQUEST: { + // Find list name from url. 
+ std::string url_path = url.ExtractFileName(); + if (url_path.empty()) + return false; + + std::string::size_type pos = url_path.find_first_of('_'); + if (pos == std::string::npos) + return false; + + const ChunkUrl chunk_url = chunk_request_urls_.front(); + DCHECK(url.spec().find(chunk_url.url) != std::string::npos); + + bool re_key = false; + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + if (!parser.ParseChunk(data, length, + client_key_, chunk_url.mac, + &re_key, chunks)) { +#ifndef NDEBUG + std::string data_str; + data_str.assign(data, length); + std::string encoded_chunk; + Base64Encode(data, &encoded_chunk); + SB_DLOG(INFO) << "ParseChunk error for chunk: " << chunk_url.url + << ", client_key: " << client_key_ + << ", wrapped_key: " << wrapped_key_ + << ", mac: " << chunk_url.mac + << ", Base64Encode(data): " << encoded_chunk + << ", length: " << length; +#endif + safe_browsing_util::FreeChunks(chunks); + delete chunks; + return false; + } + + if (re_key) + HandleReKey(); + + if (chunks->empty()) { + delete chunks; + } else { + chunk_pending_to_write_ = true; + std::string list_name(url_path, 0, pos); + sb_service_->HandleChunk(list_name, chunks); + } + + break; + } + case GETKEY_REQUEST: { + std::string client_key, wrapped_key; + if (!parser.ParseNewKey(data, length, &client_key, &wrapped_key)) + return false; + + client_key_ = client_key; + wrapped_key_ = wrapped_key; + notify_loop_->PostTask(FROM_HERE, NewRunnableMethod( + sb_service_, &SafeBrowsingService::OnNewMacKeys, client_key_, + wrapped_key_)); + break; + } + + default: + return false; + } + + return true; +} + +void SafeBrowsingProtocolManager::Initialize() { + // Don't want to hit the safe browsing servers on build/chrome bots. 
+ if (env_util::HasEnvironmentVariable(env_vars::kHeadless)) + return; + + ScheduleNextUpdate(false /* no back off */); +} + +void SafeBrowsingProtocolManager::ScheduleNextUpdate(bool back_off) { + DCHECK(next_update_sec_ > 0); + + if (!update_task_.get()) + update_task_.reset(new SafeBrowsingProtocolUpdateTask(this)); + + // Unschedule any current timer & task. + TimerManager* tm = MessageLoop::current()->timer_manager(); + if (update_timer_.get()) + tm->StopTimer(update_timer_.get()); + + // Reschedule with the new update. + const int next_update = GetNextUpdateTime(back_off); + update_timer_.reset(tm->StartTimer(next_update, update_task_.get(), false)); +} + +// According to section 5 of the SafeBrowsing protocol specification, we must +// back off after a certain number of errors. We only change 'next_update_sec_' +// when we receive a response from the SafeBrowsing service. +int SafeBrowsingProtocolManager::GetNextUpdateTime(bool back_off) { + int next = next_update_sec_; + if (back_off) { + next = GetNextBackOffTime(&update_error_count_, &update_back_off_mult_); + } else { + // Successful response means error reset. + update_error_count_ = 0; + update_back_off_mult_ = 1; + } + return next * 1000; // milliseconds +} + +int SafeBrowsingProtocolManager::GetNextBackOffTime(int* error_count, + int* multiplier) { + DCHECK(multiplier && error_count); + (*error_count)++; + if (*error_count > 1 && *error_count < 6) { + int next = static_cast<int>(*multiplier * (1 + back_off_fuzz_) * 30 * 60); + *multiplier *= 2; + if (*multiplier > kSbMaxBackOff) + *multiplier = kSbMaxBackOff; + return next; + } + + if (*error_count >= 6) + return 60 * 60 * 8; // 8 hours + + return 60; // 1 minute +} + +// This request requires getting a list of all the chunks for each list from the +// database asynchronously. The request will be issued when we're called back in +// OnGetChunksComplete. 
+// TODO(paulg): We should get this at start up and maintain a ChunkRange cache +// to avoid hitting the database with each update request. On the +// otherhand, this request will only occur ~20-30 minutes so there +// isn't that much overhead. Measure! +void SafeBrowsingProtocolManager::IssueUpdateRequest() { + request_type_ = UPDATE_REQUEST; + sb_service_->GetAllChunks(); +} + +void SafeBrowsingProtocolManager::IssueChunkRequest() { + // We are only allowed to have one request outstanding at any time. Also, + // don't get the next url until the previous one has been written to disk so + // that we don't use too much memory. + if (request_.get() || chunk_request_urls_.empty() || chunk_pending_to_write_) + return; + + ChunkUrl next_chunk = chunk_request_urls_.front(); + DCHECK(!next_chunk.url.empty()); + if (!StartsWithASCII(next_chunk.url, "http://", false) && + !StartsWithASCII(next_chunk.url, "https://", false)) + next_chunk.url = "http://" + next_chunk.url; + GURL chunk_url(next_chunk.url); + request_type_ = CHUNK_REQUEST; + request_.reset(new URLFetcher(chunk_url, URLFetcher::GET, this)); + request_->set_load_flags(net::LOAD_DISABLE_CACHE); + request_->set_request_context(Profile::GetDefaultRequestContext()); + request_->Start(); +} + +void SafeBrowsingProtocolManager::IssueKeyRequest() { + GURL key_url(StringPrintf(kSbNewKeyUrl, + kSbClientName, + kSbClientMajorVersion, + kSbClientMinorVersion)); + request_type_ = GETKEY_REQUEST; + request_.reset(new URLFetcher(key_url, URLFetcher::GET, this)); + request_->set_load_flags(net::LOAD_DISABLE_CACHE); + request_->set_request_context(Profile::GetDefaultRequestContext()); + request_->Start(); +} + +void SafeBrowsingProtocolManager::OnGetChunksComplete( + const std::vector<SBListChunkRanges>& lists, bool database_error) { + DCHECK(request_type_ == UPDATE_REQUEST); + + if (database_error) { + ScheduleNextUpdate(false); + return; + } + + const bool use_mac = !client_key_.empty(); + + // Format our stored chunks: + 
std::string list_data; + bool found_malware = false; + bool found_phishing = false; + for (size_t i = 0; i < lists.size(); ++i) { + list_data.append(FormatList(lists[i], use_mac)); + if (lists[i].name == "goog-phish-shavar") + found_phishing = true; + + if (lists[i].name == "goog-malware-shavar") + found_malware = true; + } + + // If we have an empty database, let the server know we want data for these + // lists. + if (!found_phishing) + list_data.append(FormatList(SBListChunkRanges("goog-phish-shavar"), + use_mac)); + + if (!found_malware) + list_data.append(FormatList(SBListChunkRanges("goog-malware-shavar"), + use_mac)); + + std::string url = StringPrintf(kSbUpdateUrl, + kSbClientName, + kSbClientMajorVersion, + kSbClientMinorVersion); + if (use_mac) { + url.append("&wrkey="); + url.append(wrapped_key_); + } + + GURL update_url(url); + request_.reset(new URLFetcher(update_url, URLFetcher::POST, this)); + request_->set_load_flags(net::LOAD_DISABLE_CACHE); + request_->set_request_context(Profile::GetDefaultRequestContext()); + request_->set_upload_data("text/plain", list_data); + request_->Start(); +} + +void SafeBrowsingProtocolManager::OnChunkInserted() { + chunk_pending_to_write_ = false; + + if (chunk_request_urls_.empty()) { + UMA_HISTOGRAM_LONG_TIMES(L"SB.Update", Time::Now() - last_update_); + } else { + IssueChunkRequest(); + } +} + +// static +std::string SafeBrowsingProtocolManager::FormatList( + const SBListChunkRanges& list, bool use_mac) { + std::string formatted_results; + formatted_results.append(list.name); + formatted_results.append(";"); + if (!list.adds.empty()) { + formatted_results.append("a:" + list.adds); + if (!list.subs.empty() || use_mac) + formatted_results.append(":"); + } + if (!list.subs.empty()) { + formatted_results.append("s:" + list.subs); + if (use_mac) + formatted_results.append(":"); + } + if (use_mac) + formatted_results.append("mac"); + formatted_results.append("\n"); + + return formatted_results; +} + +void 
SafeBrowsingProtocolManager::HandleReKey() { + client_key_.clear(); + wrapped_key_.clear(); + IssueKeyRequest(); +} + +void SafeBrowsingProtocolManager::HandleGetHashError() { + int next = GetNextBackOffTime(&gethash_error_count_, &gethash_back_off_mult_); + next_gethash_time_ = Time::Now() + TimeDelta::FromSeconds(next); +} diff --git a/chrome/browser/safe_browsing/protocol_manager.h b/chrome/browser/safe_browsing/protocol_manager.h new file mode 100644 index 0000000..e4d9fc5 --- /dev/null +++ b/chrome/browser/safe_browsing/protocol_manager.h @@ -0,0 +1,232 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H__ +#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H__ + +// A class that implements Chrome's interface with the SafeBrowsing protocol. +// The SafeBrowsingProtocolManager handles formatting and making requests of, +// and handling responses from, Google's SafeBrowsing servers. This class uses +// the SafeBrowsingProtocolParser class to do the actual parsing. + +#include <deque> +#include <hash_map> +#include <string> +#include <vector> + +#include "base/scoped_ptr.h" +#include "base/time.h" +#include "chrome/browser/url_fetcher.h" +#include "chrome/browser/safe_browsing/chunk_range.h" +#include "chrome/browser/safe_browsing/protocol_parser.h" +#include "chrome/browser/safe_browsing/safe_browsing_service.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" +#include "net/url_request/url_request.h" + +class MessageLoop; +class Task; +class Timer; + + +class SafeBrowsingProtocolManager : public URLFetcher::Delegate { + // Testing friends: + friend class SafeBrowsingProtocolManagerTest_TestBackOffTimes_Test; + friend class SafeBrowsingProtocolManagerTest_TestChunkStrings_Test; + friend class SafeBrowsingProtocolManagerTest_TestGetHashBackOffTimes_Test; + + public: + SafeBrowsingProtocolManager(SafeBrowsingService* sb_service, + MessageLoop* notify_loop, + const std::string& client_key, + const std::string& wrapped_key); + ~SafeBrowsingProtocolManager(); + + //
Set up the update schedule and internal state for making periodic requests + // of the SafeBrowsing service. + void Initialize(); + + // URLFetcher::Delegate interface. + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); + + // API used by the SafeBrowsingService for issuing queries. When the results + // are available, SafeBrowsingService::HandleGetHashResults is called. + void GetFullHash(SafeBrowsingService::SafeBrowsingCheck* check, + const std::vector<SBPrefix>& prefixes); + + // Scheduled update callback. + void GetNextUpdate(); + + // Called by the SafeBrowsingService when our request for a list of all chunks + // for each list is done. If database_error is true, that means the protocol + // manager shouldn't fetch updates since they can't be written to disk. It + // should try again later to open the database. + void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list, + bool database_error); + + // Called after the chunks that were parsed were inserted in the database. + void OnChunkInserted(); + + // The last time we received an update. + Time last_update() const { return last_update_; } + + private: + // Internal API for fetching information from the SafeBrowsing servers. The + // GetHash requests are higher priority since they can block user requests + // so are handled separately. + enum SafeBrowsingRequestType { + NO_REQUEST = 0, // No requests in progress + UPDATE_REQUEST, // Request for redirect URLs + CHUNK_REQUEST, // Request for a specific chunk + GETKEY_REQUEST // Update the client's MAC key + }; + + // Returns the time (in milliseconds) for the next update request. If + // 'back_off' is true, the time returned will increment an error count and + // return the appropriate next time (see ScheduleNextUpdate below).
+ int GetNextUpdateTime(bool back_off); + + // Worker function for calculating GetHash and Update backoff times (in + // seconds). 'Multiplier' is doubled for each consecutive error between the + // 2nd and 5th, and 'error_count' is incremented with each call. + int GetNextBackOffTime(int* error_count, int* multiplier); + + // Manage our update with the next allowable update time. If 'back_off' is + // true, we must decrease the frequency of requests of the SafeBrowsing + // service according to section 5 of the protocol specification. + void ScheduleNextUpdate(bool back_off); + + // Send a request for a list of chunks we should download to the SafeBrowsing + // servers. In order to format this request, we need to send all the chunk + // numbers for each list that we have to the server. Getting the chunk numbers + // requires a database query (run on the database thread), and the request + // is sent upon completion of that query in OnGetChunksComplete. + void IssueUpdateRequest(); + + // Send a request for a chunk to the SafeBrowsing servers. + void IssueChunkRequest(); + + // Get a key from the SafeBrowsing servers for use with MAC. This should only + // be called once per client unless the server directly tells us to update. + void IssueKeyRequest(); + + // Format a string returned from the database into: + // "list_name;a:<add_chunk_ranges>:s:<sub_chunk_ranges>:mac\n" + static std::string FormatList(const SBListChunkRanges& list, bool use_mac); + + // Run the protocol parser on received data and update the SafeBrowsingService + // with the new content. Returns 'true' on successful parse, 'false' on error. + bool HandleServiceResponse(const GURL& url, const char* data, int length); + + // If the SafeBrowsing service wants us to re-key, we clear our key state and + // issue the request. + void HandleReKey(); + + // Update internal state for each GetHash response error. + void HandleGetHashError(); + + private: + // Main SafeBrowsing interface object.
+ SafeBrowsingService* sb_service_; + + // Current active request (in case we need to cancel) for updates or chunks + // from the SafeBrowsing service. We can only have one of these outstanding + // at any given time unlike GetHash requests, which are tracked separately. + scoped_ptr<URLFetcher> request_; + + // The kind of request that is currently in progress. + SafeBrowsingRequestType request_type_; + + // The number of HTTP response errors, used for request backoff timing. + int update_error_count_; + int gethash_error_count_; + + // Multipliers which double (max == 8) for each error after the second. + int update_back_off_mult_; + int gethash_back_off_mult_; + + // Multiplier between 0 and 1 to spread clients over an interval. + float back_off_fuzz_; + + // The list for which we are making a request. + std::string list_name_; + + // For managing the next earliest time to query the SafeBrowsing servers for + // updates. + int next_update_sec_; + scoped_ptr<Task> update_task_; + scoped_ptr<Timer> update_timer_; + + // All chunk requests that need to be made, along with their MAC. + std::deque<ChunkUrl> chunk_request_urls_; + + // Map of GetHash requests. + typedef stdext::hash_map<const URLFetcher*, + SafeBrowsingService::SafeBrowsingCheck*> HashRequests; + HashRequests hash_requests_; + + // The next scheduled update has special behavior for the first 2 requests. + enum UpdateRequestState { + FIRST_REQUEST = 0, + SECOND_REQUEST, + NORMAL_REQUEST + }; + UpdateRequestState update_state_; + + // We'll attempt to get keys once per browser session if we don't already have + // them. They are not essential to operation, but provide a layer of + // verification. + bool initial_request_; + + // True if the service has been given an add/sub chunk but it hasn't been + // added to the database yet. + bool chunk_pending_to_write_; + + // Message loop for forwarding MAC keys to the SafeBrowsingService for + // storage.
+ MessageLoop* notify_loop_; + + // The keys used for MAC. Empty keys mean we aren't using MAC. + std::string client_key_; + std::string wrapped_key_; + + // The last time we successfully received an update. + Time last_update_; + + // While in GetHash backoff, we can't make another GetHash until this time. + Time next_gethash_time_; + + DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolManager); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H__ diff --git a/chrome/browser/safe_browsing/protocol_manager_unittest.cc b/chrome/browser/safe_browsing/protocol_manager_unittest.cc new file mode 100644 index 0000000..b2abbb8 --- /dev/null +++ b/chrome/browser/safe_browsing/protocol_manager_unittest.cc @@ -0,0 +1,158 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "base/logging.h" +#include "base/time.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "chrome/browser/safe_browsing/protocol_manager.h" + + +// Ensure that we respect section 5 of the SafeBrowsing protocol specification. +TEST(SafeBrowsingProtocolManagerTest, TestBackOffTimes) { + SafeBrowsingProtocolManager pm(NULL, NULL, "", ""); + pm.next_update_sec_ = 1800; + DCHECK(pm.back_off_fuzz_ >= 0.0 && pm.back_off_fuzz_ <= 1.0); + + // No errors received so far. + EXPECT_EQ(pm.GetNextUpdateTime(false), 1800 * 1000); + + // 1 error. + EXPECT_EQ(pm.GetNextUpdateTime(true), 60 * 1000); + + // 2 errors. + int next_time = pm.GetNextUpdateTime(true) / (60 * 1000); // Minutes + EXPECT_TRUE(next_time >= 30 && next_time <= 60); + + // 3 errors. + next_time = pm.GetNextUpdateTime(true) / (60 * 1000); + EXPECT_TRUE(next_time >= 60 && next_time <= 120); + + // 4 errors. + next_time = pm.GetNextUpdateTime(true) / (60 * 1000); + EXPECT_TRUE(next_time >= 120 && next_time <= 240); + + // 5 errors. + next_time = pm.GetNextUpdateTime(true) / (60 * 1000); + EXPECT_TRUE(next_time >= 240 && next_time <= 480); + + // 6 errors, reached max backoff. + EXPECT_EQ(pm.GetNextUpdateTime(true), 480 * 60 * 1000); + + // 7 errors. + EXPECT_EQ(pm.GetNextUpdateTime(true), 480 * 60 * 1000); + + // Received a successful response. + EXPECT_EQ(pm.GetNextUpdateTime(false), 1800 * 1000); +} + +// Test string combinations with and without MAC.
+TEST(SafeBrowsingProtocolManagerTest, TestChunkStrings) { + SafeBrowsingProtocolManager pm(NULL, NULL, "", ""); + + // Add and Sub chunks. + SBListChunkRanges phish("goog-phish-shavar"); + phish.adds = "1,4,6,8-20,99"; + phish.subs = "16,32,64-96"; + EXPECT_EQ(pm.FormatList(phish, false), + "goog-phish-shavar;a:1,4,6,8-20,99:s:16,32,64-96\n"); + EXPECT_EQ(pm.FormatList(phish, true), + "goog-phish-shavar;a:1,4,6,8-20,99:s:16,32,64-96:mac\n"); + + // Add chunks only. + phish.subs = ""; + EXPECT_EQ(pm.FormatList(phish, false), + "goog-phish-shavar;a:1,4,6,8-20,99\n"); + EXPECT_EQ(pm.FormatList(phish, true), + "goog-phish-shavar;a:1,4,6,8-20,99:mac\n"); + + // Sub chunks only. + phish.adds = ""; + phish.subs = "16,32,64-96"; + EXPECT_EQ(pm.FormatList(phish, false), "goog-phish-shavar;s:16,32,64-96\n"); + EXPECT_EQ(pm.FormatList(phish, true), "goog-phish-shavar;s:16,32,64-96:mac\n"); + + // No chunks of either type. + phish.adds = ""; + phish.subs = ""; + EXPECT_EQ(pm.FormatList(phish, false), "goog-phish-shavar;\n"); + EXPECT_EQ(pm.FormatList(phish, true), "goog-phish-shavar;mac\n"); +} + +// Check that each consecutive GetHash error bumps gethash_error_count_ and +// pushes next_gethash_time_ out on the same schedule the update test above +// checks: 1 minute, then 30-60, 60-120, 120-240, 240-480 and finally a fixed +// 480 minutes. +TEST(SafeBrowsingProtocolManagerTest, TestGetHashBackOffTimes) { + SafeBrowsingProtocolManager pm(NULL, NULL, "", ""); + + // No errors or back off time yet. + EXPECT_EQ(pm.gethash_error_count_, 0); + EXPECT_TRUE(pm.next_gethash_time_.is_null()); + + Time now = Time::Now(); + + // 1 error. + pm.HandleGetHashError(); + EXPECT_EQ(pm.gethash_error_count_, 1); + TimeDelta margin = TimeDelta::FromSeconds(5); // Fudge factor. + Time future = now + TimeDelta::FromMinutes(1); + EXPECT_TRUE(pm.next_gethash_time_ >= future - margin && + pm.next_gethash_time_ <= future + margin); + + // 2 errors. + pm.HandleGetHashError(); + EXPECT_EQ(pm.gethash_error_count_, 2); + EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(30)); + EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(60)); + + // 3 errors.
+ pm.HandleGetHashError(); + EXPECT_EQ(pm.gethash_error_count_, 3); + EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(60)); + EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(120)); + + // 4 errors. + pm.HandleGetHashError(); + EXPECT_EQ(pm.gethash_error_count_, 4); + EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(120)); + EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(240)); + + // 5 errors. + pm.HandleGetHashError(); + EXPECT_EQ(pm.gethash_error_count_, 5); + EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(240)); + EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(480)); + + // 6 errors, reached max backoff. + pm.HandleGetHashError(); + EXPECT_EQ(pm.gethash_error_count_, 6); + EXPECT_TRUE(pm.next_gethash_time_ == now + TimeDelta::FromMinutes(480)); + + // 7 errors. + pm.HandleGetHashError(); + EXPECT_EQ(pm.gethash_error_count_, 7); + EXPECT_TRUE(pm.next_gethash_time_== now + TimeDelta::FromMinutes(480)); +} diff --git a/chrome/browser/safe_browsing/protocol_parser.cc b/chrome/browser/safe_browsing/protocol_parser.cc new file mode 100644 index 0000000..9f729c6 --- /dev/null +++ b/chrome/browser/safe_browsing/protocol_parser.cc @@ -0,0 +1,496 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc.
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Parse the data returned from the SafeBrowsing v2.1 protocol response. + +#include <Winsock2.h> // for htonl + +#include "chrome/browser/safe_browsing/protocol_parser.h" + +#include "base/logging.h" +#include "base/string_util.h" + +namespace { +// Helper function for quick scans of a line oriented protocol. Note that we use +// std::string::assign(const charT* s, size_type n) +// to copy data into 'line'. This form of 'assign' does not call strlen on +// 'input', which is binary data and is not NULL terminated. 'input' may also +// contain valid NULL bytes in the payload, which a strlen based copy would +// truncate. 
+bool GetLine(const char* input, int input_len, std::string* line) { + const char* pos = input; + while (pos && (pos - input < input_len)) { + if (*pos == '\n') { + line->assign(input, pos - input); + return true; + } + ++pos; + } + return false; +} +} + +//------------------------------------------------------------------------------ +// SafeBrowsingParser implementation + +SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() { +} + +bool SafeBrowsingProtocolParser::ParseGetHash( + const char* chunk_data, + int chunk_len, + const std::string& key, + bool* re_key, + std::vector<SBFullHashResult>* full_hashes) { + full_hashes->clear(); + int length = chunk_len; + const char* data = chunk_data; + + int offset; + std::string line; + if (!key.empty()) { + if (!GetLine(data, length, &line)) + return false; // Error! Bad GetHash result. + + if (line == "e:pleaserekey") { + *re_key = true; + return true; + } + + offset = static_cast<int>(line.size()) + 1; + data += offset; + length -= offset; + + if (!safe_browsing_util::VerifyMAC(key, line, data, length)) + return false; + } + + while (length > 0) { + if (!GetLine(data, length, &line)) + return false; + + offset = static_cast<int>(line.size()) + 1; + data += offset; + length -= offset; + + std::vector<std::string> cmd_parts; + SplitString(line, ':', &cmd_parts); + if (cmd_parts.size() != 3) + return false; + + SBFullHashResult full_hash; + full_hash.list_name = cmd_parts[0]; + full_hash.add_chunk_id = atoi(cmd_parts[1].c_str()); + int full_hash_len = atoi(cmd_parts[2].c_str()); + + while (full_hash_len > 0) { + DCHECK(full_hash_len >= sizeof(SBFullHash)); + memcpy(&full_hash.hash, data, sizeof(SBFullHash)); + full_hashes->push_back(full_hash); + data += sizeof(SBFullHash); + length -= sizeof(SBFullHash); + full_hash_len -= sizeof(SBFullHash); + } + } + + return length == 0; +} + +void SafeBrowsingProtocolParser::FormatGetHash( + const std::vector<SBPrefix>& prefixes, std::string* request) { + DCHECK(request); + + // 
Format the request for GetHash. + request->append(StringPrintf("%d:%d\n", + sizeof(SBPrefix), + sizeof(SBPrefix) * prefixes.size())); + for (size_t i = 0; i < prefixes.size(); ++i) { + request->append(reinterpret_cast<const char*>(&prefixes[i]), + sizeof(SBPrefix)); + } +} + +bool SafeBrowsingProtocolParser::ParseUpdate( + const char* chunk_data, + int chunk_len, + const std::string& key, + int* next_update_sec, + bool* re_key, + bool* reset, + std::vector<SBChunkDelete>* deletes, + std::vector<ChunkUrl>* chunk_urls) { + DCHECK(next_update_sec); + DCHECK(deletes); + DCHECK(chunk_urls); + + int length = chunk_len; + const char* data = chunk_data; + + // Populated below. + std::string list_name; + + while (length > 0) { + std::string cmd_line; + if (!GetLine(data, length, &cmd_line)) + return false; // Error: bad list format! + + std::vector<std::string> cmd_parts; + SplitString(cmd_line, ':', &cmd_parts); + if (cmd_parts.empty()) + return false; + const std::string& command = cmd_parts[0]; + if (cmd_parts.size() != 2 && !(cmd_parts.size() == 3 && command[0] == 'u')) + return false; + + const int consumed = static_cast<int>(cmd_line.size()) + 1; + data += consumed; + length -= consumed; + if (length < 0) + return false; // Parsing error. + + // Differentiate on the first character of the command (which is usually + // only one character, with the exception of the 'ad' and 'sd' commands). + switch (command[0]) { + case 'a': + case 's': { + // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must + // have also parsed the list name before getting here, or the add-del + // or sub-del will have no context. 
+ if (command.size() != 2 || command[1] != 'd' || list_name.empty()) + return false; + SBChunkDelete chunk_delete; + chunk_delete.is_sub_del = command[0] == 's'; + StringToRanges(cmd_parts[1], &chunk_delete.chunk_del); + chunk_delete.list_name = list_name; + deletes->push_back(chunk_delete); + break; + } + + case 'e': + if (cmd_parts[1] != "pleaserekey") + return false; + *re_key = true; + break; + + case 'i': + // The line providing the name of the list (i.e. 'goog-phish-shavar'). + list_name = cmd_parts[1]; + break; + + case 'm': + // Verify that the MAC of the remainer of this chunk is what we expect. + if (!key.empty() && + !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length)) + return false; + break; + + case 'n': + // The line providing the next earliest time (in seconds) to re-query. + *next_update_sec = atoi(cmd_parts[1].c_str()); + break; + + case 'u': { + // The line providing a URL redirect to a chunk. + std::string redirect_url = cmd_parts[1]; + if (cmd_parts.size() == 3) { + redirect_url += ':' + cmd_parts[2]; + } + + std::string mac; + if (!key.empty()) { + std::string::size_type mac_pos = redirect_url.rfind(','); + if (mac_pos == std::string::npos) + return false; + mac = redirect_url.substr(mac_pos + 1); + redirect_url = redirect_url.substr(0, mac_pos); + } + ChunkUrl chunk_url; + chunk_url.url = redirect_url; + if (!key.empty()) + chunk_url.mac = mac; + chunk_urls->push_back(chunk_url); + break; + } + + case 'r': + if (cmd_parts[1] != "pleasereset") + return false; + *reset = true; + break; + + default: + // A command we don't understand. 
+ return false; + } + } + + return true; +} + +bool SafeBrowsingProtocolParser::ParseChunk(const char* data, + int length, + const std::string& key, + const std::string& mac, + bool* re_key, + std::deque<SBChunk>* chunks) { + int remaining = length; + const char* chunk_data = data; + + if (!key.empty() && + !safe_browsing_util::VerifyMAC(key, mac, data, length)) { + return false; + } + + while (remaining > 0) { + std::string cmd_line; + if (!GetLine(chunk_data, length, &cmd_line)) + return false; // Error: bad chunk format! + + const int line_len = static_cast<int>(cmd_line.length()) + 1; + std::vector<std::string> cmd_parts; + SplitString(cmd_line, ':', &cmd_parts); + + // Handle a possible re-key command. + if (cmd_parts.size() != 4) { + if (cmd_parts.size() == 2 && + cmd_parts[0] == "e" && + cmd_parts[1] == "pleaserekey") { + *re_key = true; + chunk_data += line_len; + remaining -= line_len; + continue; + } + return false; + } + + // Process the chunk data. + const int chunk_number = atoi(cmd_parts[1].c_str()); + const int hash_len = atoi(cmd_parts[2].c_str()); + if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) { + SB_DLOG(INFO) << "ParseChunk got unknown hashlen " << hash_len; + return false; + } + + const int chunk_len = atoi(cmd_parts[3].c_str()); + chunk_data += line_len; + remaining -= line_len; + + chunks->push_back(SBChunk()); + chunks->back().chunk_number = chunk_number; + + if (cmd_parts[0] == "a") { + if (!ParseAddChunk(chunk_data, chunk_len, hash_len, &chunks->back().hosts)) + return false; // Parse error. + } else if (cmd_parts[0] == "s") { + if (!ParseSubChunk(chunk_data, chunk_len, hash_len, &chunks->back().hosts)) + return false; // Parse error. + } else { + NOTREACHED(); + return false; + } + + chunk_data += chunk_len; + remaining -= chunk_len; + if (remaining < 0) + return false; // Parse error. 
+ } + + DCHECK(remaining == 0); + + return true; +} + +bool SafeBrowsingProtocolParser::ParseAddChunk( + const char* data, int data_len, int hash_len, + std::deque<SBChunkHost>* hosts) { + + int remaining = data_len; + const char* chunk_data = data; + const int min_size = sizeof(SBPrefix) + 1; + + while (remaining >= min_size) { + SBPrefix host; + int prefix_count; + ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); + SBEntry::Type type = hash_len == sizeof(SBPrefix) ? + SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH; + SBEntry* entry; + int index_start = 0; + + // If a host has more than 255 prefixes, then subsequent entries are used. + // Check if this is the case, and if so put them in one SBEntry since the + // database code assumes that all prefixes from the same host and chunk are + // in one SBEntry. + if (!hosts->empty() && hosts->back().host == host && + hosts->back().entry->HashLen() == hash_len) { + // Reuse the SBChunkHost, but need to create a new SBEntry since we have + // more prefixes. + index_start = hosts->back().entry->prefix_count(); + entry = hosts->back().entry->Enlarge(prefix_count); + hosts->back().entry = entry; + } else { + entry = SBEntry::Create(type, prefix_count); + SBChunkHost chunk_host; + chunk_host.host = host; + chunk_host.entry = entry; + hosts->push_back(chunk_host); + } + + if (!ReadPrefixes(&chunk_data, &remaining, entry, prefix_count, index_start)) + return false; + } + + return remaining == 0; +} + +bool SafeBrowsingProtocolParser::ParseSubChunk( + const char* data, int data_len, int hash_len, + std::deque<SBChunkHost>* hosts) { + + int remaining = data_len; + const char* chunk_data = data; + const int min_size = 2 * sizeof(SBPrefix) + 1; + + while (remaining >= min_size) { + SBPrefix host; + int prefix_count; + ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); + SBEntry::Type type = hash_len == sizeof(SBPrefix) ? 
+ SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH; + SBEntry* entry = SBEntry::Create(type, prefix_count); + + SBChunkHost chunk_host; + chunk_host.host = host; + chunk_host.entry = entry; + hosts->push_back(chunk_host); + + if (prefix_count == 0) { + // There is only an add chunk number (no prefixes). + entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining)); + continue; + } + + if (!ReadPrefixes(&chunk_data, &remaining, entry, prefix_count, 0)) + return false; + } + + return remaining == 0; +} + + +void SafeBrowsingProtocolParser::ReadHostAndPrefixCount( + const char** data, int* remaining, SBPrefix* host, int* count) { + // Next 4 bytes are the host prefix. + memcpy(host, *data, sizeof(SBPrefix)); + *data += sizeof(SBPrefix); + *remaining -= sizeof(SBPrefix); + + // Next 1 byte is the prefix count (could be zero, but never negative). + *count = static_cast<unsigned char>(**data); + *data += 1; + *remaining -= 1; +} + +int SafeBrowsingProtocolParser::ReadChunkId( + const char** data, int* remaining) { + int chunk_number; + memcpy(&chunk_number, *data, sizeof(chunk_number)); + *data += sizeof(chunk_number); + *remaining -= sizeof(chunk_number); + return htonl(chunk_number); +} + +bool SafeBrowsingProtocolParser::ReadPrefixes( + const char** data, int* remaining, SBEntry* entry, int count, + int index_start) { + int hash_len = entry->HashLen(); + for (int i = 0; i < count; ++i) { + if (entry->IsSub()) { + entry->SetChunkIdAtPrefix(index_start + i, ReadChunkId(data, remaining)); + if (*remaining <= 0) + return false; + } + + if (hash_len == sizeof(SBPrefix)) { + entry->SetPrefixAt(index_start + i, + *reinterpret_cast<const SBPrefix*>(*data)); + } else { + entry->SetFullHashAt(index_start + i, + *reinterpret_cast<const SBFullHash*>(*data)); + } + *data += hash_len; + *remaining -= hash_len; + if (*remaining < 0) + return false; + } + + return true; +} + +bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data, + int chunk_length, + std::string* client_key, 
+ std::string* wrapped_key) { + DCHECK(client_key && wrapped_key); + client_key->clear(); + wrapped_key->clear(); + + const char* data = chunk_data; + int remaining = chunk_length; + + while (remaining > 0) { + std::string line; + if (!GetLine(data, remaining, &line)) + return false; + + std::vector<std::string> cmd_parts; + SplitString(line, ':', &cmd_parts); + if (cmd_parts.size() != 3) + return false; + + if (cmd_parts[2].size() != atoi(cmd_parts[1].c_str())) + return false; + + if (cmd_parts[0] == "clientkey") { + client_key->assign(cmd_parts[2]); + } else if (cmd_parts[0] == "wrappedkey") { + wrapped_key->assign(cmd_parts[2]); + } else { + return false; + } + + data += line.size() + 1; + remaining -= static_cast<int>(line.size()) + 1; + } + + if (client_key->empty() || wrapped_key->empty()) + return false; + + return true; +}
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/protocol_parser.h b/chrome/browser/safe_browsing/protocol_parser.h new file mode 100644 index 0000000..802b5e5 --- /dev/null +++ b/chrome/browser/safe_browsing/protocol_parser.h @@ -0,0 +1,153 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_PARSER_H__ +#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_PARSER_H__ + +// Parse the data returned from the chunk response. +// +// Based on the SafeBrowsing v2.1 protocol: +// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec +// +// Read the response from a SafeBrowsing request, and parse into useful pieces. +// The protocol is generally line oriented, but can contain binary data in the +// actual chunk responses. The consumer of the protocol data should instantiate +// the parser and call the appropriate parsing function on the data. +// +// Examples of protocol responses: +// +// 1. List identification +// i:goog-phish-shavar\n +// <command>:<command_data>\n +// +// 2. Minimum time to wait (seconds) until the next download request can be made +// n:1200\n +// <command>:<time_in_seconds>\n +// +// 3. Redirect URL for retrieving a chunk +// u:cache.googlevideo.com/safebrowsing/rd/goog-phish-shavar_a_1\n +// <command>:<url>\n +// +// 4. Add and sub chunks +// a:1:4:523\n... <-- Add chunk + binary data +// s:13:4:17\n... <-- Sub chunk + binary data +// <chunk_type>:<chunk_number>:<prefix_len>:<chunk_bytes>\n<binary_data> +// +// 5. Add-del and sub-del requests +// ad:1-4000,5001\n <-- Add-del +// sd:1,3,5,7,903\n <-- Sub-del +// <command>:<chunk_range>\n + + +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "chrome/browser/safe_browsing/chunk_range.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" + + +class SafeBrowsingProtocolParser { + public: + SafeBrowsingProtocolParser(); + + // Parse the response of an update request. Results for chunk deletions (both + // add-del and sub-del are returned in 'chunk_deletes', and new chunk URLs to + // download are contained in 'chunk_urls'. The next time the client is allowed + // to request another update is returned in 'next_update_sec'. 
If the service + // wants us to retrieve new MAC keys, 're_key' will be set to true. If we are + // using MACs to verify responses, the 'key' must be set to the private key + // returned from the SafeBrowsing servers. 'reset' will be set to true if the + // SafeBrowsing service wants us to dump our database. + // Returns 'true'if it was able to decode the chunk properly, 'false' if not + // decoded properly and the results should be ignored. + bool ParseUpdate(const char* chunk_data, + int chunk_len, + const std::string& key, + int* next_update_sec, + bool* re_key, + bool* reset, + std::vector<SBChunkDelete>* chunk_deletes, + std::vector<ChunkUrl>* chunk_urls); + + // Parse the response from a chunk URL request and returns the hosts/prefixes + // for adds and subs in "chunks". Returns 'true' on successful parsing, + // 'false' otherwise. Any result should be ignored when a parse has failed. + bool ParseChunk(const char* chunk_data, + int chunk_len, + const std::string& key, + const std::string& mac, + bool* re_key, + std::deque<SBChunk>* chunks); + + // Parse the result of a GetHash request, returning the list of full hashes. + // If we are checking for valid MACs, the caller should populate 'key'. + bool ParseGetHash(const char* chunk_data, + int chunk_len, + const std::string& key, + bool* re_key, + std::vector<SBFullHashResult>* full_hashes); + + // Convert a list of partial hashes into a proper GetHash request. + void FormatGetHash(const std::vector<SBPrefix>& prefixes, + std::string* request); + + // Parse the keys used for subsequent communications with the SafeBrowsing + // servers. Returns true on successful parse, false on parse error. 
+ bool ParseNewKey(const char* chunk_data, + int chunk_length, + std::string* client_key, + std::string* wrapped_key); + + private: + bool ParseAddChunk(const char* data, + int data_len, + int hash_len, + std::deque<SBChunkHost>* hosts); + bool ParseSubChunk(const char* data, + int data_len, + int hash_len, + std::deque<SBChunkHost>* hosts); + + // Helper functions used by ParseAddChunk and ParseSubChunk. + static void ReadHostAndPrefixCount( + const char** data, int* remaining, SBPrefix* host, int* count); + static int ReadChunkId(const char** data, int* remaining); + static bool ReadPrefixes( + const char** data, int* remaining, SBEntry* entry, int count, + int index_start); + + // The name of the current list + std::string list_name_; + + DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolParser); +}; + + +#endif // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_PARSER_H__
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/protocol_parser_unittest.cc b/chrome/browser/safe_browsing/protocol_parser_unittest.cc new file mode 100644 index 0000000..2366543 --- /dev/null +++ b/chrome/browser/safe_browsing/protocol_parser_unittest.cc @@ -0,0 +1,654 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Program to test the SafeBrowsing protocol parsing v2.1. 
#include <hash_map>

#include "base/logging.h"
#include "base/string_util.h"
#include "base/win_util.h"
#include "chrome/browser/safe_browsing/protocol_parser.h"
#include "testing/gtest/include/gtest/gtest.h"


// Test parsing one add chunk.
//
// The fixture is the header line "a:1:4:35\n" followed by 35 bytes of
// payload. The assertions below pin how the parser is expected to read it:
// chunk number 1, 4-byte prefixes, and three host entries, each laid out as a
// 4-byte host key, a one-byte prefix count, and that many prefixes.
TEST(SafeBrowsingProtocolParsingTest, TestAddChunk) {
  std::string add_chunk("a:1:4:35\naaaax1111\0032222333344447777\00288889999");
  // Byte 13 is the 'x' placeholder that follows the first host key "aaaa".
  // It is overwritten with a prefix count of zero after construction because
  // a NUL inside the C-string initializer would truncate the std::string.
  add_chunk[13] = '\0';

  // Run the parse.
  SafeBrowsingProtocolParser parser;
  bool re_key = false;
  std::deque<SBChunk> chunks;
  bool result = parser.ParseChunk(add_chunk.data(),
                                  static_cast<int>(add_chunk.length()),
                                  "", "", &re_key, &chunks);
  EXPECT_TRUE(result);
  EXPECT_FALSE(re_key);
  EXPECT_EQ(chunks.size(), 1);
  EXPECT_EQ(chunks[0].chunk_number, 1);
  EXPECT_EQ(chunks[0].hosts.size(), 3);

  // Host "aaaa" carries no prefixes (count byte was zeroed above).
  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
  SBEntry* entry = chunks[0].hosts[0].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 0);

  // Host "1111" carries three prefixes: "2222", "3333", "4444".
  EXPECT_EQ(chunks[0].hosts[1].host, 0x31313131);
  entry = chunks[0].hosts[1].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 3);
  EXPECT_EQ(entry->PrefixAt(0), 0x32323232);
  EXPECT_EQ(entry->PrefixAt(1), 0x33333333);
  EXPECT_EQ(entry->PrefixAt(2), 0x34343434);

  // Host "7777" carries two prefixes: "8888", "9999".
  EXPECT_EQ(chunks[0].hosts[2].host, 0x37373737);
  entry = chunks[0].hosts[2].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 2);
  EXPECT_EQ(entry->PrefixAt(0), 0x38383838);
  EXPECT_EQ(entry->PrefixAt(1), 0x39393939);

  // The parsed chunks are released explicitly once the assertions are done.
  safe_browsing_util::FreeChunks(&chunks);
}

// Test parsing one add chunk with full hashes.
//
// The header declares 32-byte hashes and 69 bytes of payload: the 4-byte host
// key "aaaa", a count byte of 2, and two 32-byte full hashes.
TEST(SafeBrowsingProtocolParsingTest, TestAddFullChunk) {
  std::string add_chunk("a:1:32:69\naaaa");
  add_chunk.push_back(2);

  // Fill the two hashes with distinct alternating byte patterns so that a
  // mix-up between them is detectable.
  SBFullHash full_hash1, full_hash2;
  for (int i = 0; i < 32; ++i) {
    full_hash1.full_hash[i] = i % 2 ? 1 : 2;
    full_hash2.full_hash[i] = i % 2 ? 3 : 4;
  }

  add_chunk.append(full_hash1.full_hash, 32);
  add_chunk.append(full_hash2.full_hash, 32);

  // Run the parse.
  SafeBrowsingProtocolParser parser;
  bool re_key = false;
  std::deque<SBChunk> chunks;
  bool result = parser.ParseChunk(add_chunk.data(),
                                  static_cast<int>(add_chunk.length()),
                                  "", "", &re_key, &chunks);
  EXPECT_TRUE(result);
  EXPECT_FALSE(re_key);
  EXPECT_EQ(chunks.size(), 1);
  EXPECT_EQ(chunks[0].chunk_number, 1);
  EXPECT_EQ(chunks[0].hosts.size(), 1);

  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
  SBEntry* entry = chunks[0].hosts[0].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_FULL_HASH);
  EXPECT_EQ(entry->prefix_count(), 2);
  EXPECT_TRUE(entry->FullHashAt(0) == full_hash1);
  EXPECT_TRUE(entry->FullHashAt(1) == full_hash2);

  safe_browsing_util::FreeChunks(&chunks);
}

// Test parsing multiple add chunks. We'll use the same chunk as above, and add
// one more after it.
//
// The second chunk ("a:2:4:13\n") holds a single host "5555" with two
// prefixes, "pppp" and "gggg".
TEST(SafeBrowsingProtocolParsingTest, TestAddChunks) {
  std::string add_chunk("a:1:4:35\naaaax1111\0032222333344447777\00288889999"
                        "a:2:4:13\n5555\002ppppgggg");
  // Same placeholder trick as in TestAddChunk: zero the count byte that
  // follows the first host key.
  add_chunk[13] = '\0';

  // Run the parse.
  SafeBrowsingProtocolParser parser;
  bool re_key = false;
  std::deque<SBChunk> chunks;
  bool result = parser.ParseChunk(add_chunk.data(),
                                  static_cast<int>(add_chunk.length()),
                                  "", "", &re_key, &chunks);
  EXPECT_TRUE(result);
  EXPECT_FALSE(re_key);
  EXPECT_EQ(chunks.size(), 2);
  EXPECT_EQ(chunks[0].chunk_number, 1);
  EXPECT_EQ(chunks[0].hosts.size(), 3);

  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
  SBEntry* entry = chunks[0].hosts[0].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 0);

  EXPECT_EQ(chunks[0].hosts[1].host, 0x31313131);
  entry = chunks[0].hosts[1].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 3);
  EXPECT_EQ(entry->PrefixAt(0), 0x32323232);
  EXPECT_EQ(entry->PrefixAt(1), 0x33333333);
  EXPECT_EQ(entry->PrefixAt(2), 0x34343434);

  EXPECT_EQ(chunks[0].hosts[2].host, 0x37373737);
  entry = chunks[0].hosts[2].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 2);
  EXPECT_EQ(entry->PrefixAt(0), 0x38383838);
  EXPECT_EQ(entry->PrefixAt(1), 0x39393939);


  // Second chunk.
  EXPECT_EQ(chunks[1].chunk_number, 2);
  EXPECT_EQ(chunks[1].hosts.size(), 1);

  EXPECT_EQ(chunks[1].hosts[0].host, 0x35353535);
  entry = chunks[1].hosts[0].entry;
  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 2);
  EXPECT_EQ(entry->PrefixAt(0), 0x70707070);
  EXPECT_EQ(entry->PrefixAt(1), 0x67676767);

  safe_browsing_util::FreeChunks(&chunks);
}

// Test parsing one add chunk where a hostkey spans several entries.
// The host key "aaaa" appears twice in the payload: once with 255 prefixes
// and once with 5. The assertions expect the parser to merge both runs into a
// single host entry holding 260 prefixes.
TEST(SafeBrowsingProtocolParsingTest, TestAddBigChunk) {
  std::string add_chunk("a:1:4:1050\naaaaX");
  // Turn the trailing 'X' placeholder into the count byte 0xFF (255).
  add_chunk[add_chunk.size() - 1] |= 0xFF;
  for (int i = 0; i < 255; ++i)
    add_chunk.append(StringPrintf("%04d", i));

  // Second run for the same host key, with a count of 5.
  add_chunk.append("aaaa");
  add_chunk.push_back(5);
  for (int i = 0; i < 5; ++i)
    add_chunk.append(StringPrintf("001%d", i));

  SafeBrowsingProtocolParser parser;
  bool re_key = false;
  std::deque<SBChunk> chunks;
  bool result = parser.ParseChunk(add_chunk.data(),
                                  static_cast<int>(add_chunk.length()),
                                  "", "", &re_key, &chunks);
  EXPECT_TRUE(result);
  EXPECT_FALSE(re_key);
  EXPECT_EQ(chunks.size(), 1);
  EXPECT_EQ(chunks[0].chunk_number, 1);

  EXPECT_EQ(chunks[0].hosts.size(), 1);

  const SBChunkHost& host = chunks[0].hosts[0];
  EXPECT_EQ(host.host, 0x61616161);
  EXPECT_EQ(host.entry->prefix_count(), 260);

  safe_browsing_util::FreeChunks(&chunks);
}

// Test parsing one sub chunk.
//
// Sub chunk payloads pair every prefix with a 4-byte add-chunk id. A host
// with a zero count is followed by a single chunk id ("kkkk" here), which the
// assertions read back through chunk_id().
TEST(SafeBrowsingProtocolParsingTest, TestSubChunk) {
  std::string sub_chunk("s:9:4:59\naaaaxkkkk1111\003"
                        "zzzz2222zzzz3333zzzz4444"
                        "7777\002yyyy8888yyyy9999");
  // Byte 13 is the 'x' placeholder after host key "aaaa"; make it a zero
  // prefix count.
  sub_chunk[13] = '\0';

  // Run the parse.
  SafeBrowsingProtocolParser parser;
  bool re_key = false;
  std::deque<SBChunk> chunks;
  bool result = parser.ParseChunk(sub_chunk.data(),
                                  static_cast<int>(sub_chunk.length()),
                                  "", "", &re_key, &chunks);
  EXPECT_TRUE(result);
  EXPECT_FALSE(re_key);
  EXPECT_EQ(chunks.size(), 1);
  EXPECT_EQ(chunks[0].chunk_number, 9);
  EXPECT_EQ(chunks[0].hosts.size(), 3);

  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
  SBEntry* entry = chunks[0].hosts[0].entry;
  EXPECT_EQ(entry->type(), SBEntry::SUB_PREFIX);
  EXPECT_EQ(entry->chunk_id(), 0x6b6b6b6b);
  EXPECT_EQ(entry->prefix_count(), 0);

  EXPECT_EQ(chunks[0].hosts[1].host, 0x31313131);
  entry = chunks[0].hosts[1].entry;
  EXPECT_EQ(entry->type(), SBEntry::SUB_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 3);
  EXPECT_EQ(entry->ChunkIdAtPrefix(0), 0x7a7a7a7a);
  EXPECT_EQ(entry->PrefixAt(0), 0x32323232);
  EXPECT_EQ(entry->ChunkIdAtPrefix(1), 0x7a7a7a7a);
  EXPECT_EQ(entry->PrefixAt(1), 0x33333333);
  EXPECT_EQ(entry->ChunkIdAtPrefix(2), 0x7a7a7a7a);
  EXPECT_EQ(entry->PrefixAt(2), 0x34343434);

  EXPECT_EQ(chunks[0].hosts[2].host, 0x37373737);
  entry = chunks[0].hosts[2].entry;
  EXPECT_EQ(entry->type(), SBEntry::SUB_PREFIX);
  EXPECT_EQ(entry->prefix_count(), 2);
  EXPECT_EQ(entry->ChunkIdAtPrefix(0), 0x79797979);
  EXPECT_EQ(entry->PrefixAt(0), 0x38383838);
  EXPECT_EQ(entry->ChunkIdAtPrefix(1), 0x79797979);
  EXPECT_EQ(entry->PrefixAt(1), 0x39393939);

  safe_browsing_util::FreeChunks(&chunks);
}

// Test parsing one sub chunk with full hashes.
//
// Payload: host key "aaaa", count 2, then two (4-byte chunk id, 32-byte full
// hash) pairs, 77 bytes in total.
TEST(SafeBrowsingProtocolParsingTest, TestSubFullChunk) {
  std::string sub_chunk("s:1:32:77\naaaa");
  sub_chunk.push_back(2);

  SBFullHash full_hash1, full_hash2;
  for (int i = 0; i < 32; ++i) {
    full_hash1.full_hash[i] = i % 2 ? 1 : 2;
    full_hash2.full_hash[i] = i % 2 ? 3 : 4;
  }

  sub_chunk.append("yyyy");
  sub_chunk.append(full_hash1.full_hash, 32);
  sub_chunk.append("zzzz");
  sub_chunk.append(full_hash2.full_hash, 32);

  // Run the parse.
  SafeBrowsingProtocolParser parser;
  bool re_key = false;
  std::deque<SBChunk> chunks;
  bool result = parser.ParseChunk(sub_chunk.data(),
                                  static_cast<int>(sub_chunk.length()),
                                  "", "", &re_key, &chunks);
  EXPECT_TRUE(result);
  EXPECT_FALSE(re_key);
  EXPECT_EQ(chunks.size(), 1);
  EXPECT_EQ(chunks[0].chunk_number, 1);
  EXPECT_EQ(chunks[0].hosts.size(), 1);

  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
  SBEntry* entry = chunks[0].hosts[0].entry;
  EXPECT_EQ(entry->type(), SBEntry::SUB_FULL_HASH);
  EXPECT_EQ(entry->prefix_count(), 2);
  EXPECT_EQ(entry->ChunkIdAtPrefix(0), 0x79797979);
  EXPECT_TRUE(entry->FullHashAt(0) == full_hash1);
  EXPECT_EQ(entry->ChunkIdAtPrefix(1), 0x7a7a7a7a);
  EXPECT_TRUE(entry->FullHashAt(1) == full_hash2);

  safe_browsing_util::FreeChunks(&chunks);
}

// Test parsing the SafeBrowsing update response.
//
// Covers the "ad:" / "sd:" chunk-deletion lines, which list single chunk
// numbers and inclusive ranges, plus the "n:" next-query delay.
TEST(SafeBrowsingProtocolParsingTest, TestChunkDelete) {
  std::string add_del("n:1700\ni:phishy\nad:1-7,43-597,44444,99999\n"
                      "i:malware\nsd:21-27,42,171717\n");

  SafeBrowsingProtocolParser parser;
  int next_query_sec = 0;
  bool re_key = false;
  bool reset = false;
  std::vector<SBChunkDelete> deletes;
  std::vector<ChunkUrl> urls;
  EXPECT_TRUE(parser.ParseUpdate(add_del.data(),
                                 static_cast<int>(add_del.length()), "",
                                 &next_query_sec, &re_key,
                                 &reset, &deletes, &urls));

  EXPECT_TRUE(urls.empty());
  EXPECT_FALSE(re_key);
  EXPECT_FALSE(reset);
  EXPECT_EQ(next_query_sec, 1700);
  EXPECT_EQ(deletes.size(), 2);

  // Deletions listed under "i:phishy".
  EXPECT_EQ(deletes[0].chunk_del.size(), 4);
  EXPECT_TRUE(deletes[0].chunk_del[0] == ChunkRange(1, 7));
  EXPECT_TRUE(deletes[0].chunk_del[1] == ChunkRange(43, 597));
  EXPECT_TRUE(deletes[0].chunk_del[2] == ChunkRange(44444));
  EXPECT_TRUE(deletes[0].chunk_del[3] == ChunkRange(99999));

  // Deletions listed under "i:malware".
  EXPECT_EQ(deletes[1].chunk_del.size(), 3);
  EXPECT_TRUE(deletes[1].chunk_del[0] == ChunkRange(21, 27));
  EXPECT_TRUE(deletes[1].chunk_del[1] == ChunkRange(42));
  EXPECT_TRUE(deletes[1].chunk_del[2] == ChunkRange(171717));

  // An update response with missing list name.
  // The "ad:" line arrives before any "i:" line, so the parse must fail.

  next_query_sec = 0;
  deletes.clear();
  urls.clear();
  add_del = "n:1700\nad:1-7,43-597,44444,99999\ni:malware\nsd:4,21-27171717\n";
  EXPECT_FALSE(parser.ParseUpdate(add_del.data(),
                                  static_cast<int>(add_del.length()), "",
                                  &next_query_sec, &re_key,
                                  &reset, &deletes, &urls));
}

// Test parsing the SafeBrowsing update response.
//
// Covers "u:" redirect lines parsed without a client key: the whole text
// after "u:" is expected back as the URL, commas included.
TEST(SafeBrowsingProtocolParsingTest, TestRedirects) {
  std::string redirects("i:goog-malware-shavar\n"
    "u:cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_1\n"
    "u:cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_2\n"
    "u:cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_3\n"
    "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8641-8800:8641-8689,"
    "8691-8731,8733-8786\n");

  SafeBrowsingProtocolParser parser;
  int next_query_sec = 0;
  bool re_key = false;
  bool reset = false;
  std::vector<SBChunkDelete> deletes;
  std::vector<ChunkUrl> urls;
  EXPECT_TRUE(parser.ParseUpdate(redirects.data(),
                                 static_cast<int>(redirects.length()), "",
                                 &next_query_sec, &re_key,
                                 &reset, &deletes, &urls));

  EXPECT_FALSE(re_key);
  EXPECT_FALSE(reset);
  EXPECT_EQ(urls.size(), 4);
  EXPECT_EQ(urls[0].url,
      "cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_1");
  EXPECT_EQ(urls[1].url,
      "cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_2");
  EXPECT_EQ(urls[2].url,
      "cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_3");
  EXPECT_EQ(urls[3].url,
      "s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8641-8800:8641-8689,"
      "8691-8731,8733-8786");
  // No "n:" line was sent, so the caller's initial value must be untouched.
  EXPECT_EQ(next_query_sec, 0);
  EXPECT_TRUE(deletes.empty());
}

// Same as TestRedirects, but a client key is supplied. The assertions expect
// the text after the last comma of each "u:" line to be split off into
// ChunkUrl::mac rather than left in the URL.
TEST(SafeBrowsingProtocolParsingTest, TestRedirectsWithMac) {
  std::string redirects("i:goog-phish-shavar\n"
    "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6501-6505:6501-6505,"
    "pcY6iVeT9-CBQ3fdAF0rpnKjR1Y=\n"
    "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8001-8160:8001-8024,"
    "8026-8045,8048-8049,8051-8134,8136-8152,8155-8160,"
    "j6XXAEWnjYk9tVVLBSdQvIEq2Wg=\n");

  SafeBrowsingProtocolParser parser;
  int next_query_sec = 0;
  bool re_key = false;
  bool reset = false;
  const std::string key("58Lqn5WIP961x3zuLGo5Uw==");
  std::vector<SBChunkDelete> deletes;
  std::vector<ChunkUrl> urls;
  EXPECT_TRUE(parser.ParseUpdate(redirects.data(),
                                 static_cast<int>(redirects.length()), key,
                                 &next_query_sec, &re_key,
                                 &reset, &deletes, &urls));

  EXPECT_FALSE(re_key);
  EXPECT_FALSE(reset);
  EXPECT_EQ(urls.size(), 2);
  EXPECT_EQ(urls[0].url,
      "s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6501-6505:6501-6505");
  EXPECT_EQ(urls[0].mac, "pcY6iVeT9-CBQ3fdAF0rpnKjR1Y=");
  EXPECT_EQ(urls[1].url,
      "s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8001-8160:8001-8024,"
      "8026-8045,8048-8049,8051-8134,8136-8152,8155-8160");
  EXPECT_EQ(urls[1].mac, "j6XXAEWnjYk9tVVLBSdQvIEq2Wg=");
}

// Test parsing various SafeBrowsing protocol headers.
+TEST(SafeBrowsingProtocolParsingTest, TestNextQueryTime) { + std::string headers("n:1800\ni:goog-white-shavar\n"); + SafeBrowsingProtocolParser parser; + int next_query_sec = 0; + bool re_key = false; + bool reset = false; + std::vector<SBChunkDelete> deletes; + std::vector<ChunkUrl> urls; + EXPECT_TRUE(parser.ParseUpdate(headers.data(), + static_cast<int>(headers.length()), "", + &next_query_sec, &re_key, + &reset, &deletes, &urls)); + + EXPECT_EQ(next_query_sec, 1800); + EXPECT_FALSE(re_key); + EXPECT_FALSE(reset); + EXPECT_TRUE(deletes.empty()); + EXPECT_TRUE(urls.empty()); +} + +// Test parsing data from a GetHashRequest +TEST(SafeBrowsingProtocolParsingTest, TestGetHash) { + std::string get_hash("goog-phish-shavar:19:96\n" + "00112233445566778899aabbccddeeff" + "00001111222233334444555566667777" + "ffffeeeeddddccccbbbbaaaa99998888"); + std::vector<SBFullHashResult> full_hashes; + bool re_key = false; + SafeBrowsingProtocolParser parser; + parser.ParseGetHash(get_hash.data(), + static_cast<int>(get_hash.length()), "", + &re_key, + &full_hashes); + + EXPECT_FALSE(re_key); + EXPECT_EQ(full_hashes.size(), 3); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + "00112233445566778899aabbccddeeff", + sizeof(SBFullHash)), 0); + EXPECT_EQ(full_hashes[0].list_name, "goog-phish-shavar"); + EXPECT_EQ(memcmp(&full_hashes[1].hash, + "00001111222233334444555566667777", + sizeof(SBFullHash)), 0); + EXPECT_EQ(full_hashes[1].list_name, "goog-phish-shavar"); + EXPECT_EQ(memcmp(&full_hashes[2].hash, + "ffffeeeeddddccccbbbbaaaa99998888", + sizeof(SBFullHash)), 0); + EXPECT_EQ(full_hashes[2].list_name, "goog-phish-shavar"); + + // Test multiple lists in the GetHash results. 
+ std::string get_hash2("goog-phish-shavar:19:32\n" + "00112233445566778899aabbccddeeff" + "goog-malware-shavar:19:64\n" + "cafebeefcafebeefdeaddeaddeaddead" + "zzzzyyyyxxxxwwwwvvvvuuuuttttssss"); + parser.ParseGetHash(get_hash2.data(), + static_cast<int>(get_hash2.length()), "", + &re_key, + &full_hashes); + + EXPECT_FALSE(re_key); + EXPECT_EQ(full_hashes.size(), 3); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + "00112233445566778899aabbccddeeff", + sizeof(SBFullHash)), 0); + EXPECT_EQ(full_hashes[0].list_name, "goog-phish-shavar"); + EXPECT_EQ(memcmp(&full_hashes[1].hash, + "cafebeefcafebeefdeaddeaddeaddead", + sizeof(SBFullHash)), 0); + EXPECT_EQ(full_hashes[1].list_name, "goog-malware-shavar"); + EXPECT_EQ(memcmp(&full_hashes[2].hash, + "zzzzyyyyxxxxwwwwvvvvuuuuttttssss", + sizeof(SBFullHash)), 0); + EXPECT_EQ(full_hashes[2].list_name, "goog-malware-shavar"); +} + +TEST(SafeBrowsingProtocolParsingTest, TestGetHashWithMac) { + // TODO(paulg): Bug: http://b/1084719, skip this test on Windows 2000 until + // this bug is fixed. 
+ if (win_util::GetWinVersion() <= win_util::WINVERSION_2000) + return; + + const unsigned char get_hash[] = { + 0x32, 0x56, 0x74, 0x6f, 0x6b, 0x36, 0x64, 0x41, + 0x51, 0x72, 0x65, 0x51, 0x62, 0x38, 0x51, 0x68, + 0x59, 0x45, 0x57, 0x51, 0x57, 0x4d, 0x52, 0x65, + 0x42, 0x63, 0x41, 0x3d, 0x0a, 0x67, 0x6f, 0x6f, + 0x67, 0x2d, 0x70, 0x68, 0x69, 0x73, 0x68, 0x2d, + 0x73, 0x68, 0x61, 0x76, 0x61, 0x72, 0x3a, 0x36, + 0x31, 0x36, 0x39, 0x3a, 0x33, 0x32, 0x0a, 0x17, + 0x7f, 0x03, 0x42, 0x28, 0x1c, 0x31, 0xb9, 0x0b, + 0x1c, 0x7b, 0x9d, 0xaf, 0x7b, 0x43, 0x99, 0x10, + 0xc1, 0xab, 0xe3, 0x1b, 0x35, 0x80, 0x38, 0x96, + 0xf9, 0x44, 0x4f, 0x28, 0xb4, 0xeb, 0x45 + }; + + const unsigned char hash_result [] = { + 0x17, 0x7f, 0x03, 0x42, 0x28, 0x1c, 0x31, 0xb9, + 0x0b, 0x1c, 0x7b, 0x9d, 0xaf, 0x7b, 0x43, 0x99, + 0x10, 0xc1, 0xab, 0xe3, 0x1b, 0x35, 0x80, 0x38, + 0x96, 0xf9, 0x44, 0x4f, 0x28, 0xb4, 0xeb, 0x45 + }; + + const std::string key = "58Lqn5WIP961x3zuLGo5Uw=="; + std::vector<SBFullHashResult> full_hashes; + bool re_key = false; + SafeBrowsingProtocolParser parser; + EXPECT_TRUE(parser.ParseGetHash(reinterpret_cast<const char*>(get_hash), + sizeof(get_hash), + key, + &re_key, + &full_hashes)); + EXPECT_FALSE(re_key); + EXPECT_EQ(full_hashes.size(), 1); + EXPECT_EQ(memcmp(hash_result, &full_hashes[0].hash, sizeof(SBFullHash)), 0); +} + + +TEST(SafeBrowsingProtocolParsingTest, TestFormatHash) { + SafeBrowsingProtocolParser parser; + std::vector<SBPrefix> prefixes; + std::string get_hash; + + prefixes.push_back(0x34333231); + prefixes.push_back(0x64636261); + prefixes.push_back(0x73727170); + + parser.FormatGetHash(prefixes, &get_hash); + EXPECT_EQ(get_hash, "4:12\n1234abcdpqrs"); +} + +TEST(SafeBrowsingProtocolParsingTest, TestGetKey) { + SafeBrowsingProtocolParser parser; + std::string key_response("clientkey:10:0123456789\n" + "wrappedkey:20:abcdefghijklmnopqrst\n"); + + std::string client_key, wrapped_key; + EXPECT_TRUE(parser.ParseNewKey(key_response.data(), + 
static_cast<int>(key_response.length()), + &client_key, + &wrapped_key)); + + EXPECT_EQ(client_key, "0123456789"); + EXPECT_EQ(wrapped_key, "abcdefghijklmnopqrst"); +} + +TEST(SafeBrowsingProtocolParsingTest, TestReKey) { + SafeBrowsingProtocolParser parser; + std::string update("n:1800\ni:phishy\ne:pleaserekey\n"); + + bool re_key = false; + bool reset = false; + int next_update = -1; + std::vector<SBChunkDelete> deletes; + std::vector<ChunkUrl> urls; + EXPECT_TRUE(parser.ParseUpdate(update.data(), + static_cast<int>(update.size()), "", + &next_update, &re_key, + &reset, &deletes, &urls)); + EXPECT_TRUE(re_key); +} + +TEST(SafeBrowsingProtocolParsingTest, TestReset) { + SafeBrowsingProtocolParser parser; + std::string update("n:1800\ni:phishy\nr:pleasereset\n"); + + bool re_key = false; + bool reset = false; + int next_update = -1; + std::vector<SBChunkDelete> deletes; + std::vector<ChunkUrl> urls; + EXPECT_TRUE(parser.ParseUpdate(update.data(), + static_cast<int>(update.size()), "", + &next_update, &re_key, + &reset, &deletes, &urls)); + EXPECT_TRUE(reset); +} + +TEST(SafeBrowsingProtocolParsingTest, TestVerifyUpdateMac) { + // TODO(paulg): Bug: http://b/1084719, skip this test on Windows 2000 until + // this bug is fixed. 
+ if (win_util::GetWinVersion() <= win_util::WINVERSION_2000) + return; + + SafeBrowsingProtocolParser parser; + + const std::string update = + "m:XIU0LiQhAPJq6dynXwHbygjS5tw=\n" + "n:1895\n" + "i:goog-phish-shavar\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6501-6505:6501-6505,pcY6iVeT9-CBQ3fdAF0rpnKjR1Y=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6506-6510:6506-6510,SDBrYC3rX3KEPe72LOypnP6QYac=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6511-6520:6511-6520,9UQo-e7OkcsXT2wFWTAhOuWOsUs=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6521-6560:6521-6560,qVNw6JIpR1q6PIXST7J4LJ9n3Zg=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6561-6720:6561-6720,7OiJvCbiwvpzPITW-hQohY5NHuc=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6721-6880:6721-6880,oBS3svhoi9deIa0sWZ_gnD0ujj8=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6881-7040:6881-7040,a0r8Xit4VvH39xgyQHZTPczKBIE=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_7041-7200:7041-7163,q538LChutGknBw55s6kcE2wTcvU=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8001-8160:8001-8024,8026-8045,8048-8049,8051-8134,8136-8152,8155-8160,j6XXAEWnjYk9tVVLBSdQvIEq2Wg=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8161-8320:8161-8215,8217-8222,8224-8320,YaNfiqdQOt-uLCLWVLj46AZpAjQ=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8321-8480:8321-8391,8393-8399,8402,8404-8419,8421-8425,8427,8431-8433,8435-8439,8441-8443,8445-8446,8448-8480,ALj31GQMwGiIeU3bM2ZYKITfU-U=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8481-8640:8481-8500,8502-8508,8510-8511,8513-8517,8519-8525,8527-8531,8533,8536-8539,8541-8576,8578-8638,8640,TlQYRmS_kZ5PBAUIUyNQDq0Jprs=\n" + "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8641-8800:8641-8689,8691-8731,8733-8786,x1Qf7hdNrO8b6yym03ZzNydDS1o=\n"; + + bool re_key = false; + bool reset = false; + int next_update = -1; + std::vector<SBChunkDelete> deletes; + std::vector<ChunkUrl> urls; + 
const std::string key("58Lqn5WIP961x3zuLGo5Uw=="); + EXPECT_TRUE(parser.ParseUpdate(update.data(), + static_cast<int>(update.size()), key, + &next_update, &re_key, + &reset, &deletes, &urls)); + EXPECT_FALSE(re_key); + EXPECT_EQ(next_update, 1895); +} + +TEST(SafeBrowsingProtocolParsingTest, TestVerifyChunkMac) { + // TODO(paulg): Bug: http://b/1084719, skip this test on Windows 2000 until + // this bug is fixed. + if (win_util::GetWinVersion() <= win_util::WINVERSION_2000) + return; + + SafeBrowsingProtocolParser parser; + + const unsigned char chunk[] = { + 0x73, 0x3a, 0x32, 0x30, 0x30, 0x32, 0x3a, 0x34, + 0x3a, 0x32, 0x32, 0x0a, 0x2f, 0x4f, 0x89, 0x7a, + 0x01, 0x00, 0x00, 0x0a, 0x59, 0xc8, 0x71, 0xdf, + 0x9d, 0x29, 0x0c, 0xba, 0xd7, 0x00, 0x00, 0x00, + 0x0a, 0x59 + }; + + bool re_key = false; + std::deque<SBChunk> chunks; + const std::string key("v_aDSz6jI92WeHCOoZ07QA=="); + const std::string mac("W9Xp2fUcQ9V66If6Cvsrstpa4Kk="); + + EXPECT_TRUE(parser.ParseChunk(reinterpret_cast<const char*>(chunk), + sizeof(chunk), key, mac, + &re_key, &chunks)); + EXPECT_FALSE(re_key); + + safe_browsing_util::FreeChunks(&chunks); +}
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc b/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc new file mode 100644 index 0000000..ec6f671 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc @@ -0,0 +1,361 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Implementation of the SafeBrowsingBlockingPage class. 
#include "chrome/browser/safe_browsing/safe_browsing_blocking_page.h"

#include "chrome/app/locales/locale_settings.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/browser_resources.h"
#include "chrome/browser/dom_operation_notification_details.h"
#include "chrome/browser/google_util.h"
#include "chrome/browser/navigation_controller.h"
#include "chrome/browser/navigation_entry.h"
#include "chrome/browser/tab_util.h"
#include "chrome/browser/web_contents.h"
#include "chrome/common/jstemplate_builder.h"
#include "chrome/common/l10n_util.h"
#include "chrome/common/resource_bundle.h"
#include "generated_resources.h"
#include "net/base/escape.h"


// For malware interstitial pages, we link the problematic URL to Google's
// diagnostic page.
// The %s placeholder receives the query-escaped spec of the blocked URL (see
// Continue()).
// TODO(paulg): Change 'googleclient' to a proper client name before launch.
static const char* const kSbDiagnosticUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/diagnostic?site=%s&client=googleclient";

// Base URL handed to safe_browsing_util::GeneratePhishingReportUrl() when the
// user reports a phishing page as wrongly flagged.
static const char* const kSbReportPhishingUrl =
    "http://www.google.com/safebrowsing/report_error/";

// Markup for the diagnostic link embedded in the malware description. The %s
// placeholder receives the localized link text; clicking sends command 4,
// which Continue() handles as user action "4".
static const wchar_t* const kSbDiagnosticHtml =
    L"<a href=\"\" onClick=\"sendCommand(4); return false;\">%s</a>";

// Created on the io_thread.
// Only records its arguments: the tab and controller pointers stay NULL until
// DisplayBlockingPage() resolves them from the render process/view ids.
// is_main_frame_ is true when the blocked resource is the main frame itself.
SafeBrowsingBlockingPage::SafeBrowsingBlockingPage(
    SafeBrowsingService* sb_service,
    SafeBrowsingService::Client* client,
    int render_process_host_id,
    int render_view_id,
    const GURL& url,
    ResourceType::Type resource_type,
    SafeBrowsingService::UrlCheckResult result)
    : sb_service_(sb_service),
      client_(client),
      render_process_host_id_(render_process_host_id),
      render_view_id_(render_view_id),
      url_(url),
      result_(result),
      proceed_(false),
      tab_(NULL),
      controller_(NULL),
      delete_pending_(false),
      is_main_frame_(resource_type == ResourceType::MAIN_FRAME),
      created_temporary_entry_(false) {
}

// Deleted on the io_thread.
+SafeBrowsingBlockingPage::~SafeBrowsingBlockingPage() { +} + +void SafeBrowsingBlockingPage::DisplayBlockingPage() { + TabContents* tab = tab_util::GetTabContentsByID(render_process_host_id_, + render_view_id_); + if (!tab || tab->type() != TAB_CONTENTS_WEB) { + NotifyDone(); + return; + } + + tab_ = tab; + controller_ = tab->controller(); + + // Register for notifications of events from this tab. + NotificationService* ns = NotificationService::current(); + DCHECK(ns); + ns->AddObserver(this, NOTIFY_TAB_CLOSING, + Source<NavigationController>(controller_)); + ns->AddObserver(this, NOTIFY_DOM_OPERATION_RESPONSE, + Source<TabContents>(tab_)); + + // Hold an extra reference to ourself until the interstitial is gone. + AddRef(); + + WebContents* web_contents = tab->AsWebContents(); + + // Load the HTML page and create the template components. + DictionaryValue strings; + ResourceBundle& rb = ResourceBundle::GetSharedInstance(); + std::string html; + + if (result_ == SafeBrowsingService::URL_MALWARE) { + std::wstring link = StringPrintf(kSbDiagnosticHtml, + l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_DIAGNOSTIC_PAGE).c_str()); + + strings.SetString(L"badURL", UTF8ToWide(url_.host())); + strings.SetString(L"title", + l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_TITLE)); + strings.SetString(L"headLine", + l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_HEADLINE)); + + // Check to see if we're blocking the main page, or a sub-resource on the + // main page. 
+ GURL top_url = tab_->GetURL(); + if (top_url == url_) { + strings.SetString(L"description1", + l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION1, + UTF8ToWide(url_.host()))); + strings.SetString(L"description2", + l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION2, + link, + UTF8ToWide(url_.host()))); + } else { + strings.SetString(L"description1", + l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION4, + UTF8ToWide(top_url.host()), + UTF8ToWide(url_.host()))); + strings.SetString(L"description2", + l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION5, + link, + UTF8ToWide(url_.host()))); + } + + strings.SetString(L"description3", + l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION3)); + strings.SetString(L"confirm_text", + l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION_AGREE)); + strings.SetString(L"continue_button", + l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_PROCEED_BUTTON)); + strings.SetString(L"back_button", + l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_BACK_BUTTON)); + strings.SetString(L"textdirection", + (l10n_util::GetTextDirection() == l10n_util::RIGHT_TO_LEFT) ? 
+ L"rtl" : L"ltr"); + html = rb.GetDataResource(IDR_SAFE_BROWSING_MALWARE_BLOCK); + } else { + strings.SetString(L"title", + l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_TITLE)); + strings.SetString(L"headLine", + l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_HEADLINE)); + strings.SetString(L"description1", + l10n_util::GetStringF(IDS_SAFE_BROWSING_PHISHING_DESCRIPTION1, + UTF8ToWide(url_.host()))); + strings.SetString(L"description2", + l10n_util::GetStringF(IDS_SAFE_BROWSING_PHISHING_DESCRIPTION2, + UTF8ToWide(url_.host()))); + + strings.SetString(L"continue_button", + l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_PROCEED_BUTTON)); + strings.SetString(L"back_button", + l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_BACK_BUTTON)); + strings.SetString(L"report_error", + l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_REPORT_ERROR)); + strings.SetString(L"textdirection", + (l10n_util::GetTextDirection() == l10n_util::RIGHT_TO_LEFT) ? + L"rtl" : L"ltr"); + html = rb.GetDataResource(IDR_SAFE_BROWSING_PHISHING_BLOCK); + } + + std::string html_page(jstemplate_builder::GetTemplateHtml(html, + &strings, + "template_root")); + + // If the malware is the actual main frame and we have no pending entry + // (typically the navigation was initiated by the page), we create a fake + // navigation entry (so the location bar shows the page's URL). + if (is_main_frame_ && tab_->controller()->GetPendingEntryIndex() == -1) { + // New navigation. + NavigationEntry* nav_entry = new NavigationEntry(TAB_CONTENTS_WEB); + + // We set the page ID to max page id so to ensure the controller considers + // this dummy entry a new one. Because we'll remove the entry when the + // interstitial is going away, it will not conflict with any future + // navigations. 
+ nav_entry->SetPageID(tab_->GetMaxPageID() + 1); + nav_entry->SetPageType(NavigationEntry::INTERSTITIAL_PAGE); + nav_entry->SetURL(url_); + tab_->controller()->DidNavigateToEntry(nav_entry); + created_temporary_entry_ = true; + } + + // Show the interstitial page. + web_contents->ShowInterstitialPage(html_page, this); +} + +void SafeBrowsingBlockingPage::Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details) { + switch (type) { + case NOTIFY_TAB_CLOSING: + HandleClose(); + break; + case NOTIFY_DOM_OPERATION_RESPONSE: + Continue(Details<DomOperationNotificationDetails>(details)->json()); + break; + default: + NOTREACHED(); + } +} + +void SafeBrowsingBlockingPage::InterstitialClosed() { + HandleClose(); +} + +bool SafeBrowsingBlockingPage::GoBack() { + WebContents* web_contents = tab_->AsWebContents(); + NavigationEntry* prev_entry = + web_contents->controller()->GetEntryAtOffset(-1); + + if (!prev_entry) { + // Nothing to go to, default to about:blank. Navigating will cause the + // interstitial to hide which will trigger "this" to be deleted. + tab_->controller()->LoadURL(GURL("about:blank"), + PageTransition::AUTO_BOOKMARK); + } else if (prev_entry->GetType() != TAB_CONTENTS_WEB || + prev_entry->restored() || + !is_main_frame_) { + // We do navigate back if any of these is true: + // - the page is not a WebContents, its TabContents might have to be + // recreated. + // - we have not yet visited that navigation entry (typically session + // restore), in which case the page is not already available. + // - the interstitial was triggered by a sub-resource. In that case we + // really need to navigate, just hiding the interstitial would show the + // page containing the bad resource, and we don't want that. + web_contents->controller()->GoBack(); + } else { + // Otherwise, the user was viewing a page and navigated to a URL that was + // interrupted by an interstitial. 
Thus, we can just hide the interstitial + // and show the page the user was on before. + web_contents->HideInterstitialPage(false, false); + } + + // WARNING: at this point we are now either deleted or pending deletion from + // the IO thread. + + // Remove the navigation entry for the malware page. Note that we always + // remove the entry even if we did not create it as it has been flagged as + // malware and we don't want the user navigating back to it. + web_contents->controller()->RemoveLastEntry(); + + return true; +} + +void SafeBrowsingBlockingPage::Continue(const std::string& user_action) { + TabContents* tab = tab_util::GetTabContentsByID(render_process_host_id_, + render_view_id_); + DCHECK(tab); + WebContents* web = tab->AsWebContents(); + if (user_action == "2") { + // User pressed "Learn more". + GURL url; + if (result_ == SafeBrowsingService::URL_MALWARE) { + url = GURL(l10n_util::GetString(IDS_LEARN_MORE_MALWARE_URL)); + } else if (result_ == SafeBrowsingService::URL_PHISHING) { + url = GURL(l10n_util::GetString(IDS_LEARN_MORE_PHISHING_URL)); + } else { + NOTREACHED(); + } + web->OpenURL(url, CURRENT_TAB, PageTransition::LINK); + return; + } + if (user_action == "3") { + // User pressed "Report error" for a phishing site. + // Note that we cannot just put a link in the interstitial at this point. + // It is not OK to navigate in the context of an interstitial page. + DCHECK(result_ == SafeBrowsingService::URL_PHISHING); + GURL report_url = + safe_browsing_util::GeneratePhishingReportUrl(kSbReportPhishingUrl, + url_.spec()); + web->OpenURL(report_url, CURRENT_TAB, PageTransition::LINK); + return; + } + if (user_action == "4") { + // We're going to take the user to Google's SafeBrowsing diagnostic page. 
+ std::string diagnostic = + StringPrintf(kSbDiagnosticUrl, + EscapeQueryParamValue(url_.spec()).c_str()); + GURL diagnostic_url(diagnostic); + diagnostic_url = google_util::AppendGoogleLocaleParam(diagnostic_url); + DCHECK(result_ == SafeBrowsingService::URL_MALWARE); + web->OpenURL(diagnostic_url, CURRENT_TAB, PageTransition::LINK); + return; + } + + proceed_ = user_action == "1"; + + if (proceed_) { + // We are continuing, if we have created a temporary navigation entry, + // delete it as a new will be created on navigation. + if (created_temporary_entry_) + web->controller()->RemoveLastEntry(); + if (is_main_frame_) + web->HideInterstitialPage(true, true); + else + web->HideInterstitialPage(false, false); + } else { + GoBack(); + } + + NotifyDone(); +} + +void SafeBrowsingBlockingPage::HandleClose() { + NotificationService* ns = NotificationService::current(); + DCHECK(ns); + ns->RemoveObserver(this, NOTIFY_TAB_CLOSING, + Source<NavigationController>(controller_)); + ns->RemoveObserver(this, NOTIFY_DOM_OPERATION_RESPONSE, + Source<TabContents>(tab_)); + + NotifyDone(); + Release(); +} + +void SafeBrowsingBlockingPage::NotifyDone() { + if (delete_pending_) + return; + + delete_pending_ = true; + + if (tab_ && tab_->AsWebContents()) { + // Ensure the WebContents does not keep a pointer to us. + tab_->AsWebContents()->set_interstitial_delegate(NULL); + } + + Thread* io_thread = g_browser_process->io_thread(); + if (!io_thread) + return; + + io_thread->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + sb_service_, + &SafeBrowsingService::OnBlockingPageDone, + this, client_, proceed_)); +} diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page.h b/chrome/browser/safe_browsing/safe_browsing_blocking_page.h new file mode 100644 index 0000000..90dc03a --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page.h @@ -0,0 +1,135 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Classes for managing the SafeBrowsing interstitial pages. +// +// When a user is about to visit a page the SafeBrowsing system has deemed to +// be malicious, either as malware or a phishing page, we show an interstitial +// page with some options (go back, continue) to give the user a chance to avoid +// the harmful page. +// +// The SafeBrowsingBlockingPage is created by the SafeBrowsingService on the IO +// thread when we've determined that a page is malicious. 
The operation of the +// blocking page occurs on the UI thread, where it waits for the user to make a +// decision about what to do: either go back or continue on. +// +// The blocking page forwards the result of the user's choice back to the +// SafeBrowsingService so that we can cancel the request for the new page, or +// or allow it to continue. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_BLOCKING_PAGE_H__ +#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_BLOCKING_PAGE_H__ + +#include "base/logging.h" +#include "chrome/browser/interstitial_page_delegate.h" +#include "chrome/browser/safe_browsing/safe_browsing_service.h" +#include "chrome/common/notification_service.h" +#include "googleurl/src/gurl.h" + +class MessageLoop; +class TabContents; +class NavigationController; + +class SafeBrowsingBlockingPage + : public InterstitialPageDelegate, + public base::RefCountedThreadSafe<SafeBrowsingBlockingPage>, + public NotificationObserver { + public: + // Created and destroyed on the IO thread, operates on the UI thread. + SafeBrowsingBlockingPage(SafeBrowsingService* service, + SafeBrowsingService::Client* client, + int render_process_host_id, + int render_view_id, + const GURL& url, + ResourceType::Type resource_type, + SafeBrowsingService::UrlCheckResult result); + ~SafeBrowsingBlockingPage(); + + // Display the page to the user. This method runs on the UI thread. + void DisplayBlockingPage(); + + // NotificationObserver interface, runs on the UI thread. 
+ virtual void Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details); + + const GURL& url() { return url_; } + int render_process_host_id() { return render_process_host_id_; } + int render_view_id() { return render_view_id_; } + SafeBrowsingService::UrlCheckResult result() { return result_; } + + // InterstitialPageDelegate methods: + virtual void InterstitialClosed(); + virtual bool GoBack(); + + private: + // Handle user action for blocking page navigation choices. + void Continue(const std::string& user_action); + + // Tell the SafeBrowsingService that the handling of the current page is done. + void HandleClose(); + void NotifyDone(); + + private: + // For reporting back user actions. + SafeBrowsingService* sb_service_; + SafeBrowsingService::Client* client_; + MessageLoop* report_loop_; + + // For determining which tab to block. + int render_process_host_id_; + int render_view_id_; + + GURL url_; + SafeBrowsingService::UrlCheckResult result_; + + // Inform the SafeBrowsingService whether we are continuing with this page + // load or going back to the previous page. + bool proceed_; + + // Stored for use in the notification service, and are only used for their + // pointer value, but not for calling methods on. This is done to allow us to + // unregister as observers after the tab has gone (is NULL). + TabContents* tab_; + NavigationController* controller_; + + // Used for cleaning up after ourself. + bool delete_pending_; + + // Whether the flagged resource is the main page (or a sub-resource is false). + bool is_main_frame_; + + // Whether we have created a temporary navigation entry as part of showing + // the blocking page. + bool created_temporary_entry_; + + DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingBlockingPage); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_BLOCKING_PAGE_H__
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc new file mode 100644 index 0000000..9e7c223 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -0,0 +1,1283 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "chrome/browser/safe_browsing/safe_browsing_database.h" + +#include "base/file_util.h" +#include "base/logging.h" +#include "base/message_loop.h" +#include "base/sha2.h" +#include "base/string_util.h" +#include "chrome/browser/safe_browsing/bloom_filter.h" +#include "chrome/browser/safe_browsing/chunk_range.h" +#include "chrome/common/sqlite_compiled_statement.h" +#include "chrome/common/sqlite_utils.h" + +// Database version. If this is different than what's stored on disk, the +// database is reset. +static const int kDatabaseVersion = 2; + +// Filename suffix for the bloom filter. +static const wchar_t kBloomFilterFile[] = L" Filter"; + +// Don't want to create too small of a bloom filter initially while we're +// downloading the data and then keep having to rebuild it. +static const int kBloomFilterMinSize = 250000; + +// How many bits to use per item. See the design doc for more information. +static const int kBloomFilterSizeRatio = 13; + +// The minimum number of reads/misses before we will consider rebuilding the +// bloom filter. This is needed because we don't want a few misses after +// starting the browser to skew the percentage. +// TODO(jabdelmalek): report to UMA how often we rebuild. +static const int kBloomFilterMinReadsToCheckFP = 200; + +// The percentage of hit rate in the bloom filter when we regenerate it. +static const double kBloomFilterMaxFPRate = 5.0; + +// When we awake from a low power state, we try to avoid doing expensive disk +// operations for a few minutes to let the system page itself in and settle +// down. +static const int kOnResumeHoldupMs = 5 * 60 * 1000; // 5 minutes. + +// When doing any database operations that can take a long time, we do it in +// small chunks up to this amount. Once this much time passes, we sleep for +// the same amount and continue. This avoids blocking the thread so that if +// we get a bloom filter hit, we don't block the network request. 
+static const int kMaxThreadHoldupMs = 100; + +// How long to wait after updating the database to write the bloom filter. +static const int kBloomFilterWriteDelayMs = (60 * 1000); + +// The maximum staleness for a cached entry. +static const int kMaxStalenessMinutes = 45; + +SafeBrowsingDatabase::SafeBrowsingDatabase() + : db_(NULL), + init_(false), + transaction_count_(0), + asynchronous_(true), + chunk_inserted_callback_(NULL), +#pragma warning(suppress: 4355) // can use this + bloom_read_factory_(this), +#pragma warning(suppress: 4355) // can use this + bloom_write_factory_(this), +#pragma warning(suppress: 4355) // can use this + process_factory_(this), +#pragma warning(suppress: 4355) // can use this + reset_factory_(this), +#pragma warning(suppress: 4355) // can use this + resume_factory_(this), + disk_delay_(kMaxThreadHoldupMs) { +} + +SafeBrowsingDatabase::~SafeBrowsingDatabase() { + Close(); +} + +bool SafeBrowsingDatabase::Init(const std::wstring& filename, + Callback0::Type* chunk_inserted_callback) { + DCHECK(!init_ && filename_.empty()); + + filename_ = filename; + if (!Open()) + return false; + + bool load_filter = false; + if (!DoesSqliteTableExist(db_, "hosts")) { + if (!CreateTables()) { + // Database could be corrupt, try starting from scratch. + if (!ResetDatabase()) + return false; + } + } else if (!CheckCompatibleVersion()) { + if (!ResetDatabase()) + return false; + } else { + load_filter = true; + } + + bloom_filter_filename_ = BloomFilterFilename(filename_); + + if (load_filter) { + LoadBloomFilter(); + } else { + bloom_filter_.reset( + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + } + + init_ = true; + chunk_inserted_callback_ = chunk_inserted_callback; + return true; +} + +bool SafeBrowsingDatabase::Open() { + if (sqlite3_open(WideToUTF8(filename_).c_str(), &db_) != SQLITE_OK) + return false; + + // Run the database in exclusive mode. 
Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); + + statement_cache_.reset(new SqliteStatementCache(db_)); + bloom_filter_read_count_= 0; + bloom_filter_fp_count_ = 0; + bloom_filter_building_ = false; + + process_factory_.RevokeAll(); + bloom_read_factory_.RevokeAll(); + bloom_write_factory_.RevokeAll(); + + return true; +} + +bool SafeBrowsingDatabase::Close() { + if (!db_) + return true; + + process_factory_.RevokeAll(); + bloom_read_factory_.RevokeAll(); + bloom_write_factory_.RevokeAll(); + + if (!pending_add_del_.empty()) { + while (!pending_add_del_.empty()) + pending_add_del_.pop(); + + EndTransaction(); + } + + while (!pending_chunks_.empty()) { + std::deque<SBChunk>* chunks = pending_chunks_.front(); + safe_browsing_util::FreeChunks(chunks); + delete chunks; + pending_chunks_.pop(); + EndTransaction(); + } + + statement_cache_.reset(); // Must free statements before closing DB. + transaction_.reset(); + bool result = sqlite3_close(db_) == SQLITE_OK; + db_ = NULL; + return result; +} + +bool SafeBrowsingDatabase::CreateTables() { + SQLTransaction transaction(db_); + transaction.Begin(); + + // We use an autoincrement integer as the primary key to allow full table + // scans to be quick. Otherwise if we used host, then we'd have to jump + // all over the table when doing a full table scan to generate the bloom + // filter and that's an order of magnitude slower. By marking host as + // unique, an index is created automatically. 
+ if (sqlite3_exec(db_, "CREATE TABLE hosts (" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "host INTEGER UNIQUE," + "entries BLOB)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; + } + + if (sqlite3_exec(db_, "CREATE TABLE chunks (" + "list_id INTEGER," + "chunk_type INTEGER," + "chunk_id INTEGER," + "hostkeys TEXT)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; + } + + if (sqlite3_exec(db_, "CREATE TABLE list_names (" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "name TEXT)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; + } + + sqlite3_exec(db_, "CREATE INDEX chunks_chunk_id ON chunks(chunk_id)", + NULL, NULL, NULL); + + std::string version = "PRAGMA user_version="; + version += StringPrintf("%d", kDatabaseVersion); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + if (statement->step() != SQLITE_DONE) + return false; + + transaction.Commit(); + return true; +} + +// The SafeBrowsing service assumes this operation is synchronous. 
+bool SafeBrowsingDatabase::ResetDatabase() { + hash_cache_.clear(); + + bool rv = Close(); + DCHECK(rv); + + if (!file_util::Delete(filename_, false)) { + NOTREACHED(); + return false; + } + + bloom_filter_.reset( + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + file_util::Delete(bloom_filter_filename_, false); + + if (!Open()) + return false; + + return CreateTables(); +} + +bool SafeBrowsingDatabase::CheckCompatibleVersion() { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "PRAGMA user_version"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int result = statement->step(); + if (result != SQLITE_ROW) + return false; + + return statement->column_int(0) == kDatabaseVersion; +} + +bool SafeBrowsingDatabase::ContainsUrl( + const GURL& url, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits, + std::vector<SBFullHashResult>* full_hits, + Time last_update) { + matching_list->clear(); + prefix_hits->clear(); + if (!init_) { + DCHECK(false); + return false; + } + + if (!url.is_valid()) + return false; + + std::vector<std::string> hosts, paths; + safe_browsing_util::GenerateHostsToCheck(url, &hosts); + safe_browsing_util::GeneratePathsToCheck(url, &paths); + if (hosts.size() == 0) + return false; + + // Per the spec, if there is at least 3 components, check both the most + // significant three components and the most significant two components. + // If only two components, check the most significant two components. + // If it's an IP address, use the entire IP address as the host. 
+ SBPrefix host_key_2, host_key_3, host_key_ip; + if (url.HostIsIPAddress()) { + base::SHA256HashString(url.host() + "/", &host_key_ip, sizeof(SBPrefix)); + CheckUrl(url.host(), host_key_ip, paths, matching_list, prefix_hits); + } else { + base::SHA256HashString(hosts[0] + "/", &host_key_2, sizeof(SBPrefix)); + if (hosts.size() > 1) + base::SHA256HashString(hosts[1] + "/", &host_key_3, sizeof(SBPrefix)); + + for (size_t i = 0; i < hosts.size(); ++i) { + SBPrefix host_key = i == 0 ? host_key_2 : host_key_3; + CheckUrl(hosts[i], host_key, paths, matching_list, prefix_hits); + } + } + + if (!matching_list->empty() || !prefix_hits->empty()) { + GetCachedFullHashes(prefix_hits, full_hits, last_update); + return true; + } + + // Check if we're getting too many FPs in the bloom filter, in which case + // it's time to rebuild it. + bloom_filter_fp_count_++; + if (!bloom_filter_building_ && + bloom_filter_read_count_ > kBloomFilterMinReadsToCheckFP) { + double fp_rate = bloom_filter_fp_count_ * 100 / bloom_filter_read_count_; + if (fp_rate > kBloomFilterMaxFPRate) { + DeleteBloomFilter(); + MessageLoop::current()->PostTask(FROM_HERE, + bloom_read_factory_.NewRunnableMethod( + &SafeBrowsingDatabase::BuildBloomFilter)); + } + } + + return false; +} + +void SafeBrowsingDatabase::CheckUrl(const std::string& host, + SBPrefix host_key, + const std::vector<std::string>& paths, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits) { + // First see if there are any entries in the db for this host. + SBHostInfo info; + if (!ReadInfo(host_key, &info, NULL)) + return; // No hostkey found. This is definitely safe. 
+ + std::vector<SBFullHash> prefixes; + prefixes.resize(paths.size()); + for (size_t i = 0; i < paths.size(); ++i) + base::SHA256HashString(host + paths[i], &prefixes[i], sizeof(SBFullHash)); + + std::vector<SBPrefix> hits; + int list_id = -1; + if (!info.Contains(prefixes, &list_id, &hits)) + return; + + if (list_id != -1) { + *matching_list = GetListName(list_id); + } else if (hits.empty()) { + prefix_hits->push_back(host_key); + } else { + for (size_t i = 0; i < hits.size(); ++i) + prefix_hits->push_back(hits[i]); + } +} + +bool SafeBrowsingDatabase::ReadInfo(int host_key, SBHostInfo* info, int* id) { + STATS_COUNTER(L"SB.HostSelect", 1); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT id, entries FROM hosts WHERE host=?"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + statement->bind_int(0, host_key); + int result = statement->step(); + if (result == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + + if (result == SQLITE_DONE) + return false; + + if (result != SQLITE_ROW) { + DLOG(ERROR) << "SafeBrowsingDatabase got " + "statement->step() != SQLITE_ROW for " + << host_key; + return false; + } + + if (id) + *id = statement->column_int(0); + + return info->Initialize(statement->column_blob(1), + statement->column_bytes(1)); +} + +void SafeBrowsingDatabase::WriteInfo(int host_key, + const SBHostInfo& info, + int id) { + SQLITE_UNIQUE_STATEMENT(statement1, *statement_cache_, + "INSERT OR REPLACE INTO hosts" + "(host,entries)" + "VALUES (?,?)"); + + SQLITE_UNIQUE_STATEMENT(statement2, *statement_cache_, + "INSERT OR REPLACE INTO hosts" + "(id,host,entries)" + "VALUES (?,?,?)"); + + SqliteCompiledStatement& statement = id == 0 ? 
statement1 : statement2; + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + int start_index = 0; + if (id != 0) { + statement->bind_int(start_index++, id); + STATS_COUNTER(L"SB.HostReplace", 1); + } else { + STATS_COUNTER(L"SB.HostInsert", 1); + } + + statement->bind_int(start_index++, host_key); + statement->bind_blob(start_index++, info.data(), info.size()); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } + AddHostToBloomFilter(host_key); +} + +void SafeBrowsingDatabase::DeleteInfo(int host_key) { + STATS_COUNTER(L"SB.HostDelete", 1); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "DELETE FROM hosts WHERE host=?"); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + statement->bind_int(0, host_key); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabase::StartThrottledWork() { + if (process_factory_.empty()) + RunThrottledWork(); +} + +void SafeBrowsingDatabase::RunThrottledWork() { + while (true) { + bool done = ProcessChunks(); + + if (done) + done = ProcessAddDel(); + + if (done) + break; + + if (asynchronous_) { + // For production code, we want to throttle by calling InvokeLater to + // continue the work after a delay. However for unit tests we depend on + // updates to happen synchronously. + MessageLoop::current()->PostDelayedTask(FROM_HERE, + process_factory_.NewRunnableMethod( + &SafeBrowsingDatabase::RunThrottledWork), disk_delay_); + break; + } else { + Sleep(kMaxThreadHoldupMs); + } + } +} + +void SafeBrowsingDatabase::InsertChunks(const std::string& list_name, + std::deque<SBChunk>* chunks) { + // We've going to be updating the bloom filter, so delete the on-disk + // serialization so that if the process crashes we'll generate a new one on + // startup, instead of reading a stale filter. 
+ DeleteBloomFilter(); + + int list_id = GetListID(list_name); + std::deque<SBChunk>::iterator i = chunks->begin(); + for (; i != chunks->end(); ++i) { + SBChunk& chunk = (*i); + std::deque<SBChunkHost>::iterator j = chunk.hosts.begin(); + for (; j != chunk.hosts.end(); ++j) { + j->entry->set_list_id(list_id); + if (j->entry->IsAdd()) + j->entry->set_chunk_id(chunk.chunk_number); + } + } + + pending_chunks_.push(chunks); + + BeginTransaction(); + StartThrottledWork(); +} + +bool SafeBrowsingDatabase::ProcessChunks() { + if (pending_chunks_.empty()) + return true; + + while (!pending_chunks_.empty()) { + std::deque<SBChunk>* chunks = pending_chunks_.front(); + bool done = false; + // The entries in one chunk are all either adds or subs. + if (chunks->front().hosts.front().entry->IsAdd()) { + done = ProcessAddChunks(chunks); + } else { + done = ProcessSubChunks(chunks); + } + + if (!done) + return false; + + delete chunks; + pending_chunks_.pop(); + EndTransaction(); + } + + if (!bloom_filter_building_) { + if (asynchronous_) { + // When we're updating, there will usually be a bunch of pending_chunks_ + // to process, and we don't want to keep writing the bloom filter to disk + // 10 or 20 times unnecessarily. So schedule to write it in a minute, and + // if any new updates happen in the meantime, push that forward. 
+ if (!bloom_write_factory_.empty()) + bloom_write_factory_.RevokeAll(); + + MessageLoop::current()->PostDelayedTask(FROM_HERE, + bloom_write_factory_.NewRunnableMethod( + &SafeBrowsingDatabase::WriteBloomFilter), + kBloomFilterWriteDelayMs); + } else { + WriteBloomFilter(); + } + } + + if (chunk_inserted_callback_) + chunk_inserted_callback_->Run(); + + return true; +} + +bool SafeBrowsingDatabase::ProcessAddChunks(std::deque<SBChunk>* chunks) { + Time before = Time::Now(); + while (!chunks->empty()) { + SBChunk& chunk = chunks->front(); + int list_id = chunk.hosts.front().entry->list_id(); + int chunk_id = chunk.chunk_number; + + // The server can give us a chunk that we already have because it's part of + // a range. Don't add it again. + if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) { + while (!chunk.hosts.empty()) { + // Read the existing record for this host, if it exists. + SBPrefix host = chunk.hosts.front().host; + SBEntry* entry = chunk.hosts.front().entry; + + UpdateInfo(host, entry, false); + + if (!add_chunk_modified_hosts_.empty()) + add_chunk_modified_hosts_.append(","); + + add_chunk_modified_hosts_.append(StringPrintf("%d", host)); + + entry->Destroy(); + chunk.hosts.pop_front(); + if (!chunk.hosts.empty() && + (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { + return false; + } + } + + AddChunkInformation(list_id, ADD_CHUNK, chunk_id, + add_chunk_modified_hosts_); + add_chunk_modified_hosts_.clear(); + } + + chunks->pop_front(); + } + + return true; +} + +bool SafeBrowsingDatabase::ProcessSubChunks(std::deque<SBChunk>* chunks) { + Time before = Time::Now(); + while (!chunks->empty()) { + SBChunk& chunk = chunks->front(); + int list_id = chunk.hosts.front().entry->list_id(); + int chunk_id = chunk.chunk_number; + + if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) { + while (!chunk.hosts.empty()) { + SBPrefix host = chunk.hosts.front().host; + SBEntry* entry = chunk.hosts.front().entry; + UpdateInfo(host, entry, true); + + 
entry->Destroy(); + chunk.hosts.pop_front(); + if (!chunk.hosts.empty() && + (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { + return false; + } + } + + AddChunkInformation(list_id, SUB_CHUNK, chunk_id, ""); + } + + chunks->pop_front(); + } + + return true; +} + +void SafeBrowsingDatabase::UpdateInfo(SBPrefix host_key, + SBEntry* entry, + bool persist) { + // If an existing record exists, and the new record is smaller, then reuse + // its entry to reduce database fragmentation. + int old_id = 0; + SBHostInfo info; + // If the bloom filter isn't there, then assume that the entry exists, + // otherwise test the bloom filter. + bool exists = !bloom_filter_.get() || bloom_filter_->Exists(host_key); + if (exists) + exists = ReadInfo(host_key, &info, &old_id); + int old_size = info.size(); + + if (entry->IsAdd()) { + info.AddPrefixes(entry); + } else { + ClearCachedHashes(entry); + info.RemovePrefixes(entry, persist); + } + + if (old_size == info.size()) { + // The entry didn't change, so no point writing it. + return; + } + + if (!info.size()) { + // Just delete the existing information instead of writing an empty one. + if (exists) + DeleteInfo(host_key); + return; + } + + if (info.size() > old_size) { + // New record is larger, so just add a new entry. 
+ old_id = 0; + } + + WriteInfo(host_key, info, old_id); +} + +void SafeBrowsingDatabase::DeleteChunks( + std::vector<SBChunkDelete>* chunk_deletes) { + BeginTransaction(); + bool pending_add_del_were_empty = pending_add_del_.empty(); + + for (size_t i = 0; i < chunk_deletes->size(); ++i) { + const SBChunkDelete& chunk = (*chunk_deletes)[i]; + std::vector<int> chunk_numbers; + RangesToChunks(chunk.chunk_del, &chunk_numbers); + for (size_t del = 0; del < chunk_numbers.size(); ++del) { + if (chunk.is_sub_del) { + SubDel(chunk.list_name, chunk_numbers[del]); + } else { + AddDel(chunk.list_name, chunk_numbers[del]); + } + } + } + + if (pending_add_del_were_empty && !pending_add_del_.empty()) { + // Only start a transaction for pending AddDel work if we haven't started + // one already. + BeginTransaction(); + StartThrottledWork(); + } + + delete chunk_deletes; + EndTransaction(); +} + +void SafeBrowsingDatabase::AddDel(const std::string& list_name, + int add_chunk_id) { + STATS_COUNTER(L"SB.ChunkSelect", 1); + int list_id = GetListID(list_name); + // Find all the prefixes that came from the given add_chunk_id. + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT hostkeys FROM chunks WHERE " + "list_id=? AND chunk_type=? 
AND chunk_id=?"); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + std::string hostkeys_str; + statement->bind_int(0, list_id); + statement->bind_int(1, ADD_CHUNK); + statement->bind_int(2, add_chunk_id); + int rv = statement->step(); + if (rv != SQLITE_ROW || !statement->column_string(0, &hostkeys_str)) { + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + NOTREACHED(); + } + + return; + } + + AddDelWork work; + work.list_id = list_id; + work.add_chunk_id = add_chunk_id; + pending_add_del_.push(work); + SplitString(hostkeys_str, ',', &pending_add_del_.back().hostkeys); +} + +bool SafeBrowsingDatabase::ProcessAddDel() { + if (pending_add_del_.empty()) + return true; + + Time before = Time::Now(); + while (!pending_add_del_.empty()) { + AddDelWork& add_del_work = pending_add_del_.front(); + ClearCachedHashesForChunk(add_del_work.list_id, add_del_work.add_chunk_id); + std::vector<std::string>& hostkeys = add_del_work.hostkeys; + for (size_t i = 0; i < hostkeys.size(); ++i) { + SBPrefix host = atoi(hostkeys[i].c_str()); + // Doesn't matter if we use SUB_PREFIX or SUB_FULL_HASH since if there + // are no prefixes it's not used. 
+ SBEntry* entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0); + entry->set_list_id(add_del_work.list_id); + entry->set_chunk_id(add_del_work.add_chunk_id); + UpdateInfo(host, entry, false); + entry->Destroy(); + if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { + hostkeys.erase(hostkeys.begin(), hostkeys.begin() + i); + return false; + } + } + + RemoveChunkId(add_del_work.list_id, ADD_CHUNK, add_del_work.add_chunk_id); + pending_add_del_.pop(); + } + + EndTransaction(); + + return true; +} + +void SafeBrowsingDatabase::SubDel(const std::string& list_name, + int sub_chunk_id) { + RemoveChunkId(GetListID(list_name), SUB_CHUNK, sub_chunk_id); +} + +void SafeBrowsingDatabase::AddChunkInformation(int list_id, + ChunkType type, + int chunk_id, + const std::string& hostkeys) { + STATS_COUNTER(L"SB.ChunkInsert", 1); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO chunks" + "(list_id,chunk_type,chunk_id,hostkeys)" + "VALUES (?,?,?,?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + statement->bind_int(0, list_id); + statement->bind_int(1, type); + statement->bind_int(2, chunk_id); + statement->bind_string(3, hostkeys); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabase::GetListsInfo(std::vector<SBListChunkRanges>* lists) { + lists->clear(); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT name,id FROM list_names"); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + while (true) { + int rv = statement->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + break; + } + int list_id = statement->column_int(1); + lists->push_back(SBListChunkRanges(statement->column_string(0))); + GetChunkIds(list_id, ADD_CHUNK, &lists->back().adds); + GetChunkIds(list_id, SUB_CHUNK, &lists->back().subs); + } +} + +void SafeBrowsingDatabase::GetChunkIds(int 
list_id, + ChunkType type, + std::string* list) { + list->clear(); + STATS_COUNTER(L"SB.ChunkSelect", 1); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk_id FROM chunks WHERE list_id=? AND chunk_type=? " + "ORDER BY chunk_id"); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + statement->bind_int(0, list_id); + statement->bind_int(1, type); + + std::vector<int> chunk_ids; + while (true) { + int rv = statement->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + break; + } + chunk_ids.push_back(statement->column_int(0)); + } + + std::vector<ChunkRange> ranges; + ChunksToRanges(chunk_ids, &ranges); + RangesToString(ranges, list); +} + +bool SafeBrowsingDatabase::ChunkExists(int list_id, + ChunkType type, + int chunk_id) { + STATS_COUNTER(L"SB.ChunkSelect", 1); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk_id FROM chunks WHERE" + " list_id=? AND chunk_type=? AND chunk_id=?"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + statement->bind_int(0, list_id); + statement->bind_int(1, type); + statement->bind_int(2, chunk_id); + + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + return rv == SQLITE_ROW; +} + +void SafeBrowsingDatabase::RemoveChunkId(int list_id, + ChunkType type, + int chunk_id) { + // Also remove the add chunk id from add_chunks + STATS_COUNTER(L"SB.ChunkDelete", 1); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "DELETE FROM chunks WHERE list_id=? AND chunk_type=? 
AND chunk_id=?"); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + statement->bind_int(0, list_id); + statement->bind_int(1, type); + statement->bind_int(2, chunk_id); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +int SafeBrowsingDatabase::AddList(const std::string& name) { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO list_names" + "(id,name)" + "VALUES (NULL,?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return 0; + } + + statement->bind_string(0, name); + int rv = statement->step(); + if (rv != SQLITE_DONE) { + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + NOTREACHED(); + } + + return 0; + } + + return static_cast<int>(sqlite3_last_insert_rowid(db_)); +} + +int SafeBrowsingDatabase::GetListID(const std::string& name) { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT id FROM list_names WHERE name=?"); + if (!statement.is_valid()) { + NOTREACHED(); + return 0; + } + + statement->bind_string(0, name); + int result = statement->step(); + if (result == SQLITE_ROW) + return statement->column_int(0); + + if (result == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + // There isn't an existing entry so add one. 
+ return AddList(name); +} + +std::string SafeBrowsingDatabase::GetListName(int id) { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT name FROM list_names WHERE id=?"); + if (!statement.is_valid()) { + NOTREACHED(); + return 0; + } + + statement->bind_int(0, id); + int result = statement->step(); + if (result != SQLITE_ROW) { + if (result == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + return std::string(); + } + + return statement->column_string(0); +} + +std::wstring SafeBrowsingDatabase::BloomFilterFilename( + const std::wstring& db_filename) { + return db_filename + kBloomFilterFile; +} + +void SafeBrowsingDatabase::LoadBloomFilter() { + DCHECK(!bloom_filter_filename_.empty()); + + int64 size_64; + if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) || + size_64 == 0) { + BuildBloomFilter(); + return; + } + + int size = static_cast<int>(size_64); + char* data = new char[size]; + CHECK(data); + + Time before = Time::Now(); + file_util::ReadFile(bloom_filter_filename_, data, size); + SB_DLOG(INFO) << "SafeBrowsingDatabase read bloom filter in " << + (Time::Now() - before).InMilliseconds() << " ms"; + + bloom_filter_.reset(new BloomFilter(data, size)); +} + +void SafeBrowsingDatabase::DeleteBloomFilter() { + file_util::Delete(bloom_filter_filename_, false); +} + +void SafeBrowsingDatabase::WriteBloomFilter() { + if (!bloom_filter_.get()) + return; + + Time before = Time::Now(); + file_util::WriteFile(bloom_filter_filename_, + bloom_filter_->data(), + bloom_filter_->size()); + SB_DLOG(INFO) << "SafeBrowsingDatabase wrote bloom filter in " << + (Time::Now() - before).InMilliseconds() << " ms"; +} + +void SafeBrowsingDatabase::AddHostToBloomFilter(int host_key) { + if (bloom_filter_building_) + bloom_filter_temp_hostkeys_.push_back(host_key); + // Even if we're rebuilding the bloom filter, we still need to update the + // current one since we also use it to decide whether to do certain database + // operations during update. 
+ if (bloom_filter_.get()) + bloom_filter_->Insert(host_key); +} + +void SafeBrowsingDatabase::BuildBloomFilter() { + // A bloom filter needs the size at creation, however doing a select count(*) + // is too slow since sqlite would have to enumerate each entry to get the + // count. So instead we load all the hostkeys into memory, and then when + // we've read all of them and have the total count, we can create the bloom + // filter. + bloom_filter_temp_hostkeys_.reserve(kBloomFilterMinSize); + + bloom_filter_building_ = true; + bloom_filter_rebuild_time_ = Time::Now(); + + BeginTransaction(); + + OnReadHostKeys(0); +} + +void SafeBrowsingDatabase::OnReadHostKeys(int start_id) { + // Since reading all the keys in one go could take > 20 seconds, instead we + // read them in small chunks. + STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1); + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT host,id FROM hosts WHERE id > ? ORDER BY id"); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + statement->bind_int(0, start_id); + Time before = Time::Now(); + int count = 0; + + int next_id = start_id + 1; + while (true) { + int rv = statement->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + break; + } + + count++; + bloom_filter_temp_hostkeys_.push_back(statement->column_int(0)); + next_id = statement->column_int(1) + 1; + if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { + if (asynchronous_) { + break; + } else { + Sleep(kMaxThreadHoldupMs); + } + } + } + + TimeDelta chunk_time = Time::Now() - before; + int time_ms = static_cast<int>(chunk_time.InMilliseconds()); + SB_DLOG(INFO) << "SafeBrowsingDatabase read " << count << " hostkeys in " << + time_ms << " ms"; + + if (!count || !asynchronous_) { + OnDoneReadingHostKeys(); + return; + } + + // To avoid hammering the disk and disrupting other parts of Chrome that use + // the disk, we throttle the rebuilding. 
+ MessageLoop::current()->PostDelayedTask(FROM_HERE, + bloom_read_factory_.NewRunnableMethod( + &SafeBrowsingDatabase::OnReadHostKeys, next_id), + disk_delay_); +} + +void SafeBrowsingDatabase::OnDoneReadingHostKeys() { + EndTransaction(); + Time before = Time::Now(); + int number_of_keys = std::max(kBloomFilterMinSize, + static_cast<int>(bloom_filter_temp_hostkeys_.size())); + int filter_size = number_of_keys * kBloomFilterSizeRatio; + BloomFilter* filter = new BloomFilter(filter_size); + for (size_t i = 0; i < bloom_filter_temp_hostkeys_.size(); ++i) + filter->Insert(bloom_filter_temp_hostkeys_[i]); + + bloom_filter_.reset(filter); + + TimeDelta bloom_gen = Time::Now() - before; + TimeDelta delta = Time::Now() - bloom_filter_rebuild_time_; + SB_DLOG(INFO) << "SafeBrowsingDatabase built bloom filter in " << + delta.InMilliseconds() << " ms total (" << bloom_gen.InMilliseconds() + << " ms to generate bloom filter). hostkey count: " << + bloom_filter_temp_hostkeys_.size(); + + WriteBloomFilter(); + bloom_filter_building_ = false; + bloom_filter_temp_hostkeys_.clear(); + bloom_filter_read_count_ = 0; + bloom_filter_fp_count_ = 0; +} + +bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) { + if (!bloom_filter_.get()) + return true; + + bloom_filter_read_count_++; + + std::vector<std::string> hosts; + safe_browsing_util::GenerateHostsToCheck(url, &hosts); + if (hosts.size() == 0) + return false; // Could be about:blank. 
+ + SBPrefix host_key; + if (url.HostIsIPAddress()) { + base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix)); + if (bloom_filter_->Exists(host_key)) + return true; + } else { + base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix)); + if (bloom_filter_->Exists(host_key)) + return true; + + if (hosts.size() > 1) { + base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix)); + if (bloom_filter_->Exists(host_key)) + return true; + } + } + return false; +} + +void SafeBrowsingDatabase::BeginTransaction() { + transaction_count_++; + if (transaction_.get() == NULL) { + transaction_.reset(new SQLTransaction(db_)); + if (transaction_->Begin() != SQLITE_OK) { + DCHECK(false) << "Safe browsing database couldn't start transaction"; + transaction_.reset(); + } + } +} + +void SafeBrowsingDatabase::EndTransaction() { + if (--transaction_count_ == 0) { + if (transaction_.get() != NULL) { + STATS_COUNTER(L"SB.TransactionCommit", 1); + transaction_->Commit(); + transaction_.reset(); + } + } +} + +void SafeBrowsingDatabase::GetCachedFullHashes( + const std::vector<SBPrefix>* prefix_hits, + std::vector<SBFullHashResult>* full_hits, + Time last_update) { + DCHECK(prefix_hits && full_hits); + + Time max_age = Time::Now() - TimeDelta::FromMinutes(kMaxStalenessMinutes); + + for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin(); + it != prefix_hits->end(); ++it) { + HashCache::iterator hit = hash_cache_.find(*it); + if (hit != hash_cache_.end()) { + HashList& entries = hit->second; + HashList::iterator eit = entries.begin(); + while (eit != entries.end()) { + // An entry is valid if we've received an update in the past 45 minutes, + // or if this particular GetHash was received in the past 45 minutes. 
+ if (max_age < last_update || eit->received > max_age) { + SBFullHashResult full_hash; + memcpy(&full_hash.hash.full_hash, + &eit->full_hash.full_hash, + sizeof(SBFullHash)); + full_hash.list_name = GetListName(eit->list_id); + full_hash.add_chunk_id = eit->add_chunk_id; + full_hits->push_back(full_hash); + ++eit; + } else { + // Evict the expired entry. + eit = entries.erase(eit); + } + } + + if (entries.empty()) + hash_cache_.erase(hit); + } + } +} + +void SafeBrowsingDatabase::CacheHashResults( + const std::vector<SBFullHashResult>& full_hits) { + const Time now = Time::Now(); + for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin(); + it != full_hits.end(); ++it) { + SBPrefix prefix; + memcpy(&prefix, &it->hash.full_hash, sizeof(prefix)); + HashList& entries = hash_cache_[prefix]; + HashCacheEntry entry; + entry.received = now; + entry.list_id = GetListID(it->list_name); + entry.add_chunk_id = it->add_chunk_id; + memcpy(&entry.full_hash, &it->hash.full_hash, sizeof(SBFullHash)); + entries.push_back(entry); + } +} + +void SafeBrowsingDatabase::ClearCachedHashes(const SBEntry* entry) { + for (int i = 0; i < entry->prefix_count(); ++i) { + SBPrefix prefix; + if (entry->type() == SBEntry::SUB_FULL_HASH) + memcpy(&prefix, &entry->FullHashAt(i), sizeof(SBPrefix)); + else + prefix = entry->PrefixAt(i); + + HashCache::iterator it = hash_cache_.find(prefix); + if (it != hash_cache_.end()) + hash_cache_.erase(it); + } +} + +// This clearing algorithm is a little inefficient, but we don't expect there to +// be too many entries for this to matter. Also, this runs as a background task +// during an update, so no user action is blocking on it. 
+void SafeBrowsingDatabase::ClearCachedHashesForChunk(int list_id, + int add_chunk_id) { + HashCache::iterator it = hash_cache_.begin(); + while (it != hash_cache_.end()) { + HashList& entries = it->second; + HashList::iterator eit = entries.begin(); + while (eit != entries.end()) { + if (eit->list_id == list_id && eit->add_chunk_id == add_chunk_id) + eit = entries.erase(eit); + else + ++eit; + } + if (entries.empty()) + it = hash_cache_.erase(it); + else + ++it; + } +} + +void SafeBrowsingDatabase::HandleCorruptDatabase() { + MessageLoop::current()->PostTask(FROM_HERE, + reset_factory_.NewRunnableMethod( + &SafeBrowsingDatabase::OnHandleCorruptDatabase)); +} + +void SafeBrowsingDatabase::OnHandleCorruptDatabase() { + ResetDatabase(); + DCHECK(false) << "SafeBrowsing database was corrupt and reset"; +} + +void SafeBrowsingDatabase::HandleResume() { + disk_delay_ = kOnResumeHoldupMs; + MessageLoop::current()->PostDelayedTask( + FROM_HERE, + resume_factory_.NewRunnableMethod(&SafeBrowsingDatabase::OnResumeDone), + kOnResumeHoldupMs); +} + +void SafeBrowsingDatabase::OnResumeDone() { + disk_delay_ = kMaxThreadHoldupMs; +}
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h new file mode 100644 index 0000000..0dbe5a9 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_database.h @@ -0,0 +1,322 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__

#include <hash_map>
#include <list>
#include <queue>
#include <vector>

#include "base/scoped_ptr.h"
#include "base/task.h"
#include "base/time.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"
#include "chrome/common/sqlite_compiled_statement.h"
#include "chrome/common/sqlite_utils.h"
#include "googleurl/src/gurl.h"

class BloomFilter;

// Encapsulates the database that stores information about phishing and malware
// sites.  There is one on-disk database for all profiles, as it doesn't
// contain user-specific data.  This object is not thread-safe, i.e. all its
// methods should be used on the same thread that it was created on, with the
// exception of NeedToCheckUrl.
class SafeBrowsingDatabase {
 public:
  SafeBrowsingDatabase();
  ~SafeBrowsingDatabase();

  // Initializes the database with the given filename.  The callback is
  // executed after finishing a chunk.
  bool Init(const std::wstring& filename,
            Callback0::Type* chunk_inserted_callback);

  // Deletes the current database and creates a new one.
  bool ResetDatabase();

  // This function can be called on any thread to check if the given url may be
  // in the database.  If this function returns false, it is definitely not in
  // the database and ContainsUrl doesn't need to be called.  If it returns
  // true, then the url might be in the database and ContainsUrl needs to be
  // called.  This function can only be called after Init succeeded.
  bool NeedToCheckUrl(const GURL& url);

  // Returns false if the given url is not in the database.  If it returns
  // true, then either |matching_list| is the name of the matching list, or
  // |prefix_hits| contains the matching hash prefixes.
  bool ContainsUrl(const GURL& url,
                   std::string* matching_list,
                   std::vector<SBPrefix>* prefix_hits,
                   std::vector<SBFullHashResult>* full_hits,
                   Time last_update);

  // Processes add/sub commands.  Database will free the chunks when it's done.
  void InsertChunks(const std::string& list_name, std::deque<SBChunk>* chunks);

  // Processes adddel/subdel commands.  Database will free chunk_deletes when
  // it's done.
  void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes);

  // Returns the lists and their add/sub chunks.
  void GetListsInfo(std::vector<SBListChunkRanges>* lists);

  // Call this to make all database operations synchronous.  While useful for
  // testing, this should never be called in chrome.exe because it can lead
  // to blocking user requests.
  void set_synchronous() { asynchronous_ = false; }

  // Store the results of a GetHash response.
  void CacheHashResults(const std::vector<SBFullHashResult>& full_hits);

  // Called when the user's machine has resumed from a lower power state.
  void HandleResume();

 private:
  friend class SafeBrowsing_HashCaching_Test;

  // Opens the database.
  bool Open();

  // Closes the database.
  bool Close();

  // Creates the SQL tables.
  bool CreateTables();

  // Checks the database version and if it's incompatible with the current one,
  // resets the database.
  bool CheckCompatibleVersion();

  // Updates, or adds if new, a hostkey's record with the given add/sub entry.
  // If this is a sub, removes the given prefixes, or all if prefixes is empty,
  // from host_key's record.  If persist is true, then if the add_chunk_id isn't
  // found the entry will store this sub information for future reference.
  // Otherwise the entry will not be modified if there are no matches.
  void UpdateInfo(SBPrefix host, SBEntry* entry, bool persist);

  // Checks whether any of the given prefixes exist for the given host.  There
  // is no return value; the matching list or any prefix matches are reported
  // through |matching_list| and |prefix_hits|.
  void CheckUrl(const std::string& host,
                SBPrefix host_key,
                const std::vector<std::string>& paths,
                std::string* matching_list,
                std::vector<SBPrefix>* prefix_hits);

  enum ChunkType {
    ADD_CHUNK = 0,
    SUB_CHUNK = 1,
  };

  // Adds information about the given chunk to the chunks table.
  void AddChunkInformation(int list_id,
                           ChunkType type,
                           int chunk_id,
                           const std::string& hostkeys);  // only used for add

  // Return a comma separated list of chunk ids that are in the database for
  // the given list and chunk type.
  void GetChunkIds(int list_id, ChunkType type, std::string* list);

  // Checks if a chunk is in the database.
  bool ChunkExists(int list_id, ChunkType type, int chunk_id);

  // Removes the given id from our list of chunk ids.
  void RemoveChunkId(int list_id, ChunkType type, int chunk_id);

  // Reads the host's information from the database.  Returns true if it was
  // found, or false otherwise.
  bool ReadInfo(int host_key, SBHostInfo* info, int* id);

  // Writes the host's information to the database, overwriting any existing
  // information for that host_key if it existed.
  void WriteInfo(int host_key, const SBHostInfo& info, int id);

  // Deletes existing information for the given hostkey.
  void DeleteInfo(int host_key);

  // Adds the given list to the database.  Returns its row id.
  int AddList(const std::string& name);

  // Given a list name, returns its internal id.  If we haven't seen it before,
  // an id is created and stored in the database.  On error, returns 0.
  int GetListID(const std::string& name);

  // Given a list id, returns its name.
  std::string GetListName(int id);

  // Returns the bloom filter file name that goes with the given database file
  // name.
  static std::wstring BloomFilterFilename(const std::wstring& db_filename);

  // Load the bloom filter off disk.  Generates one if it can't find it.
  void LoadBloomFilter();

  // Deletes the on-disk bloom filter, i.e. because it's stale.
  void DeleteBloomFilter();

  // Writes the current bloom filter to disk.
  void WriteBloomFilter();

  // Adds the host to the bloom filter.
  void AddHostToBloomFilter(int host_key);

  // Generate a bloom filter.
  void BuildBloomFilter();

  // Used when generating the bloom filter.  Reads a small number of hostkeys
  // starting at the given row id.
  void OnReadHostKeys(int start_id);

  // Called when we finished reading all the hostkeys from the database during
  // bloom filter generation.
  void OnDoneReadingHostKeys();

  void StartThrottledWork();
  void RunThrottledWork();

  // Used when processing an add-del, add chunk and sub chunk commands in small
  // batches so that the db thread is never blocked.  They return true if
  // complete, or false if there's still more work to do.
  bool ProcessChunks();
  bool ProcessAddDel();

  bool ProcessAddChunks(std::deque<SBChunk>* chunks);
  bool ProcessSubChunks(std::deque<SBChunk>* chunks);

  // Transactions nest: each BeginTransaction() must be paired with an
  // EndTransaction().
  void BeginTransaction();
  void EndTransaction();

  // Processes an add-del command, which deletes all the prefixes that came
  // from that add chunk id.
  void AddDel(const std::string& list_name, int add_chunk_id);

  // Processes a sub-del command, which just removes the sub chunk id from
  // our list.
  void SubDel(const std::string& list_name, int sub_chunk_id);

  // Looks up any cached full hashes we may have.
  void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits,
                           std::vector<SBFullHashResult>* full_hits,
                           Time last_update);

  // Remove cached entries that have prefixes contained in the entry.
  void ClearCachedHashes(const SBEntry* entry);

  // Remove all GetHash entries that match the list and chunk id from an AddDel.
  void ClearCachedHashesForChunk(int list_id, int add_chunk_id);

  // HandleCorruptDatabase() schedules OnHandleCorruptDatabase(), which resets
  // the database.
  void HandleCorruptDatabase();
  void OnHandleCorruptDatabase();

  // Runs a small amount of time after the machine has resumed operation from
  // a low power state.
  void OnResumeDone();

  // The database connection.
  sqlite3* db_;

  // Cache of compiled statements for our database.
  scoped_ptr<SqliteStatementCache> statement_cache_;

  // Nesting depth of BeginTransaction() calls, and the transaction object that
  // is open while the depth is non-zero.
  int transaction_count_;
  scoped_ptr<SQLTransaction> transaction_;

  // True iff the database has been opened successfully.
  bool init_;

  std::wstring filename_;

  // Used by the bloom filter.
  std::wstring bloom_filter_filename_;
  scoped_ptr<BloomFilter> bloom_filter_;
  int bloom_filter_read_count_;
  int bloom_filter_fp_count_;

  // These are temp variables used when rebuilding the bloom filter.
  bool bloom_filter_building_;
  std::vector<int> bloom_filter_temp_hostkeys_;
  int bloom_filter_last_hostkey_;
  Time bloom_filter_rebuild_time_;

  // Used to store throttled work for commands that write to the database.
  std::queue<std::deque<SBChunk>*> pending_chunks_;

  // Used during processing of an add chunk.
  std::string add_chunk_modified_hosts_;

  // One queued add-del command: the list and add chunk being deleted, plus the
  // hostkeys (as decimal strings) that still have to be processed.
  struct AddDelWork {
    int list_id;
    int add_chunk_id;
    std::vector<std::string> hostkeys;
  };

  std::queue<AddDelWork> pending_add_del_;

  // Controls whether database writes are done synchronously in one go or
  // asynchronously in small chunks.
  bool asynchronous_;

  // Called after an add/sub chunk is processed.
  Callback0::Type* chunk_inserted_callback_;

  // Used to schedule small bits of work when writing to the database.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> process_factory_;

  // Used to schedule reading the database to rebuild the bloom filter.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_read_factory_;

  // Used to schedule writing the bloom filter after an update.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_write_factory_;

  // Used to schedule resetting the database because of corruption.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> reset_factory_;

  // Used to schedule resuming from a lower power state.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> resume_factory_;

  // Used for caching GetHash results.
  typedef struct HashCacheEntry {
    SBFullHash full_hash;
    int list_id;
    int add_chunk_id;
    Time received;
  } HashCacheEntry;

  // Maps a hash prefix to the cached full-hash entries that share it.
  typedef std::list<HashCacheEntry> HashList;
  typedef stdext::hash_map<SBPrefix, HashList> HashCache;
  HashCache hash_cache_;

  // The amount of time, in milliseconds, to wait before the next disk write.
  int disk_delay_;

  DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingDatabase);
};

#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc new file mode 100644 index 0000000..bab0225 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -0,0 +1,652 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Unit tests for the SafeBrowsing storage system (SafeBrowsingDatabase). 
+ +#include "base/file_util.h" +#include "base/logging.h" +#include "base/path_service.h" +#include "base/process_util.h" +#include "base/sha2.h" +#include "base/stats_counters.h" +#include "base/string_util.h" +#include "base/time.h" +#include "chrome/browser/safe_browsing/protocol_parser.h" +#include "chrome/browser/safe_browsing/safe_browsing_database.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + SBPrefix Sha256Prefix(const std::string& str) { + SBPrefix hash; + base::SHA256HashString(str, &hash, sizeof(hash)); + return hash; + } +} + +// Helper function to do an AddDel or SubDel command. +void DelChunk(SafeBrowsingDatabase* db, + const std::string& list, + int chunk_id, + bool is_sub_del) { + std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>; + SBChunkDelete chunk_delete; + chunk_delete.list_name = list; + chunk_delete.is_sub_del = is_sub_del; + chunk_delete.chunk_del.push_back(ChunkRange(chunk_id)); + deletes->push_back(chunk_delete); + db->DeleteChunks(deletes); +} + +void AddDelChunk(SafeBrowsingDatabase* db, const std::string& list, int chunk_id) { + DelChunk(db, list, chunk_id, false); +} + +void SubDelChunk(SafeBrowsingDatabase* db, const std::string& list, int chunk_id) { + DelChunk(db, list, chunk_id, true); +} + +// Checks database reading/writing. +TEST(SafeBrowsing, Database) { + std::wstring filename; + PathService::Get(base::DIR_TEMP, &filename); + filename.push_back(file_util::kPathSeparator); + filename.append(L"SafeBrowsingTestDatabase"); + DeleteFile(filename.c_str()); // In case it existed from a previous run. + + SafeBrowsingDatabase database; + database.set_synchronous(); + EXPECT_TRUE(database.Init(filename, NULL)); + + // Add a simple chunk with one hostkey. 
+ SBChunkHost host; + host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->set_chunk_id(1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html")); + + SBChunk chunk; + chunk.chunk_number = 1; + chunk.hosts.push_back(host); + + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database.InsertChunks("goog-malware", chunks); + + // Add another chunk with two different hostkeys. + host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->set_chunk_id(1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/notevil2.html")); + + chunk.chunk_number = 2; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + + host.host = Sha256Prefix("www.good.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->SetPrefixAt(0, Sha256Prefix("www.good.com/good1.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.good.com/good2.html")); + + chunk.hosts.push_back(host); + + chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + + database.InsertChunks("goog-malware", chunks); + + // and a chunk with an IP-based host + host.host = Sha256Prefix("192.168.0.1/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->SetPrefixAt(0, Sha256Prefix("192.168.0.1/malware.html")); + + chunk.chunk_number = 3; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + + chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database.InsertChunks("goog-malware", chunks); + + + // Make sure they were added correctly. 
+ std::vector<SBListChunkRanges> lists; + database.GetListsInfo(&lists); + EXPECT_EQ(lists.size(), 1); + EXPECT_EQ(lists[0].name, "goog-malware"); + EXPECT_EQ(lists[0].adds, "1-3"); + EXPECT_TRUE(lists[0].subs.empty()); + + const Time now = Time::Now(); + std::vector<SBFullHashResult> full_hashes; + std::vector<SBPrefix> prefix_hits; + std::string matching_list; + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); + EXPECT_EQ(prefix_hits.size(), 1); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://192.168.0.1/malware.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/"), + &matching_list, &prefix_hits, + &full_hashes, now)); + EXPECT_EQ(prefix_hits.size(), 0); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/robots.txt"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + // Test removing a single prefix from the add chunk. 
+ host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2); + host.entry->set_chunk_id(2); + host.entry->SetChunkIdAtPrefix(0, 2); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html")); + + chunk.chunk_number = 4; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + + chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + + database.InsertChunks("goog-malware", chunks); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); + EXPECT_EQ(prefix_hits.size(), 1); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/notevil1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + EXPECT_EQ(prefix_hits.size(), 0); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + database.GetListsInfo(&lists); + EXPECT_EQ(lists.size(), 1); + EXPECT_EQ(lists[0].name, "goog-malware"); + EXPECT_EQ(lists[0].subs, "4"); + + // Test removing all the prefixes from an add chunk. 
+ AddDelChunk(&database, "goog-malware", 2); + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + database.GetListsInfo(&lists); + EXPECT_EQ(lists.size(), 1); + EXPECT_EQ(lists[0].name, "goog-malware"); + EXPECT_EQ(lists[0].subs, "4"); + + // The adddel command exposed a bug in the transaction code where any + // transaction after it would fail. Add a dummy entry and remove it to + // make sure the transcation work fine. + host.host = Sha256Prefix("www.redherring.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->set_chunk_id(1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.redherring.com/index.html")); + + chunk.chunk_number = 44; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + + chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database.InsertChunks("goog-malware", chunks); + + // Now remove the dummy entry. If there are any problems with the + // transactions, asserts will fire. + AddDelChunk(&database, "goog-malware", 44); + + // Test the subdel command. + SubDelChunk(&database, "goog-malware", 4); + database.GetListsInfo(&lists); + EXPECT_EQ(lists.size(), 1); + EXPECT_EQ(lists[0].name, "goog-malware"); + EXPECT_EQ(lists[0].subs, ""); + + // Test a sub command coming in before the add. 
+ host.host = Sha256Prefix("www.notevilanymore.com/"); + host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0); + host.entry->set_chunk_id(10); + + chunk.chunk_number = 5; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + + chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database.InsertChunks("goog-malware", chunks); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/index.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + // Now insert the tardy add chunk. + host.host = Sha256Prefix("www.notevilanymore.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->SetPrefixAt(0, Sha256Prefix("www.notevilanymore.com/index.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.notevilanymore.com/good.html")); + + chunk.chunk_number = 10; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + + chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database.InsertChunks("goog-malware", chunks); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/index.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/good.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + DeleteFile(filename.c_str()); // Clean up. +} + +// Utility function for setting up the database for the caching test. +void PopulateDatabaseForCacheTest(SafeBrowsingDatabase* database) { + // Add a simple chunk with one hostkey and cache it. 
+ SBChunkHost host; + host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->set_chunk_id(1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html")); + + SBChunk chunk; + chunk.chunk_number = 1; + chunk.hosts.push_back(host); + + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database->InsertChunks("goog-malware-shavar", chunks); + + // Add the GetHash results to the cache. + SBFullHashResult full_hash; + base::SHA256HashString("www.evil.com/phishing.html", + &full_hash.hash, sizeof(SBFullHash)); + full_hash.list_name = "goog-malware-shavar"; + full_hash.add_chunk_id = 1; + + std::vector<SBFullHashResult> results; + results.push_back(full_hash); + + base::SHA256HashString("www.evil.com/malware.html", + &full_hash.hash, sizeof(SBFullHash)); + results.push_back(full_hash); + + database->CacheHashResults(results); +} + +TEST(SafeBrowsing, HashCaching) { + std::wstring filename; + PathService::Get(base::DIR_TEMP, &filename); + filename.push_back(file_util::kPathSeparator); + filename.append(L"SafeBrowsingTestDatabase"); + DeleteFile(filename.c_str()); // In case it existed from a previous run. + + SafeBrowsingDatabase database; + database.set_synchronous(); + EXPECT_TRUE(database.Init(filename, NULL)); + + PopulateDatabaseForCacheTest(&database); + + // We should have both full hashes in the cache. + EXPECT_EQ(database.hash_cache_.size(), 2); + + // Test the cache lookup for the first prefix. 
+ std::string list; + std::vector<SBPrefix> prefixes; + std::vector<SBFullHashResult> full_hashes; + database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 1); + + SBFullHashResult full_hash; + base::SHA256HashString("www.evil.com/phishing.html", + &full_hash.hash, sizeof(SBFullHash)); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + &full_hash.hash, sizeof(SBFullHash)), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test the cache lookup for the second prefix. + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 1); + base::SHA256HashString("www.evil.com/malware.html", + &full_hash.hash, sizeof(SBFullHash)); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + &full_hash.hash, sizeof(SBFullHash)), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test removing a prefix via a sub chunk. + SBChunkHost host; + host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2); + host.entry->set_chunk_id(1); + host.entry->SetChunkIdAtPrefix(0, 1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); + + SBChunk chunk; + chunk.chunk_number = 2; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database.InsertChunks("goog-malware-shavar", chunks); + + // This prefix should still be there. + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 1); + base::SHA256HashString("www.evil.com/malware.html", + &full_hash.hash, sizeof(SBFullHash)); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + &full_hash.hash, sizeof(SBFullHash)), 0); + + prefixes.clear(); + full_hashes.clear(); + + // This prefix should be gone. 
+ database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test that an AddDel for the original chunk removes the last cached entry. + AddDelChunk(&database, "goog-malware-shavar", 1); + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 0); + EXPECT_EQ(database.hash_cache_.size(), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test that the cache won't return expired values. First we have to adjust + // the cached entries' received time to make them older, since the database + // cache insert uses Time::Now(). First, store some entries. + PopulateDatabaseForCacheTest(&database); + EXPECT_EQ(database.hash_cache_.size(), 2); + + // Now adjust one of the entries times to be in the past. + Time expired = Time::Now() - TimeDelta::FromMinutes(60); + SBPrefix key; + memcpy(&key, &full_hash.hash, sizeof(SBPrefix)); + SafeBrowsingDatabase::HashList& entries = database.hash_cache_[key]; + SafeBrowsingDatabase::HashCacheEntry entry = entries.front(); + entries.pop_front(); + entry.received = expired; + entries.push_back(entry); + + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, expired); + EXPECT_EQ(full_hashes.size(), 0); + + // Expired entry was dumped. + EXPECT_EQ(database.hash_cache_.size(), 1); + + // This entry should still exist. 
+ database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &list, &prefixes, &full_hashes, expired); + EXPECT_EQ(full_hashes.size(), 1); +} + +void PrintStat(const wchar_t* name) { + int value = StatsTable::current()->GetCounterValue(name); + std::wstring out = StringPrintf(L"%s %d\r\n", name, value); + OutputDebugStringW(out.c_str()); +} + +std::wstring GetFullSBDataPath(const std::wstring& path) { + std::wstring full_path; + CHECK(PathService::Get(base::DIR_SOURCE_ROOT, &full_path)); + file_util::AppendToPath(&full_path, L"chrome\\test\\data\\safe_browsing"); + file_util::AppendToPath(&full_path, path); + CHECK(file_util::PathExists(full_path)); + return full_path; +} + +struct ChunksInfo { + std::deque<SBChunk>* chunks; + std::string listname; +}; + +void PeformUpdate(const std::wstring& initial_db, + const std::vector<ChunksInfo>& chunks, + std::vector<SBChunkDelete>* deletes) { + IoCounters before, after; + + std::wstring filename; + PathService::Get(base::DIR_TEMP, &filename); + filename.push_back(file_util::kPathSeparator); + filename.append(L"SafeBrowsingTestDatabase"); + DeleteFile(filename.c_str()); // In case it existed from a previous run. 
+ + if (!initial_db.empty()) { + std::wstring full_initial_db = GetFullSBDataPath(initial_db); + ASSERT_TRUE(file_util::CopyFile(full_initial_db, filename)); + } + + SafeBrowsingDatabase database; + database.set_synchronous(); + EXPECT_TRUE(database.Init(filename, NULL)); + + Time before_time = Time::Now(); + ProcessHandle handle = Process::Current().handle(); + scoped_ptr<process_util::ProcessMetrics> metric( + process_util::ProcessMetrics::CreateProcessMetrics(handle)); + CHECK(metric->GetIOCounters(&before)); + + database.DeleteChunks(deletes); + + for (size_t i = 0; i < chunks.size(); ++i) + database.InsertChunks(chunks[i].listname, chunks[i].chunks); + + CHECK(metric->GetIOCounters(&after)); + + OutputDebugStringA(StringPrintf("I/O Read Bytes: %d\r\n", + after.ReadTransferCount - before.ReadTransferCount).c_str()); + OutputDebugStringA(StringPrintf("I/O Write Bytes: %d\r\n", + after.WriteTransferCount - before.WriteTransferCount).c_str()); + OutputDebugStringA(StringPrintf("I/O Reads: %d\r\n", + after.ReadOperationCount - before.ReadOperationCount).c_str()); + OutputDebugStringA(StringPrintf("I/O Writes: %d\r\n", + after.WriteOperationCount - before.WriteOperationCount).c_str()); + OutputDebugStringA(StringPrintf("Finished in %d ms\r\n", + (Time::Now() - before_time).InMilliseconds()).c_str()); + + PrintStat(L"c:SB.HostSelect"); + PrintStat(L"c:SB.HostSelectForBloomFilter"); + PrintStat(L"c:SB.HostReplace"); + PrintStat(L"c:SB.HostInsert"); + PrintStat(L"c:SB.HostDelete"); + PrintStat(L"c:SB.ChunkSelect"); + PrintStat(L"c:SB.ChunkInsert"); + PrintStat(L"c:SB.ChunkDelete"); + PrintStat(L"c:SB.TransactionCommit"); +} + +void UpdateDatabase(const std::wstring& initial_db, + const std::wstring& response_path, + const std::wstring& updates_path) { + + // First we read the chunks from disk, so that this isn't counted in IO bytes. 
+ std::vector<ChunksInfo> chunks; + + SafeBrowsingProtocolParser parser; + if (!updates_path.empty()) { + std::wstring data_dir = GetFullSBDataPath(updates_path); + file_util::FileEnumerator file_enum( + data_dir, false, file_util::FileEnumerator::FILES); + while (true) { + std::wstring file = file_enum.Next(); + if (file.empty()) + break; + + int64 size64; + bool result = file_util::GetFileSize(file, &size64); + CHECK(result); + + int size = static_cast<int>(size64); + scoped_array<char> data(new char[size]); + file_util::ReadFile(file, data.get(), size); + + ChunksInfo info; + info.chunks = new std::deque<SBChunk>; + + bool re_key; + result = parser.ParseChunk(data.get(), size, "", "", &re_key, info.chunks); + CHECK(result); + + info.listname = WideToASCII(file_util::GetFilenameFromPath(file)); + size_t index = info.listname.find('_'); // Get rid fo the _s or _a. + info.listname.resize(index); + info.listname.erase(0, 3); // Get rid of the 000 etc. + + chunks.push_back(info); + } + } + + std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>; + if (!response_path.empty()) { + std::string update; + std::wstring full_response_path = GetFullSBDataPath(response_path); + if (file_util::ReadFileToString(full_response_path, &update)) { + int next_update; + bool result, rekey, reset; + std::vector<ChunkUrl> urls; + result = parser.ParseUpdate(update.c_str(), + static_cast<int>(update.length()), + "", + &next_update, + &rekey, + &reset, + deletes, + &urls); + DCHECK(result); + if (!updates_path.empty()) + DCHECK(urls.size() == chunks.size()); + } + } + + PeformUpdate(initial_db, chunks, deletes); +} + +// Counts the IO needed for the initial update of a database. +// test\data\safe_browsing\download_update.py was used to fetch the add/sub +// chunks that are read, in order to get repeatable runs. +TEST(SafeBrowsing, DISABLED_DatabaseInitialIO) { + UpdateDatabase(L"", L"", L"initial"); +} + +// Counts the IO needed to update a month old database. 
+// The data files were generated by running "..\download_update.py postdata" +// in the "safe_browsing\old" directory. +TEST(SafeBrowsing, DISABLED_DatabaseOldIO) { + UpdateDatabase(L"old\\SafeBrowsing", L"old\\response", L"old\\updates"); +} + +// Like DatabaseOldIO but only the deletes. +TEST(SafeBrowsing, DISABLED_DatabaseOldDeletesIO) { + UpdateDatabase(L"old\\SafeBrowsing", L"old\\response", L""); +} + +// Like DatabaseOldIO but only the updates. +TEST(SafeBrowsing, DISABLED_DatabaseOldUpdatesIO) { + UpdateDatabase(L"old\\SafeBrowsing", L"", L"old\\updates"); +} + +// Does a a lot of addel's on very large chunks. +TEST(SafeBrowsing, DISABLED_DatabaseOldLotsofDeletesIO) { + std::vector<ChunksInfo> chunks; + std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>; + SBChunkDelete del; + del.is_sub_del = false; + del.list_name = "goog-malware-shavar"; + del.chunk_del.push_back(ChunkRange(3539, 3579)); + deletes->push_back(del); + PeformUpdate(L"old\\SafeBrowsing", chunks, deletes); +}
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc new file mode 100644 index 0000000..f7e47f7 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_service.cc @@ -0,0 +1,567 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include "chrome/browser/safe_browsing/safe_browsing_service.h" + +#include "base/histogram.h" +#include "base/logging.h" +#include "base/message_loop.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "chrome/browser/browser_process.h" +#include "chrome/browser/profile_manager.h" +#include "chrome/browser/safe_browsing/protocol_manager.h" +#include "chrome/browser/safe_browsing/safe_browsing_blocking_page.h" +#include "chrome/browser/safe_browsing/safe_browsing_database.h" +#include "chrome/common/chrome_constants.h" +#include "chrome/common/chrome_paths.h" +#include "chrome/common/pref_names.h" +#include "chrome/common/pref_service.h" +#include "net/base/registry_controlled_domain.h" + +SafeBrowsingService::SafeBrowsingService() + : io_loop_(NULL), + database_(NULL), + protocol_manager_(NULL), + enabled_(false), + resetting_(false) { +} + +SafeBrowsingService::~SafeBrowsingService() { +} + +// Only called on the UI thread. +void SafeBrowsingService::Initialize(MessageLoop* io_loop) { + io_loop_ = io_loop; + + // Get the profile's preference for SafeBrowsing. + std::wstring user_data_dir; + PathService::Get(chrome::DIR_USER_DATA, &user_data_dir); + ProfileManager* profile_manager = g_browser_process->profile_manager(); + Profile* profile = profile_manager->GetDefaultProfile(user_data_dir); + PrefService* pref_service = profile->GetPrefs(); + if (pref_service->GetBoolean(prefs::kSafeBrowsingEnabled)) + Start(); +} + +// Start up SafeBrowsing objects. This can be called at browser start, or when +// the user checks the "Enable SafeBrowsing" option in the Advanced options UI. +void SafeBrowsingService::Start() { + DCHECK(!db_thread_.get()); + db_thread_.reset(new Thread("Chrome_SafeBrowsingThread")); + if (!db_thread_->Start()) + return; + + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnDBInitialize)); + + // Retrieve client MAC keys. 
+ PrefService* local_state = g_browser_process->local_state(); + std::string client_key, wrapped_key; + if (local_state) { + client_key = + WideToASCII(local_state->GetString(prefs::kSafeBrowsingClientKey)); + wrapped_key = + WideToASCII(local_state->GetString(prefs::kSafeBrowsingWrappedKey)); + } + + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnIOInitialize, MessageLoop::current(), + client_key, wrapped_key)); +} + +void SafeBrowsingService::ShutDown() { + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnIOShutdown)); +} + +void SafeBrowsingService::OnIOInitialize(MessageLoop* notify_loop, + const std::string& client_key, + const std::string& wrapped_key) { + DCHECK(MessageLoop::current() == io_loop_); + enabled_ = true; + protocol_manager_ = new SafeBrowsingProtocolManager(this, + notify_loop, + client_key, + wrapped_key); + protocol_manager_->Initialize(); +} + +void SafeBrowsingService::OnDBInitialize() { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + GetDatabase(); +} + +void SafeBrowsingService::OnIOShutdown() { + DCHECK(MessageLoop::current() == io_loop_); + if (!enabled_) + return; + + enabled_ = false; + + // This cancels all in-flight GetHash requests. + delete protocol_manager_; + + if (db_thread_.get()) + db_thread_->message_loop()->DeleteSoon(FROM_HERE, database_); + + // Flush the database thread. Any in-progress database check results will be + // ignored and cleaned up below. + db_thread_.reset(NULL); + + database_ = NULL; + + // Delete checks once the database thread is done, calling back any clients + // with 'URL_SAFE'. + for (CurrentChecks::iterator it = checks_.begin(); + it != checks_.end(); ++it) { + if ((*it)->client) + (*it)->client->OnUrlCheckResult((*it)->url, URL_SAFE); + delete *it; + } + checks_.clear(); + + gethash_requests_.clear(); +} + +// Runs on the UI thread. 
+void SafeBrowsingService::OnEnable(bool enabled) { + if (enabled) + Start(); + else + ShutDown(); +} + +bool SafeBrowsingService::CanCheckUrl(const GURL& url) const { + return url.SchemeIs("http") || url.SchemeIs("https"); +} + +bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) { + DCHECK(MessageLoop::current() == io_loop_); + + if (!enabled_ || !database_) + return true; + + if (!resetting_) { + Time start_time = Time::Now(); + bool need_check = database_->NeedToCheckUrl(url); + UMA_HISTOGRAM_TIMES(L"SB.BloomFilter", Time::Now() - start_time); + if (!need_check) + return true; // The url is definitely safe. + } + + // The url may or may not be safe, need to go to the database to be sure. + SafeBrowsingCheck* check = new SafeBrowsingCheck(); + check->url = url; + check->client = client; + check->result = URL_SAFE; + check->need_get_hash = false; + check->start = Time::Now(); + checks_.insert(check); + + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::CheckDatabase, + check, protocol_manager_->last_update())); + return false; +} + +void SafeBrowsingService::DisplayBlockingPage(const GURL& url, + ResourceType::Type resource_type, + UrlCheckResult result, + Client* client, + MessageLoop* ui_loop, + int render_process_host_id, + int render_view_id) { + // Check if the user has already ignored our warning for this render_view + // and domain. 
+ for (size_t i = 0; i < white_listed_entries_.size(); ++i) { + const WhiteListedEntry& entry = white_listed_entries_[i]; + if (entry.render_process_host_id == render_process_host_id && + entry.render_view_id == render_view_id && + entry.result == result && + entry.domain == + RegistryControlledDomainService::GetDomainAndRegistry(url)) { + MessageLoop::current()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::NotifyClientBlockingComplete, + client, true)); + return; + } + } + + SafeBrowsingBlockingPage* blocking_page = new SafeBrowsingBlockingPage( + this, client, render_process_host_id, render_view_id, url, resource_type, + result); + blocking_page->AddRef(); + ui_loop->PostTask(FROM_HERE, NewRunnableMethod( + blocking_page, &SafeBrowsingBlockingPage::DisplayBlockingPage)); +} + +void SafeBrowsingService::CancelCheck(Client* client) { + DCHECK(MessageLoop::current() == io_loop_); + + for (CurrentChecks::iterator i = checks_.begin(); i != checks_.end(); ++i) { + if ((*i)->client == client) + (*i)->client = NULL; + } +} + +void SafeBrowsingService::CheckDatabase(SafeBrowsingCheck* info, + Time last_update) { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + // If client == NULL it means it was cancelled, no need for db lookup. 
+ if (info->client && GetDatabase()) { + Time now = Time::Now(); + std::string list; + if (GetDatabase()->ContainsUrl(info->url, + &list, + &info->prefix_hits, + &info->full_hits, + last_update)) { + if (info->prefix_hits.empty()) { + info->result = GetResultFromListname(list); + } else { + if (info->full_hits.empty()) + info->need_get_hash = true; + } + } + info->db_time = Time::Now() - now; + } + + if (io_loop_) + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnCheckDone, info)); +} + +void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* info) { + DCHECK(MessageLoop::current() == io_loop_); + + // If we've been shutdown during the database lookup, this check will already + // have been deleted (in OnIOShutdown). + if (!enabled_ || checks_.find(info) == checks_.end()) + return; + + UMA_HISTOGRAM_TIMES(L"SB.Database", Time::Now() - info->start); + if (info->client && info->need_get_hash) { + // We have a partial match so we need to query Google for the full hash. + // Clean up will happen in HandleGetHashResults. + + // See if we have a GetHash request already in progress for this particular + // prefix. If so, we just append ourselves to the list of interested parties + // when the results arrive. We only do this for checks involving one prefix, + // since that is the common case (multiple prefixes will issue the request + // as normal). + if (info->prefix_hits.size() == 1) { + SBPrefix prefix = info->prefix_hits[0]; + GetHashRequests::iterator it = gethash_requests_.find(prefix); + if (it != gethash_requests_.end()) { + // There's already a request in progress. + it->second.push_back(info); + return; + } + + // No request in progress, so we're the first for this prefix. + GetHashRequestors requestors; + requestors.push_back(info); + gethash_requests_[prefix] = requestors; + } + + // Reset the start time so that we can measure the network time without the + // database time. 
+ info->start = Time::Now(); + protocol_manager_->GetFullHash(info, info->prefix_hits); + } else { + // We may have cached results for previous GetHash queries. + HandleOneCheck(info, info->full_hits); + } +} + +SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + if (database_) + return database_; + + std::wstring path; + bool result = PathService::Get(chrome::DIR_USER_DATA, &path); + DCHECK(result); + + path.append(L"\\"); + path.append(chrome::kSafeBrowsingFilename); + + Time before = Time::Now(); + SafeBrowsingDatabase* database = new SafeBrowsingDatabase(); + Callback0::Type* callback = + NewCallback(this, &SafeBrowsingService::ChunkInserted); + result = database->Init(path, callback); + if (!result) { + NOTREACHED(); + return NULL; + } + + database_ = database; + + TimeDelta open_time = Time::Now() - before; + SB_DLOG(INFO) << "SafeBrowsing database open took " << + open_time.InMilliseconds() << " ms."; + + return database_; +} + +// Public API called only on the IO thread. +// The SafeBrowsingProtocolManager has received the full hash results for +// prefix hits detected in the database. +void SafeBrowsingService::HandleGetHashResults( + SafeBrowsingCheck* check, + const std::vector<SBFullHashResult>& full_hashes) { + if (checks_.find(check) == checks_.end()) + return; + + DCHECK(enabled_); + + UMA_HISTOGRAM_LONG_TIMES(L"SB.Network", Time::Now() - check->start); + OnHandleGetHashResults(check, full_hashes); // 'check' is deleted here. 
+ + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::CacheHashResults, full_hashes)); +} + +void SafeBrowsingService::OnHandleGetHashResults( + SafeBrowsingCheck* check, + const std::vector<SBFullHashResult>& full_hashes) { + SBPrefix prefix = check->prefix_hits[0]; + GetHashRequests::iterator it = gethash_requests_.find(prefix); + if (check->prefix_hits.size() > 1 || it == gethash_requests_.end()) { + HandleOneCheck(check, full_hashes); + return; + } + + // Call back all interested parties. + GetHashRequestors& requestors = it->second; + for (GetHashRequestors::iterator r = requestors.begin(); + r != requestors.end(); ++r) { + HandleOneCheck(*r, full_hashes); + } + + gethash_requests_.erase(it); +} + +void SafeBrowsingService::HandleOneCheck( + SafeBrowsingCheck* check, + const std::vector<SBFullHashResult>& full_hashes) { + if (check->client) { + UrlCheckResult result = URL_SAFE; + int index = safe_browsing_util::CompareFullHashes(check->url, full_hashes); + if (index != -1) + result = GetResultFromListname(full_hashes[index].list_name); + + // Let the client continue handling the original request. + check->client->OnUrlCheckResult(check->url, result); + } + + checks_.erase(check); + delete check; +} + +void SafeBrowsingService::GetAllChunks() { + DCHECK(MessageLoop::current() == io_loop_); + DCHECK(enabled_); + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::GetAllChunksFromDatabase)); +} + +void SafeBrowsingService::OnBlockingPageDone(SafeBrowsingBlockingPage* page, + Client* client, + bool proceed) { + NotifyClientBlockingComplete(client, proceed); + + if (proceed) { + // Whitelist this domain and warning type for the given tab. 
+ WhiteListedEntry entry; + entry.render_process_host_id = page->render_process_host_id(); + entry.render_view_id = page->render_view_id(); + entry.domain = + RegistryControlledDomainService::GetDomainAndRegistry(page->url()); + entry.result = page->result(); + white_listed_entries_.push_back(entry); + } + + page->Release(); +} + +void SafeBrowsingService::NotifyClientBlockingComplete(Client* client, + bool proceed) { + client->OnBlockingPageComplete(proceed); +} + +// This method runs on the UI loop to access the prefs. +void SafeBrowsingService::OnNewMacKeys(const std::string& client_key, + const std::string& wrapped_key) { + PrefService* prefs = g_browser_process->local_state(); + if (prefs) { + prefs->SetString(prefs::kSafeBrowsingClientKey, ASCIIToWide(client_key)); + prefs->SetString(prefs::kSafeBrowsingWrappedKey, ASCIIToWide(wrapped_key)); + } +} + +void SafeBrowsingService::ChunkInserted() { + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnChunkInserted)); +} + +void SafeBrowsingService::OnChunkInserted() { + DCHECK(MessageLoop::current() == io_loop_); + protocol_manager_->OnChunkInserted(); +} + +// static +void SafeBrowsingService::RegisterUserPrefs(PrefService* prefs) { + prefs->RegisterStringPref(prefs::kSafeBrowsingClientKey, L""); + prefs->RegisterStringPref(prefs::kSafeBrowsingWrappedKey, L""); +} + +void SafeBrowsingService::ResetDatabase() { + DCHECK(MessageLoop::current() == io_loop_); + resetting_ = true; + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnResetDatabase)); +} + +void SafeBrowsingService::OnResetDatabase() { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + GetDatabase()->ResetDatabase(); + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnResetComplete)); +} + +void SafeBrowsingService::OnResetComplete() { + DCHECK(MessageLoop::current() == io_loop_); + resetting_ = false; +} + +void 
SafeBrowsingService::HandleChunk(const std::string& list, + std::deque<SBChunk>* chunks) { + DCHECK(MessageLoop::current() == io_loop_); + DCHECK(enabled_); + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::HandleChunkForDatabase, list, chunks)); +} + +void SafeBrowsingService::HandleChunkForDatabase( + const std::string& list_name, + std::deque<SBChunk>* chunks) { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + + GetDatabase()->InsertChunks(list_name, chunks); +} + +void SafeBrowsingService::HandleChunkDelete( + std::vector<SBChunkDelete>* chunk_deletes) { + DCHECK(MessageLoop::current() == io_loop_); + DCHECK(enabled_); + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::DeleteChunks, chunk_deletes)); +} + +void SafeBrowsingService::DeleteChunks( + std::vector<SBChunkDelete>* chunk_deletes) { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + + GetDatabase()->DeleteChunks(chunk_deletes); +} + +// Database worker function. +void SafeBrowsingService::GetAllChunksFromDatabase() { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + bool database_error = false; + std::vector<SBListChunkRanges> lists; + if (GetDatabase()) { + GetDatabase()->GetListsInfo(&lists); + } else { + database_error = true; + } + + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnGetAllChunksFromDatabase, lists, + database_error)); +} + +// Called on the io thread with the results of all chunks. 
+void SafeBrowsingService::OnGetAllChunksFromDatabase( + const std::vector<SBListChunkRanges>& lists, bool database_error) { + DCHECK(MessageLoop::current() == io_loop_); + if (!enabled_) + return; + + protocol_manager_->OnGetChunksComplete(lists, database_error); +} + +SafeBrowsingService::UrlCheckResult SafeBrowsingService::GetResultFromListname( + const std::string& list_name) { + if (safe_browsing_util::IsPhishingList(list_name)) { + return URL_PHISHING; + } + + if (safe_browsing_util::IsMalwareList(list_name)) { + return URL_MALWARE; + } + + SB_DLOG(INFO) << "Unknown safe browsing list " << list_name; + return URL_SAFE; +} + +// static +void SafeBrowsingService::LogPauseDelay(TimeDelta time) { + UMA_HISTOGRAM_LONG_TIMES(L"SB.Delay", time); +} + +void SafeBrowsingService::CacheHashResults( + const std::vector<SBFullHashResult>& full_hashes) { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + GetDatabase()->CacheHashResults(full_hashes); +} + +void SafeBrowsingService::OnSuspend() { +} + +// Tell the SafeBrowsing database not to do expensive disk operations for a few +// minutes after waking up. It's quite likely that the act of resuming from a +// low power state will involve much disk activity, which we don't want to +// exacerbate. +void SafeBrowsingService::OnResume() { + DCHECK(MessageLoop::current() == io_loop_); + if (enabled_) { + db_thread_->message_loop()->PostTask(FROM_HERE, + NewRunnableMethod(this, &SafeBrowsingService::HandleResume)); + } +} + +void SafeBrowsingService::HandleResume() { + DCHECK(MessageLoop::current() == db_thread_->message_loop()); + GetDatabase()->HandleResume(); +}
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/safe_browsing_service.h b/chrome/browser/safe_browsing/safe_browsing_service.h new file mode 100644 index 0000000..77e87ec --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_service.h @@ -0,0 +1,279 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// The Safe Browsing service is responsible for downloading anti-phishing and +// anti-malware tables and checking urls against them. 
+ +#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H__ +#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H__ + +#include <deque> +#include <hash_map> +#include <set> +#include <string> +#include <vector> + +#include "base/ref_counted.h" +#include "base/scoped_ptr.h" +#include "base/thread.h" +#include "base/time.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" +#include "googleurl/src/gurl.h" +#include "webkit/glue/resource_type.h" + +class MessageLoop; +class PrefService; +class SafeBrowsingBlockingPage; +class SafeBrowsingDatabase; +class SafeBrowsingProtocolManager; + +// Construction needs to happen on the main thread. +class SafeBrowsingService + : public base::RefCountedThreadSafe<SafeBrowsingService> { + public: + // Users of this service implement this interface to be notified + // asynchronously of the result. + enum UrlCheckResult { + URL_SAFE, + URL_PHISHING, + URL_MALWARE, + }; + + class Client { + public: + virtual ~Client() {} + + // Called when the result of checking a URL is known. + virtual void OnUrlCheckResult(const GURL& url, UrlCheckResult result) = 0; + + // Called when the user has made a decision about how to handle the + // SafeBrowsing interstitial page. + virtual void OnBlockingPageComplete(bool proceed) = 0; + }; + + // Creates the safe browsing service. Need to initialize before using. + SafeBrowsingService(); + ~SafeBrowsingService(); + + // Initializes the service. io_loop is the message loop that the + // caller of this service (ResourceDispatcherHost) wants to be notified on + // for check results. db_loop is the message loop for the thread to do + // the database work. + void Initialize(MessageLoop* io_loop); + + // Called to initialize objects that are used on the io_thread. + void OnIOInitialize(MessageLoop* notify_loop, + const std::string& client_key, + const std::string& wrapped_key); + + // Called to initialize objects that are used on the db_thread. 
+ void OnDBInitialize(); + + // Called to shutdown operations on the io_thread. + void OnIOShutdown(); + + // Called on the main thread to let us know that the io_thread is going away. + void ShutDown(); + + // Called on the IO thread. + + // Returns true if the url's scheme can be checked. + bool CanCheckUrl(const GURL& url) const; + + // Checks if the given url is safe or not. If we can synchronously determine + // that the url is safe, CheckUrl returns true. Otherwise it returns false, + // and "client" is called asynchronously with the result when it is ready. + bool CheckUrl(const GURL& url, Client* client); + + // Cancels a pending check if the result is no longer needed. + void CancelCheck(Client* client); + + // Displays an interstitial page. + void DisplayBlockingPage(const GURL& url, + ResourceType::Type resource_type, + UrlCheckResult result, + Client* client, + MessageLoop* ui_loop, + int render_process_host_id, + int render_view_id); + + // Bundle of SafeBrowsing state for one URL check. + // TODO(paulg): Make this struct private to SafeBrowsingService and maintain + // request mappings using CancelableRequests instead (which can + // store this state for us). + struct SafeBrowsingCheck { + GURL url; + Client* client; + bool need_get_hash; + Time start; // Time that check was sent to SB service. + TimeDelta db_time; // How long DB look-up took. + UrlCheckResult result; + std::vector<SBPrefix> prefix_hits; + std::vector<SBFullHashResult> full_hits; + }; + + // API used by the SafeBrowsingProtocolManager to interface with the + // SafeBrowsing storage system. + void HandleGetHashResults( + SafeBrowsingCheck* check, + const std::vector<SBFullHashResult>& full_hashes); + void HandleChunk(const std::string& list, std::deque<SBChunk>* chunks); + void HandleChunkDelete(std::vector<SBChunkDelete>* chunk_deletes); + void GetAllChunks(); + + // The blocking page on the UI thread has completed. 
+ void OnBlockingPageDone(SafeBrowsingBlockingPage* page, + Client* client, + bool proceed); + + // Called when the SafeBrowsingProtocolManager has received updated MAC keys. + void OnNewMacKeys(const std::string& client_key, + const std::string& wrapped_key); + + // Notification from the advanced options UI. + void OnEnable(bool enabled); + bool enabled() const { return enabled_; } + + // Called by the database (on the db thread) when a chunk insertion is + // complete. + void ChunkInserted(); + + // Preference handling. + static void RegisterUserPrefs(PrefService* prefs); + + // The SafeBrowsing system has instructed us to reset our database. + void ResetDatabase(); + + // Log the user perceived delay caused by SafeBrowsing. This delay is the time + // delta starting from when we would have started reading data from the + // network, and ending when the SafeBrowsing check completes indicating that + // the current page is 'safe'. + static void LogPauseDelay(TimeDelta time); + + // We defer SafeBrowsing work for a short duration when the computer comes + // out of a suspend state to avoid thrashing the disk. + void OnSuspend(); + void OnResume(); + + private: + // Should only be called on db thread as SafeBrowsingDatabase is not + // threadsafe. + SafeBrowsingDatabase* GetDatabase(); + + // Called on the database thread to check a url. + void CheckDatabase(SafeBrowsingCheck* info, Time last_update); + + // Called on the IO thread with the check result. + void OnCheckDone(SafeBrowsingCheck* info); + + // Called on the database thread to retrieve chunks. + void GetAllChunksFromDatabase(); + + // Called on the IOthread with the results of all chunks. + void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists, + bool database_error); + + // Called on the IO thread after the database reports that it added a chunk. + void OnChunkInserted(); + + // Called on the database thread to add/remove chunks and host keys. 
+ // Callee will free the data when it's done. + void HandleChunkForDatabase(const std::string& list, + std::deque<SBChunk>* chunks); + void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes); + + static UrlCheckResult GetResultFromListname(const std::string& list_name); + + void NotifyClientBlockingComplete(Client* client, bool proceed); + + void Start(); + void Stop(); + + // Runs on the db thread to reset the database. We assume that resetting the + // database is a synchronous operation. + void OnResetDatabase(); + + // Runs on the io thread when the reset is complete. + void OnResetComplete(); + + // Store the results of a GetHash request. Runs on the database thread. + void CacheHashResults(const std::vector<SBFullHashResult>& full_hashes); + + // Internal worker function for processing full hashes. + void OnHandleGetHashResults(SafeBrowsingCheck* check, + const std::vector<SBFullHashResult>& full_hashes); + + void HandleOneCheck(SafeBrowsingCheck* check, + const std::vector<SBFullHashResult>& full_hashes); + + // Runs on the database thread to inform the database we've resumed from a low + // power state. + void HandleResume(); + + MessageLoop* io_loop_; + + typedef std::set<SafeBrowsingCheck*> CurrentChecks; + CurrentChecks checks_; + + // Used for issuing only one GetHash request for a given prefix. + typedef std::vector<SafeBrowsingCheck*> GetHashRequestors; + typedef stdext::hash_map<SBPrefix, GetHashRequestors> GetHashRequests; + GetHashRequests gethash_requests_; + + // The sqlite database. We don't use a scoped_ptr because it needs to be + // destructed on a different thread than this object. + SafeBrowsingDatabase* database_; + + // Handles interaction with SafeBrowsing servers. + SafeBrowsingProtocolManager* protocol_manager_; + + // Used for whitelisting a render view when the user ignores our warning. 
+ struct WhiteListedEntry { + int render_process_host_id; + int render_view_id; + std::string domain; + UrlCheckResult result; + }; + + std::vector<WhiteListedEntry> white_listed_entries_; + + // Whether the service is running. 'enabled_' is used by SafeBrowsingService + // on the IO thread during normal operations. + bool enabled_; + + // The SafeBrowsing thread that runs database operations. + scoped_ptr<Thread> db_thread_; + + // Indicates if we are in the process of resetting the database. + bool resetting_; + + DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingService); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H__ diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc new file mode 100644 index 0000000..53339a1e --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_util.cc @@ -0,0 +1,630 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "chrome/browser/safe_browsing/safe_browsing_util.h" + +#include "base/hmac.h" +#include "base/logging.h" +#include "base/sha2.h" +#include "base/string_util.h" +#include "chrome/browser/google_util.h" +#include "net/base/base64.h" +#include "net/base/escape.h" +#include "unicode/locid.h" + +static const int kSafeBrowsingMacDigestSize = 20; + +// Continue to this URL after submitting the phishing report form. +// TODO(paulg): Change to a Chrome specific URL. +static const char kContinueUrlFormat[] = + "http://www.google.com/tools/firefox/toolbar/FT2/intl/%s/submit_success.html"; + +static const char kReportParams[] = "?tpl=generic&continue=%s&url=%s"; + +namespace safe_browsing_util { + +void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts) { + // Per Safe Browsing Protocol 2 spec, first we try the host. Then we try up + // to 4 hostnames starting with the last 5 components and successively + // removing the leading component. The TLD is skipped. 
+ hosts->clear(); + int hostnames_checked = 0; + + std::string host = url.host(); + if (host.empty()) + return; + + const char* host_start = host.c_str(); + const char* index = host_start + host.size() - 1; + bool skipped_tld = false; + while (index != host_start && hostnames_checked < 4) { + if (*index == '.') { + if (!skipped_tld) { + skipped_tld = true; + } else { + const char* host_to_check = index + 1; + hosts->push_back(host_to_check); + hostnames_checked++; + } + } + + index--; + } + + // Check the full host too. + hosts->push_back(host.c_str()); +} + +// Per the Safe Browsing 2 spec, we try the exact path with/without the query +// parameters, and also the 4 paths formed by starting at the root and adding +// more path components. +void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) { + paths->clear(); + std::string path = url.path(); + if (path.empty()) + return; + + if (url.has_query()) + paths->push_back(path + "?" + url.query()); + + paths->push_back(path); + if (path == "/") + return; + + int path_components_checked = 0; + const char* path_start = path.c_str(); + const char* index = path_start; + const char* last_char = path_start + path.size() - 1; + while (*index && index != last_char && path_components_checked < 4) { + if (*index == '/') { + paths->push_back(std::string(path_start, index - path_start + 1)); + path_components_checked++; + } + + index++; + } +} + +int CompareFullHashes(const GURL& url, + const std::vector<SBFullHashResult>& full_hashes) { + if (full_hashes.empty()) + return -1; + + std::vector<std::string> hosts, paths; + GenerateHostsToCheck(url, &hosts); + GeneratePathsToCheck(url, &paths); + + for (size_t h = 0; h < hosts.size(); ++h) { + for (size_t p = 0; p < paths.size(); ++p) { + SBFullHash key; + base::SHA256HashString(hosts[h] + paths[p], + key.full_hash, + sizeof(SBFullHash)); + + for (size_t i = 0; i < full_hashes.size(); ++i) { + if (key == full_hashes[i].hash) + return static_cast<int>(i); + } + } 
+ } + + return -1; +} + +bool IsPhishingList(const std::string& list_name) { + return list_name.find("-phish-") != std::string::npos; +} + +bool IsMalwareList(const std::string& list_name) { + return list_name.find("-malware-") != std::string::npos; +} + +static void DecodeWebSafe(std::string* decoded) { + DCHECK(decoded); + for (size_t i = 0; i < decoded->size(); ++i) { + switch ((*decoded)[i]) { + case '_': + (*decoded)[i] = '/'; + break; + case '-': + (*decoded)[i] = '+'; + break; + } + } +} + +bool VerifyMAC(const std::string& key, const std::string& mac, + const char* data, int data_length) { + std::string key_copy = key; + DecodeWebSafe(&key_copy); + std::string decoded_key; + Base64Decode(key_copy, &decoded_key); + + std::string mac_copy = mac; + DecodeWebSafe(&mac_copy); + std::string decoded_mac; + Base64Decode(mac_copy, &decoded_mac); + + HMAC hmac(HMAC::SHA1, + reinterpret_cast<const unsigned char*>(decoded_key.data()), + static_cast<int>(decoded_key.length())); + const std::string data_str(data, data_length); + unsigned char digest[kSafeBrowsingMacDigestSize]; + if (!hmac.Sign(data_str, digest, kSafeBrowsingMacDigestSize)) + return false; + + return memcmp(digest, decoded_mac.data(), kSafeBrowsingMacDigestSize) == 0; +} + +void FreeChunks(std::deque<SBChunk>* chunks) { + while (!chunks->empty()) { + while (!chunks->front().hosts.empty()) { + chunks->front().hosts.front().entry->Destroy(); + chunks->front().hosts.pop_front(); + } + chunks->pop_front(); + } +} + +GURL GeneratePhishingReportUrl(const std::string& report_page, + const std::string& url_to_report) { + Locale locale = Locale::getDefault(); + const char* lang = locale.getLanguage(); + if (!lang) + lang = "en"; // fallback + const std::string continue_esc = + EscapeQueryParamValue(StringPrintf(kContinueUrlFormat, lang)); + const std::string current_esc = EscapeQueryParamValue(url_to_report); + const std::string format = report_page + kReportParams; + GURL report_url(StringPrintf(format.c_str(), 
+ continue_esc.c_str(), + current_esc.c_str())); + return google_util::AppendGoogleLocaleParam(report_url); +} + +} // namespace safe_browsing_util + +const int SBEntry::kMinSize = sizeof(SBEntry::Data); + +SBEntry* SBEntry::Create(Type type, int prefix_count) { + int size = Size(type, prefix_count); + SBEntry *rv = static_cast<SBEntry*>(malloc(size)); + memset(rv, 0, size); + rv->set_type(type); + rv->set_prefix_count(prefix_count); + return rv; +} + +void SBEntry::Destroy() { + free(this); +} + +bool SBEntry::IsValid() const { + switch (type()) { + case ADD_PREFIX: + case ADD_FULL_HASH: + case SUB_PREFIX: + case SUB_FULL_HASH: + return true; + default: + return false; + } +} + +int SBEntry::Size() const { + return Size(type(), prefix_count()); +} + +int SBEntry::Size(Type type, int prefix_count) { + return sizeof(Data) + prefix_count * PrefixSize(type); +} + +SBEntry* SBEntry::Enlarge(int extra_prefixes) { + int new_prefix_count = prefix_count() + extra_prefixes; + SBEntry* rv = SBEntry::Create(type(), new_prefix_count); + memcpy(rv, this, Size()); + rv->set_prefix_count(new_prefix_count); + Destroy(); + return rv; +} + +void SBEntry::RemovePrefix(int index) { + DCHECK(index < prefix_count()); + int bytes_to_copy = PrefixSize(type()) * (prefix_count() - index - 1); + void* to; + switch (type()) { + case ADD_PREFIX: + to = &add_prefixes_[index]; + break; + case ADD_FULL_HASH: + to = &add_full_hashes_[index]; + break; + case SUB_PREFIX: + to = &sub_prefixes_[index]; + break; + case SUB_FULL_HASH: + to = &sub_full_hashes_[index]; + break; + default: + NOTREACHED(); + } + + char* from = reinterpret_cast<char*>(to) + PrefixSize(type()); + memmove(to, from, bytes_to_copy); + set_prefix_count(prefix_count() - 1); +} + +bool SBEntry::PrefixesMatch( + int index, const SBEntry* that, int that_index) const { + // If they're of different hash sizes, or if they're both adds or subs, then + // they can't match. 
+ if (HashLen() != that->HashLen() || IsAdd() == that->IsAdd()) + return false; + + if (ChunkIdAtPrefix(index) != that->ChunkIdAtPrefix(that_index)) + return false; + + if (HashLen() == sizeof(SBPrefix)) + return PrefixAt(index) == that->PrefixAt(that_index); + + return FullHashAt(index) == that->FullHashAt(that_index); +} + +bool SBEntry::AddPrefixMatches(int index, const SBFullHash& full_hash) const { + DCHECK(IsAdd()); + + if (HashLen() == sizeof(SBFullHash)) + return full_hash == add_full_hashes_[index]; + + SBPrefix prefix; + memcpy(&prefix, &full_hash, sizeof(SBPrefix)); + return prefix == add_prefixes_[index]; +} + +bool SBEntry::IsAdd() const { + return type() == ADD_PREFIX || type() == ADD_FULL_HASH; +} + +bool SBEntry::IsSub() const { + return type() == SUB_PREFIX || type() == SUB_FULL_HASH; +} + +int SBEntry::HashLen() const { + if (type() == ADD_PREFIX || type() == SUB_PREFIX) + return sizeof(SBPrefix); + + return sizeof(SBFullHash); +} + +int SBEntry::PrefixSize(Type type) { + switch (type) { + case ADD_PREFIX: + return sizeof(SBPrefix); + case ADD_FULL_HASH: + return sizeof(SBFullHash); + case SUB_PREFIX: + return sizeof(SBSubPrefix); + case SUB_FULL_HASH: + return sizeof(SBSubFullHash); + default: + NOTREACHED(); + return 0; + } +} + +int SBEntry::ChunkIdAtPrefix(int index) const { + if (type() == SUB_PREFIX) + return sub_prefixes_[index].add_chunk; + + if (type() == SUB_FULL_HASH) + return sub_full_hashes_[index].add_chunk; + + return chunk_id(); +} + +void SBEntry::SetChunkIdAtPrefix(int index, int chunk_id) { + DCHECK(IsSub()); + + if (type() == SUB_PREFIX) { + sub_prefixes_[index].add_chunk = chunk_id; + } else { + sub_full_hashes_[index].add_chunk = chunk_id; + } +} + +const SBPrefix& SBEntry::PrefixAt(int index) const { + DCHECK(HashLen() == sizeof(SBPrefix)); + + if (IsAdd()) + return add_prefixes_[index]; + + return sub_prefixes_[index].prefix; +} + +const SBFullHash& SBEntry::FullHashAt(int index) const { + DCHECK(HashLen() == 
sizeof(SBFullHash)); + + if (IsAdd()) + return add_full_hashes_[index]; + + return sub_full_hashes_[index].prefix; +} + +void SBEntry::SetPrefixAt(int index, const SBPrefix& prefix) { + DCHECK(HashLen() == sizeof(SBPrefix)); + + if (IsAdd()) { + add_prefixes_[index] = prefix; + } else { + sub_prefixes_[index].prefix = prefix; + } +} + +void SBEntry::SetFullHashAt(int index, const SBFullHash& full_hash) { + DCHECK(HashLen() == sizeof(SBFullHash)); + + if (IsAdd()) { + add_full_hashes_[index] = full_hash; + } else { + sub_full_hashes_[index].prefix = full_hash; + } +} + + + +SBHostInfo::SBHostInfo() : size_(0) { +} + +bool SBHostInfo::Initialize(const void* data, int size) { + size_ = size; + if (!size_) + return true; + + data_.reset(new char[size_]); + memcpy(data_.get(), data, size_); + if (!IsValid()) { + size_ = 0; + data_.reset(); + return false; + } + + return true; +} + +bool SBHostInfo::IsValid() { + const SBEntry* entry = NULL; + while (GetNextEntry(&entry)) { + if (!entry->IsValid()) + return false; + } + return true; +} + +void SBHostInfo::Add(const SBEntry* entry) { + int new_size = size_ + entry->Size(); + char* new_data = new char[new_size]; + memcpy(new_data, data_.get(), size_); + memcpy(new_data + size_, entry, entry->Size()); + data_.reset(new_data); + size_ = new_size; + DCHECK(IsValid()); +} + +void SBHostInfo::AddPrefixes(SBEntry* entry) { + DCHECK(entry->IsAdd()); + const SBEntry* sub_entry = NULL; + // Remove any prefixes for which a sub already came. + while (GetNextEntry(&sub_entry)) { + if (sub_entry->IsAdd() || entry->list_id() != sub_entry->list_id()) + continue; + + if (sub_entry->prefix_count() == 0) { + if (entry->chunk_id() != sub_entry->chunk_id()) + continue; + + // We don't want to add any of these prefixes so just return. Also no + // more need to store the sub chunk data around for this chunk_id so + // remove it. + RemoveSubEntry(entry->list_id(), entry->chunk_id()); + return; + } + + // Remove any matching prefixes. 
+ for (int i = 0; i < sub_entry->prefix_count(); ++i) { + for (int j = 0; j < entry->prefix_count(); ++j) { + if (entry->PrefixesMatch(j, sub_entry, i)) + entry->RemovePrefix(j--); + } + } + + RemoveSubEntry(entry->list_id(), entry->chunk_id()); + break; + } + + Add(entry); + DCHECK(IsValid()); +} + +void SBHostInfo::RemoveSubEntry(int list_id, int chunk_id) { + scoped_array<char> new_data(new char[size_]); // preallocate new data + char* write_ptr = new_data.get(); + int new_size = 0; + const SBEntry* entry = NULL; + while (GetNextEntry(&entry)) { + if (entry->list_id() == list_id && + entry->chunk_id() == chunk_id && + entry->IsSub() && + entry->prefix_count() == 0) { + continue; + } + + SBEntry* new_sub_entry = const_cast<SBEntry*>(entry); + scoped_array<char> data; + if (entry->IsSub() && entry->list_id() == list_id && entry->prefix_count()) { + // Make a copy of the entry so that we can modify it. + data.reset(new char[entry->Size()]); + new_sub_entry = reinterpret_cast<SBEntry*>(data.get()); + memcpy(new_sub_entry, entry, entry->Size()); + int new_prefix_count = 0; + // Remove any matching prefixes. + for (int i = 0; i < new_sub_entry->prefix_count(); ++i) { + if (new_sub_entry->ChunkIdAtPrefix(i) == chunk_id) + new_sub_entry->RemovePrefix(i--); + } + + if (new_sub_entry->prefix_count() == 0) + continue; // We removed the last prefix in the entry, so remove it. + } + + memcpy(write_ptr, new_sub_entry, new_sub_entry->Size()); + new_size += new_sub_entry->Size(); + write_ptr += new_sub_entry->Size(); + } + + size_ = new_size; + data_.reset(new_data.release()); + DCHECK(IsValid()); +} + +void SBHostInfo::RemovePrefixes(SBEntry* sub_entry, bool persist) { + DCHECK(sub_entry->IsSub()); + scoped_array<char> new_data(new char[size_]); + char* write_ptr = new_data.get(); + int new_size = 0; + const SBEntry* add_entry = NULL; + // Remove any of the prefixes that are in the database. 
+ while (GetNextEntry(&add_entry)) { + SBEntry* new_add_entry = const_cast<SBEntry*>(add_entry); + scoped_array<char> data; + if (add_entry->IsAdd() && add_entry->list_id() == sub_entry->list_id()) { + if (sub_entry->prefix_count() == 0 && + add_entry->chunk_id() == sub_entry->chunk_id()) { + // When prefixes are empty, that means we want to remove the entry for + // that host key completely. No need to add this sub chunk to the db. + persist = false; + continue; + } else if (sub_entry->prefix_count()) { + // Create another entry that doesn't have these prefixes. + data.reset(new char[add_entry->Size()]); + new_add_entry = reinterpret_cast<SBEntry*>(data.get()); + memcpy(new_add_entry, add_entry, add_entry->Size()); + + for (int i = 0; i < new_add_entry->prefix_count(); ++i) { + for (int j = 0; j < sub_entry->prefix_count(); ++j) { + if (!sub_entry->PrefixesMatch(j, new_add_entry, i)) + continue; + + new_add_entry->RemovePrefix(i--); + sub_entry->RemovePrefix(j--); + if (sub_entry->prefix_count() == 0) + persist = false; // Sub entry is all used up. + + break; + } + } + } + } + + // If we didn't modify the entry, then add it. Else if we modified it, + // then only add it if there are prefixes left. Otherwise, it it had n + // prefixes and now it has 0, if we were to add it that would mean all + // prefixes from that host are in the database. + if (new_add_entry == add_entry || new_add_entry->prefix_count()) { + memcpy(write_ptr, new_add_entry, new_add_entry->Size()); + new_size += new_add_entry->Size(); + write_ptr += new_add_entry->Size(); + } + } + + if (persist && new_size == size_) { + // We didn't find any matches because the sub came before the add, so save + // it for later. 
+ Add(sub_entry); + return; + } + + size_ = new_size; + data_.reset(new_data.release()); + DCHECK(IsValid()); +} + +bool SBHostInfo::Contains(const std::vector<SBFullHash>& prefixes, + int* list_id, + std::vector<SBPrefix>* prefix_hits) { + prefix_hits->clear(); + *list_id = -1; + bool hits = false; + const SBEntry* add_entry = NULL; + while (GetNextEntry(&add_entry)) { + if (add_entry->IsSub()) + continue; + + if (add_entry->prefix_count() == 0) { + // This means all paths for this url are blacklisted. + return true; + } + + for (int i = 0; i < add_entry->prefix_count(); ++i) { + for (size_t j = 0; j < prefixes.size(); ++j) { + if (!add_entry->AddPrefixMatches(i, prefixes[j])) + continue; + + hits = true; + if (add_entry->HashLen() == sizeof(SBFullHash)) { + *list_id = add_entry->list_id(); + } else { + prefix_hits->push_back(add_entry->PrefixAt(i)); + } + } + } + } + + return hits; +} + +bool SBHostInfo::GetNextEntry(const SBEntry** entry) { + const char* current = reinterpret_cast<const char*>(*entry); + + // It is an error to call this function with a |*entry| outside of |data_|. + DCHECK(!current || current >= data_.get()); + DCHECK(!current || current + (*entry)->Size() <= data_.get() + size_); + + // Compute the address of the next entry. + const char* next = current ? current + (*entry)->Size() : data_.get(); + const SBEntry* next_entry = reinterpret_cast<const SBEntry*>(next); + + // Validate that the next entry is wholly contained inside of |data_|. + const char* end = data_.get() + size_; + if (next + SBEntry::kMinSize <= end && next + next_entry->Size() <= end) { + *entry = next_entry; + return true; + } + + return false; +} diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h new file mode 100644 index 0000000..b1b8151 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_util.h @@ -0,0 +1,324 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Utilities for the SafeBrowsing code. 
+ +#ifndef CHROME_BROWSER_SAFE_BROWSING_UTIL_H__ +#define CHROME_BROWSER_SAFE_BROWSING_UTIL_H__ + +#include <deque> +#include <string> +#include <vector> + +#include "base/scoped_ptr.h" +#include "chrome/browser/safe_browsing/chunk_range.h" +#include "googleurl/src/gurl.h" + +//#define SB_LOGGING_ENABLED +#ifdef SB_LOGGING_ENABLED +#define SB_DLOG(severity) DLOG_IF(INFO, 1) +#else +#define SB_DLOG(severity) DLOG_IF(INFO, 0) +#endif + +// forward declaration +class SBEntry; + +// Widely used typedefs ------------------------------------------------------- + +// Container for holding a chunk URL and the MAC of the contents of the URL. +typedef struct { + std::string url; + std::string mac; +} ChunkUrl; + +// A truncated hash's type. +typedef int SBPrefix; + +// A full hash. +typedef struct { + char full_hash[32]; +} SBFullHash; + +inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) { + return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0; +} + +// Container for information about a specific host in an add/sub chunk. +struct SBChunkHost { + SBPrefix host; + SBEntry* entry; +}; + +// Container for an add/sub chunk. +struct SBChunk { + int chunk_number; + std::deque<SBChunkHost> hosts; +}; + +// Used when we get a gethash response. +struct SBFullHashResult { + SBFullHash hash; + std::string list_name; + int add_chunk_id; +}; + +// Contains information about a list in the database. +struct SBListChunkRanges { + std::string name; // The list name. + std::string adds; // The ranges for add chunks. + std::string subs; // The ranges for sub chunks. + + SBListChunkRanges(const std::string& n) : name(n) { } +}; + +// Container for deleting chunks from the database. +struct SBChunkDelete { + std::string list_name; + bool is_sub_del; + std::vector<ChunkRange> chunk_del; +}; + + +// Holds information about the prefixes for a hostkey. prefixes can either be +// 4 bytes (truncated hash) or 32 bytes (full hash). 
+// For adds: +// [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] +// For subs: +// [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] +// [add chunk][prefix][add chunk][prefix] +class SBEntry { + public: + enum Type { + ADD_PREFIX, // 4 byte add entry. + SUB_PREFIX, // 4 byte sub entry. + ADD_FULL_HASH, // 32 byte add entry. + SUB_FULL_HASH, // 32 byte sub entry. + }; + + // The minimum size of an SBEntry. + static const int kMinSize; + + // Creates a SBEntry with the necessary size for the given number of prefixes. + // Caller ownes the object and needs to free it by calling Destroy. + static SBEntry* Create(Type type, int prefix_count); + + // Frees the entry's memory. + void Destroy(); + + // Returns whether this entry is internally consistent. + bool IsValid() const; + + // Returns how many bytes this entry is. + int Size() const; + + // Helper to return how much memory a given Entry would require. + static int Size(Type type, int prefix_count); + + void set_list_id(int list_id) { data_.list_id = list_id; } + int list_id() const { return data_.list_id; } + void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } + int chunk_id() const { return data_.chunk_id; } + int prefix_count() const { return data_.prefix_count; } + Type type() const { return data_.type; } + + // Returns a new entry that is larger by the given number of prefixes, with + // all the existing data already copied over. The old entry is destroyed. + SBEntry* Enlarge(int extra_prefixes); + + // Removes the prefix at the given index. + void RemovePrefix(int index); + + // Returns true if the prefix/hash at the given index is equal to a + // prefix/hash at another entry's index. Works with all combinations of + // add/subs as long as they're the same size. Also checks chunk_ids. + bool PrefixesMatch(int index, const SBEntry* that, int that_index) const; + + // Returns true if the add prefix/hash at the given index is equal to the + // given full hash. 
+ bool AddPrefixMatches(int index, const SBFullHash& full_hash) const; + + // Returns true if this is an add entry. + bool IsAdd() const; + + // Returns true if this is a sub entry. + bool IsSub() const; + + // Helper to return the size of the prefixes. + int HashLen() const; + + // Helper to return the size of each prefix entry (i.e. for subs this + // includes an add chunk id). + static int PrefixSize(Type type); + + // For add entries, returns the add chunk id. For sub entries, returns the + // add_chunk id for the prefix at the given index. + int ChunkIdAtPrefix(int index) const; + + // Used for sub chunks to set the chunk id at a given index. + void SetChunkIdAtPrefix(int index, int chunk_id); + + // Return the prefix/full hash at the given index. Caller is expected to + // call the right function based on the hash length. + const SBPrefix& PrefixAt(int index) const; + const SBFullHash& FullHashAt(int index) const; + + // Return the prefix/full hash at the given index. Caller is expected to + // call the right function based on the hash length. + void SetPrefixAt(int index, const SBPrefix& prefix); + void SetFullHashAt(int index, const SBFullHash& full_hash); + + private: + SBEntry(); + ~SBEntry(); + + void set_prefix_count(int count) { data_.prefix_count = count; } + void set_type(Type type) { data_.type = type; } + + // Container for a sub prefix. + struct SBSubPrefix { + int add_chunk; + SBPrefix prefix; + }; + + // Container for a sub full hash. + struct SBSubFullHash { + int add_chunk; + SBFullHash prefix; + }; + + // Keep the fixed data together in one struct so that we can get its size + // easily. If any of this is modified, the database will have to be cleared. + struct Data { + int list_id; + // For adds, this is the add chunk number. + // For subs: if prefix_count is 0 then this is the add chunk that this sub + // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes + // or sub_full_hashes is used for each corresponding prefix. 
+ int chunk_id; + Type type; + int prefix_count; + }; + + // The prefixes union must follow the fixed data so that they're contiguous + // in memory. + Data data_; + union { + SBPrefix add_prefixes_[1]; + SBSubPrefix sub_prefixes_[1]; + SBFullHash add_full_hashes_[1]; + SBSubFullHash sub_full_hashes_[1]; + }; +}; + + +// Holds the hostkey specific information in the database. This is basically a +// collection of SBEntry objects. +class SBHostInfo { + public: + SBHostInfo(); + // By default, an empty SBHostInfo is created. Call this to deserialize from + // the database. Returns false if |data| is not internally consistent. + bool Initialize(const void* data, int size); + + // Adds the given prefixes to the unsafe list. Note that the prefixes array + // might be modified internally. + void AddPrefixes(SBEntry* entry); + + // Remove the given prefixes. If prefixes is empty, then all entries from + // sub.add_chunk_number are removed. Otherwise sub. add_chunk_id is ignored + // and the chunk_id from each element in sub.prefixes is checked. If persist + // is true and no matches are found, then the sub information will be stored + // and checked in case a future add comes in with that chunk_id. + void RemovePrefixes(SBEntry* entry, bool persist); + + // Returns true if the host entry contains any of the prefixes. If a full + // hash matched, then list_id contains the list id. Otherwise list_id is -1 + // and prefix_hits contains the matching prefixes if any are matched, or is + // empty if the entire host is blacklisted. + bool Contains(const std::vector<SBFullHash>& prefixes, + int* list_id, + std::vector<SBPrefix>* prefix_hits); + + // Used for serialization. + const void* data() const { return data_.get(); } + const int size() const { return size_; } + + private: + // Checks data_ for internal consistency. + bool IsValid(); + + // Allows enumeration of Entry structs. To start off, pass NULL for *entry, + // and then afterwards return the previous pointer. 
+ bool GetNextEntry(const SBEntry** entry); + + void Add(const SBEntry* entry); + + void RemoveSubEntry(int list_id, int chunk_id); + + // Collection of SBEntry objects. + scoped_array<char> data_; + int size_; +}; + + +// Helper functions ----------------------------------------------------------- + +namespace safe_browsing_util { + +void FreeChunks(std::deque<SBChunk>* chunks); + +// Given a URL, returns all the hosts we need to check. They are returned +// in order of size (i.e. b.c is first, then a.b.c). +void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); + +// Given a URL, returns all the paths we need to check. +void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); + +// Given a URL, compare all the possible host + path full hashes to the set of +// provided full hashes. Returns the index of the match if one is found, or -1 +// otherwise. +int CompareFullHashes(const GURL& url, + const std::vector<SBFullHashResult>& full_hashes); + +bool IsPhishingList(const std::string& list_name); +bool IsMalwareList(const std::string& list_name); + +// Returns 'true' if 'mac' can be verified using 'key' and 'data'. +bool VerifyMAC(const std::string& key, + const std::string& mac, + const char* data, + int data_length); + +GURL GeneratePhishingReportUrl(const std::string& report_page, + const std::string& url_to_report); + +} // namespace safe_browsing_util + +#endif // CHROME_BROWSER_SAFE_BROWSING_UTIL_H__ diff --git a/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc new file mode 100644 index 0000000..c67e554 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc @@ -0,0 +1,260 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include "base/logging.h" +#include "base/sha2.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + bool VectorContains(const std::vector<std::string>& data, + const std::string& str) { + for (size_t i = 0; i < data.size(); ++i) { + if (data[i] == str) + return true; + } + + return false; + } + +SBFullHash CreateFullHash(SBPrefix prefix) { + SBFullHash result; + memset(&result, 0, sizeof(result)); + memcpy(&result, &prefix, sizeof(result)); + return result; +} +} + +// Tests that we generate the required host/path combinations for testing +// according to the Safe Browsing spec. +// See section 6.2 in +// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. +TEST(SafeBrowsingUtilTest, UrlParsing) { + std::vector<std::string> hosts, paths; + + GURL url("http://a.b.c/1/2.html?param=1"); + safe_browsing_util::GenerateHostsToCheck(url, &hosts); + safe_browsing_util::GeneratePathsToCheck(url, &paths); + EXPECT_EQ(hosts.size(), 2); + EXPECT_EQ(paths.size(), 4); + EXPECT_EQ(hosts[0], "b.c"); + EXPECT_EQ(hosts[1], "a.b.c"); + + EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1")); + EXPECT_TRUE(VectorContains(paths, "/1/2.html")); + EXPECT_TRUE(VectorContains(paths, "/1/")); + EXPECT_TRUE(VectorContains(paths, "/")); + + url = GURL("http://a.b.c.d.e.f.g/1.html"); + safe_browsing_util::GenerateHostsToCheck(url, &hosts); + safe_browsing_util::GeneratePathsToCheck(url, &paths); + EXPECT_EQ(hosts.size(), 5); + EXPECT_EQ(paths.size(), 2); + EXPECT_EQ(hosts[0], "f.g"); + EXPECT_EQ(hosts[1], "e.f.g"); + EXPECT_EQ(hosts[2], "d.e.f.g"); + EXPECT_EQ(hosts[3], "c.d.e.f.g"); + EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g"); + EXPECT_TRUE(VectorContains(paths, "/1.html")); + EXPECT_TRUE(VectorContains(paths, "/")); + + url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/"); + safe_browsing_util::GeneratePathsToCheck(url, &paths); + EXPECT_EQ(paths.size(), 3); + EXPECT_TRUE(VectorContains(paths, 
"/saw-cgi/eBayISAPI.dll/")); + EXPECT_TRUE(VectorContains(paths, "/saw-cgi/")); + EXPECT_TRUE(VectorContains(paths, "/")); +} + + +TEST(SafeBrowsingUtilTest, FullHashCompare) { + GURL url("http://www.evil.com/phish.html"); + SBFullHashResult full_hash; + base::SHA256HashString(url.host() + url.path(), + &full_hash.hash, + sizeof(SBFullHash)); + std::vector<SBFullHashResult> full_hashes; + full_hashes.push_back(full_hash); + + EXPECT_EQ(safe_browsing_util::CompareFullHashes(url, full_hashes), 0); + + url = GURL("http://www.evil.com/okay_path.html"); + EXPECT_EQ(safe_browsing_util::CompareFullHashes(url, full_hashes), -1); +} + +// Checks the reading/writing code of the database information for a hostkey. +TEST(SafeBrowsing, HostInfo) { + // Test a simple case of adding a prefix from scratch. + SBEntry* entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + entry->SetPrefixAt(0, 0x01000000); + entry->set_list_id(1); + entry->set_chunk_id(1); + + SBHostInfo info; + info.AddPrefixes(entry); + entry->Destroy(); + + int list_id; + std::vector<SBFullHash> full_hashes; + full_hashes.push_back(CreateFullHash(0x01000000)); + std::vector<SBPrefix> prefix_hits; + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + // Test appending prefixes to an existing entry. + entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + entry->SetPrefixAt(0, 0x02000000); + entry->SetPrefixAt(1, 0x02000001); + entry->set_list_id(1); + entry->set_chunk_id(2); + info.AddPrefixes(entry); + entry->Destroy(); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x01000000)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x02000000)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x02000001)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + + // Test removing the entire first entry. 
+ entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0); + entry->set_list_id(1); + entry->set_chunk_id(1); + info.RemovePrefixes(entry, false); + entry->Destroy(); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x01000000)); + EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x02000000)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x02000001)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + // Test removing one prefix from the second entry. + entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); + entry->SetPrefixAt(0,0x02000000); + entry->SetChunkIdAtPrefix(0, 2); + entry->set_list_id(1); + info.RemovePrefixes(entry, false); + entry->Destroy(); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x02000000)); + EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x02000001)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + // Test adding a sub that specifies a prefix before the add. + entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); + entry->SetPrefixAt(0, 0x1000); + entry->SetChunkIdAtPrefix(0, 100); + entry->set_list_id(1); + info.RemovePrefixes(entry, true); + entry->Destroy(); + + // Make sure we don't get a match from a sub. + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x1000)); + EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + // Now add the prefixes. 
+ entry = SBEntry::Create(SBEntry::ADD_PREFIX, 3); + entry->SetPrefixAt(0, 0x10000); + entry->SetPrefixAt(1, 0x1000); + entry->SetPrefixAt(2, 0x100000); + entry->set_list_id(1); + entry->set_chunk_id(100); + info.AddPrefixes(entry); + entry->Destroy(); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x10000)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x1000)); + EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x100000)); + EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + // Now try adding a sub that deletes all prefixes from the chunk. + entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0); + entry->set_list_id(1); + entry->set_chunk_id(100); + info.RemovePrefixes(entry, true); + entry->Destroy(); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x10000)); + EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x100000)); + EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits)); + + // Add a sub for all prefixes before the add comes. + entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0); + entry->set_list_id(1); + entry->set_chunk_id(200); + info.RemovePrefixes(entry, true); + entry->Destroy(); + + // Now add the prefixes. + entry = SBEntry::Create(SBEntry::ADD_PREFIX, 3); + entry->SetPrefixAt(0, 0x2000); + entry->SetPrefixAt(1, 0x20000); + entry->SetPrefixAt(2, 0x200000); + entry->set_list_id(1); + entry->set_chunk_id(200); + info.AddPrefixes(entry); + entry->Destroy(); + + // None of the prefixes should be found. + full_hashes.clear(); + full_hashes.push_back(CreateFullHash(0x2000)); + full_hashes.push_back(CreateFullHash(0x20000)); + full_hashes.push_back(CreateFullHash(0x200000)); + EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits)); +} |