diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-08 19:10:05 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-08 19:10:05 +0000 |
commit | eeb0af285a61c37c0fbcb046174bf29741324a08 (patch) | |
tree | 2dc7dc0ca940cd7d86bd934626ed8123017e196e /chrome/browser/safe_browsing | |
parent | 11901ef2cd6ef17b08116d3dc19d03e9be87dd25 (diff) | |
download | chromium_src-eeb0af285a61c37c0fbcb046174bf29741324a08.zip chromium_src-eeb0af285a61c37c0fbcb046174bf29741324a08.tar.gz chromium_src-eeb0af285a61c37c0fbcb046174bf29741324a08.tar.bz2 |
safe_browsing::PrefixSet persistence code.
Just the ability to read/write the data structure to a file. File
format is a simple header, the contents of the vectors, and a
checksum.
BUG=71832
TEST=none
Review URL: http://codereview.chromium.org/6625002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@77314 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing')
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set.cc | 147 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set.h | 14 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set_unittest.cc | 299 |
3 files changed, 424 insertions, 36 deletions
diff --git a/chrome/browser/safe_browsing/prefix_set.cc b/chrome/browser/safe_browsing/prefix_set.cc index 9ddd123..72ceb6f 100644 --- a/chrome/browser/safe_browsing/prefix_set.cc +++ b/chrome/browser/safe_browsing/prefix_set.cc @@ -7,11 +7,28 @@ #include <algorithm> #include <math.h> +#include "base/file_util.h" #include "base/logging.h" +#include "base/md5.h" #include "base/metrics/histogram.h" namespace { +// |kMagic| should be reasonably unique, and not match itself across +// endianness changes. I generated this value with: +// md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 +static uint32 kMagic = 0x864088dd; + +// Current version the code writes out. +static uint32 kVersion = 0x1; + +typedef struct { + uint32 magic; + uint32 version; + uint32 index_size; + uint32 deltas_size; +} FileHeader; + // For |std::upper_bound()| to find a prefix w/in a vector of pairs. bool PrefixLess(const std::pair<SBPrefix,size_t>& a, const std::pair<SBPrefix,size_t>& b) { @@ -66,6 +83,13 @@ PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { } } +PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, + std::vector<uint16> *deltas) { + DCHECK(index && deltas); + index_.swap(*index); + deltas_.swap(*deltas); +} + PrefixSet::~PrefixSet() {} bool PrefixSet::Exists(SBPrefix prefix) const { @@ -117,4 +141,127 @@ void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) { } } +// static +PrefixSet* PrefixSet::LoadFile(const FilePath& filter_name) { + int64 size_64; + if (!file_util::GetFileSize(filter_name, &size_64)) + return NULL; + if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) + return NULL; + + file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb")); + if (!file.get()) + return NULL; + + FileHeader header; + size_t read = fread(&header, sizeof(header), 1, file.get()); + if (read != 1) + return NULL; + + if (header.magic != kMagic || header.version != kVersion) + return NULL; + + std::vector<std::pair<SBPrefix,size_t> > index; + const size_t index_bytes = sizeof(index[0]) * header.index_size; + + std::vector<uint16> deltas; + const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; + + // Check for bogus sizes before allocating any space. + const size_t expected_bytes = + sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); + if (static_cast<int64>(expected_bytes) != size_64) + return NULL; + + // The file looks valid, start building the digest. + MD5Context context; + MD5Init(&context); + MD5Update(&context, &header, sizeof(header)); + + // Read the index vector. Herb Sutter indicates that vectors are + // guaranteed to be contiuguous, so reading to where element 0 lives + // is valid. + index.resize(header.index_size); + read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); + if (read != index.size()) + return NULL; + MD5Update(&context, &(index[0]), index_bytes); + + // Read vector of deltas. + deltas.resize(header.deltas_size); + read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); + if (read != deltas.size()) + return NULL; + MD5Update(&context, &(deltas[0]), deltas_bytes); + + MD5Digest calculated_digest; + MD5Final(&calculated_digest, &context); + + MD5Digest file_digest; + read = fread(&file_digest, sizeof(file_digest), 1, file.get()); + if (read != 1) + return NULL; + + if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) + return NULL; + + // Steals contents of |index| and |deltas| via swap(). + return new PrefixSet(&index, &deltas); +} + +bool PrefixSet::WriteFile(const FilePath& filter_name) const { + FileHeader header; + header.magic = kMagic; + header.version = kVersion; + header.index_size = static_cast<uint32>(index_.size()); + header.deltas_size = static_cast<uint32>(deltas_.size()); + + // Sanity check that the 32-bit values never mess things up. + if (static_cast<size_t>(header.index_size) != index_.size() || + static_cast<size_t>(header.deltas_size) != deltas_.size()) { + NOTREACHED(); + return false; + } + + file_util::ScopedFILE file(file_util::OpenFile(filter_name, "wb")); + if (!file.get()) + return false; + + MD5Context context; + MD5Init(&context); + + // TODO(shess): The I/O code in safe_browsing_store_file.cc would + // sure be useful about now. + size_t written = fwrite(&header, sizeof(header), 1, file.get()); + if (written != 1) + return false; + MD5Update(&context, &header, sizeof(header)); + + // As for reads, the standard guarantees the ability to access the + // contents of the vector by a pointer to an element. + const size_t index_bytes = sizeof(index_[0]) * index_.size(); + written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(), file.get()); + if (written != index_.size()) + return false; + MD5Update(&context, &(index_[0]), index_bytes); + + const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size(); + written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), + file.get()); + if (written != deltas_.size()) + return false; + MD5Update(&context, &(deltas_[0]), deltas_bytes); + + MD5Digest digest; + MD5Final(&digest, &context); + written = fwrite(&digest, sizeof(digest), 1, file.get()); + if (written != 1) + return false; + + // TODO(shess): Can this code check that the close was successful? + file.reset(); + + return true; +} + } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/prefix_set.h b/chrome/browser/safe_browsing/prefix_set.h index cb2cd9f..246beea 100644 --- a/chrome/browser/safe_browsing/prefix_set.h +++ b/chrome/browser/safe_browsing/prefix_set.h @@ -37,8 +37,7 @@ // 2^16 apart, which would need 512k (versus 256k to store the raw // data). // -// TODO(shess): Write serialization code. Something like this should -// work: +// The on-disk format looks like: // 4 byte magic number // 4 byte version number // 4 byte |index_.size()| @@ -55,6 +54,8 @@ #include "chrome/browser/safe_browsing/safe_browsing_util.h" +class FilePath; + namespace safe_browsing { class PrefixSet { @@ -65,6 +66,10 @@ class PrefixSet { // |true| if |prefix| was in |prefixes| passed to the constructor. bool Exists(SBPrefix prefix) const; + // Persist the set on disk. + static PrefixSet* LoadFile(const FilePath& filter_name); + bool WriteFile(const FilePath& filter_name) const; + // Regenerate the vector of prefixes passed to the constructor into // |prefixes|. Prefixes will be added in sorted order. void GetPrefixes(std::vector<SBPrefix>* prefixes); @@ -78,6 +83,11 @@ class PrefixSet { // for |Exists()| under control. static const size_t kMaxRun = 100; + // Helper for |LoadFile()|. Steals the contents of |index| and + // |deltas| using |swap()|. + PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, + std::vector<uint16> *deltas); + // Top-level index of prefix to offset in |deltas_|. Each pair // indicates a base prefix and where the deltas from that prefix // begin in |deltas_|. The deltas for a pair end at the next pair's diff --git a/chrome/browser/safe_browsing/prefix_set_unittest.cc b/chrome/browser/safe_browsing/prefix_set_unittest.cc index f7be5da..1547117 100644 --- a/chrome/browser/safe_browsing/prefix_set_unittest.cc +++ b/chrome/browser/safe_browsing/prefix_set_unittest.cc @@ -6,58 +6,161 @@ #include <algorithm> +#include "base/file_util.h" #include "base/logging.h" +#include "base/md5.h" #include "base/rand_util.h" +#include "base/scoped_ptr.h" +#include "base/scoped_temp_dir.h" #include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" namespace { -SBPrefix GenPrefix() { - return static_cast<SBPrefix>(base::RandUint64()); -} +class PrefixSetTest : public PlatformTest { + protected: + // Constants for the v1 format. + static const size_t kMagicOffset = 0 * sizeof(uint32); + static const size_t kVersionOffset = 1 * sizeof(uint32); + static const size_t kIndexSizeOffset = 2 * sizeof(uint32); + static const size_t kDeltasSizeOffset = 3 * sizeof(uint32); + static const size_t kPayloadOffset = 4 * sizeof(uint32); -// Test that a small sparse random input works. -TEST(PrefixSetTest, Baseline) { - std::vector<SBPrefix> prefixes; + // Generate a set of random prefixes to share between tests. For + // most tests this generation was a large fraction of the test time. + static void SetUpTestCase() { + for (size_t i = 0; i < 50000; ++i) { + const SBPrefix prefix = static_cast<SBPrefix>(base::RandUint64()); + shared_prefixes_.push_back(prefix); + } + + // Sort for use with PrefixSet constructor. + std::sort(shared_prefixes_.begin(), shared_prefixes_.end()); + } + + // Check that all elements of |prefixes| are in |prefix_set|, and + // that nearby elements are not (for lack of a more sensible set of + // items to check for absence). + static void CheckPrefixes(safe_browsing::PrefixSet* prefix_set, + const std::vector<SBPrefix> &prefixes) { + // The set can generate the prefixes it believes it has, so that's + // a good starting point. + std::set<SBPrefix> check(prefixes.begin(), prefixes.end()); + std::vector<SBPrefix> prefixes_copy; + prefix_set->GetPrefixes(&prefixes_copy); + EXPECT_EQ(prefixes_copy.size(), check.size()); + EXPECT_TRUE(std::equal(check.begin(), check.end(), prefixes_copy.begin())); - static const size_t kCount = 50000; + for (size_t i = 0; i < prefixes.size(); ++i) { + EXPECT_TRUE(prefix_set->Exists(prefixes[i])); - for (size_t i = 0; i < kCount; ++i) { - prefixes.push_back(GenPrefix()); + const SBPrefix left_sibling = prefixes[i] - 1; + if (check.count(left_sibling) == 0) + EXPECT_FALSE(prefix_set->Exists(left_sibling)); + + const SBPrefix right_sibling = prefixes[i] + 1; + if (check.count(right_sibling) == 0) + EXPECT_FALSE(prefix_set->Exists(right_sibling)); + } } - std::sort(prefixes.begin(), prefixes.end()); - safe_browsing::PrefixSet prefix_set(prefixes); + // Generate a |PrefixSet| file from |shared_prefixes_|, store it in + // a temporary file, and return the filename in |filenamep|. + // Returns |true| on success. + bool GetPrefixSetFile(FilePath* filenamep) { + if (!temp_dir_.IsValid() && !temp_dir_.CreateUniqueTempDir()) + return false; - // Check that |GetPrefixes()| returns exactly the same set of - // prefixes as was passed in. - std::set<SBPrefix> check(prefixes.begin(), prefixes.end()); - std::vector<SBPrefix> prefixes_copy; - prefix_set.GetPrefixes(&prefixes_copy); - EXPECT_EQ(prefixes_copy.size(), check.size()); - EXPECT_TRUE(std::equal(check.begin(), check.end(), prefixes_copy.begin())); + FilePath filename = temp_dir_.path().AppendASCII("PrefixSetTest"); - // Check that the set flags all of the inputs, and also check items - // just above and below the inputs to make sure they aren't there. - for (size_t i = 0; i < prefixes.size(); ++i) { - EXPECT_TRUE(prefix_set.Exists(prefixes[i])); + safe_browsing::PrefixSet prefix_set(shared_prefixes_); + if (!prefix_set.WriteFile(filename)) + return false; + + *filenamep = filename; + return true; + } + + // Helper function to read the int32 value at |offset|, increment it + // by |inc|, and write it back in place. |fp| should be opened in + // r+ mode. + static void IncrementIntAt(FILE* fp, long offset, int inc) { + int32 value = 0; - const SBPrefix left_sibling = prefixes[i] - 1; - if (check.count(left_sibling) == 0) - EXPECT_FALSE(prefix_set.Exists(left_sibling)); + ASSERT_NE(-1, fseek(fp, offset, SEEK_SET)); + ASSERT_EQ(1U, fread(&value, sizeof(value), 1, fp)); - const SBPrefix right_sibling = prefixes[i] + 1; - if (check.count(right_sibling) == 0) - EXPECT_FALSE(prefix_set.Exists(right_sibling)); + value += inc; + + ASSERT_NE(-1, fseek(fp, offset, SEEK_SET)); + ASSERT_EQ(1U, fwrite(&value, sizeof(value), 1, fp)); + } + + // Helper function to re-generated |fp|'s checksum to be correct for + // the file's contents. |fp| should be opened in r+ mode. + static void CleanChecksum(FILE* fp) { + MD5Context context; + MD5Init(&context); + + ASSERT_NE(-1, fseek(fp, 0, SEEK_END)); + long file_size = ftell(fp); + + size_t payload_size = static_cast<size_t>(file_size) - sizeof(MD5Digest); + size_t digested_size = 0; + ASSERT_NE(-1, fseek(fp, 0, SEEK_SET)); + while (digested_size < payload_size) { + char buf[1024]; + size_t nitems = std::min(payload_size - digested_size, sizeof(buf)); + ASSERT_EQ(nitems, fread(buf, 1, nitems, fp)); + MD5Update(&context, &buf, nitems); + digested_size += nitems; + } + ASSERT_EQ(digested_size, payload_size); + ASSERT_EQ(static_cast<long>(digested_size), ftell(fp)); + + MD5Digest new_digest; + MD5Final(&new_digest, &context); + ASSERT_NE(-1, fseek(fp, digested_size, SEEK_SET)); + ASSERT_EQ(1U, fwrite(&new_digest, sizeof(new_digest), 1, fp)); + ASSERT_EQ(file_size, ftell(fp)); } + + // Open |filename| and increment the int32 at |offset| by |inc|. + // Then re-generate the checksum to account for the new contents. + void ModifyAndCleanChecksum(const FilePath& filename, long offset, int inc) { + int64 size_64; + ASSERT_TRUE(file_util::GetFileSize(filename, &size_64)); + + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + IncrementIntAt(file.get(), offset, inc); + CleanChecksum(file.get()); + file.reset(); + + int64 new_size_64; + ASSERT_TRUE(file_util::GetFileSize(filename, &new_size_64)); + ASSERT_EQ(new_size_64, size_64); + } + + // Tests should not modify this shared resource. + static std::vector<SBPrefix> shared_prefixes_; + + ScopedTempDir temp_dir_; +}; + +std::vector<SBPrefix> PrefixSetTest::shared_prefixes_; + +// Test that a small sparse random input works. +TEST_F(PrefixSetTest, Baseline) { + safe_browsing::PrefixSet prefix_set(shared_prefixes_); + CheckPrefixes(&prefix_set, shared_prefixes_); } // Test that the empty set doesn't appear to have anything in it. -TEST(PrefixSetTest, Empty) { - std::vector<SBPrefix> prefixes; - safe_browsing::PrefixSet prefix_set(prefixes); - for (size_t i = 0; i < 500; ++i) { - EXPECT_FALSE(prefix_set.Exists(GenPrefix())); +TEST_F(PrefixSetTest, Empty) { + const std::vector<SBPrefix> empty; + safe_browsing::PrefixSet prefix_set(empty); + for (size_t i = 0; i < shared_prefixes_.size(); ++i) { + EXPECT_FALSE(prefix_set.Exists(shared_prefixes_[i])); } } @@ -67,7 +170,7 @@ TEST(PrefixSetTest, Empty) { // deltas above and below 2^16, and make sure they're all present. // Create a very long sequence with deltas below 2^16 to test crossing // |kMaxRun|. -TEST(PrefixSetTest, EdgeCases) { +TEST_F(PrefixSetTest, EdgeCases) { std::vector<SBPrefix> prefixes; const SBPrefix kVeryPositive = 1000 * 1000 * 1000; @@ -132,4 +235,132 @@ TEST(PrefixSetTest, EdgeCases) { } } +// Similar to Baseline test, but write the set out to a file and read +// it back in before testing. +TEST_F(PrefixSetTest, ReadWrite) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_TRUE(prefix_set.get()); + + CheckPrefixes(prefix_set.get(), shared_prefixes_); +} + +// Check that |CleanChecksum()| makes an acceptable checksum. +TEST_F(PrefixSetTest, CorruptionHelpers) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // This will modify data in |index_|, which will fail the digest check. + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + IncrementIntAt(file.get(), kPayloadOffset, 1); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); + + // Fix up the checksum and it will read successfully (though the + // data will be wrong). + file.reset(file_util::OpenFile(filename, "r+b")); + CleanChecksum(file.get()); + file.reset(); + prefix_set.reset(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_TRUE(prefix_set.get()); +} + +// Bad |index_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionMagic) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kMagicOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |index_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionVersion) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kVersionOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |index_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionIndexSize) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kIndexSizeOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |deltas_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionDeltasSize) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kDeltasSizeOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Test that the digest catches corruption in the middle of the file +// (in the payload between the header and the digest). +TEST_F(PrefixSetTest, CorruptionPayload) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), 666, 1)); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Test corruption in the digest itself. +TEST_F(PrefixSetTest, CorruptionDigest) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + int64 size_64; + ASSERT_TRUE(file_util::GetFileSize(filename, &size_64)); + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + long digest_offset = static_cast<long>(size_64 - sizeof(MD5Digest)); + ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), digest_offset, 1)); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Test excess data after the digest (fails the size test). +TEST_F(PrefixSetTest, CorruptionExcess) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // Add some junk to the trunk. + file_util::ScopedFILE file(file_util::OpenFile(filename, "ab")); + const char buf[] = "im in ur base, killing ur d00dz."; + ASSERT_EQ(strlen(buf), fwrite(buf, 1, strlen(buf), file.get())); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + } // namespace |