// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"

#include "base/callback.h"
#include "base/md5.h"

// TODO(shess): Remove after migration.
#include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h"

namespace {

// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
// that byte-order changes force corruption.
const int32 kFileMagic = 0x600D71FE;
const int32 kFileVersion = 7;  // SQLite storage was 6...

// Header at the front of the main database file.
struct FileHeader {
  int32 magic, version;
  uint32 add_chunk_count, sub_chunk_count;
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};

// Header for each chunk in the chunk-accumulation file.
struct ChunkHeader {
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};
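
// For reference, the overall file layout implied by the read and
// write sequences below is:
//
//   FileHeader
//   int32 * add_chunk_count       (add chunks-seen)
//   int32 * sub_chunk_count       (sub chunks-seen)
//   SBAddPrefix * add_prefix_count
//   SBSubPrefix * sub_prefix_count
//   SBAddFullHash * add_hash_count
//   SBSubFullHash * sub_hash_count
//   MD5Digest                     (checksum of all preceding bytes)
//
// (Summary derived from the code in this file, not a normative spec.)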

// Rewind the file.  Using fseek(2) because rewind(3) errors are
// weird.
bool FileRewind(FILE* fp) {
  int rv = fseek(fp, 0, SEEK_SET);
  DCHECK_EQ(rv, 0);
  return rv == 0;
}

// Read an array of |nmemb| items from |fp| into |ptr|, and fold the
// input data into the checksum in |context|, if non-NULL.  Return
// true on success.
template <class T>
bool ReadArray(T* ptr, size_t nmemb, FILE* fp, MD5Context* context) {
  const size_t ret = fread(ptr, sizeof(T), nmemb, fp);
  if (ret != nmemb)
    return false;

  if (context)
    MD5Update(context, ptr, sizeof(T) * nmemb);
  return true;
}

// Write an array of |nmemb| items from |ptr| to |fp|, and fold the
// output data into the checksum in |context|, if non-NULL.  Return
// true on success.
template <class T>
bool WriteArray(const T* ptr, size_t nmemb, FILE* fp, MD5Context* context) {
  const size_t ret = fwrite(ptr, sizeof(T), nmemb, fp);
  if (ret != nmemb)
    return false;

  if (context)
    MD5Update(context, ptr, sizeof(T) * nmemb);
  return true;
}

// Expand |values| to fit |count| new items, read those items from
// |fp| and fold them into the checksum in |context|.  Returns true on
// success.
template <class T>
bool ReadToVector(std::vector<T>* values, size_t count,
                  FILE* fp, MD5Context* context) {
  // Pointers into an empty vector may not be valid.
  if (!count)
    return true;

  // Grab the size for purposes of finding where to read to.  The
  // resize could invalidate any iterator captured here.
  const size_t original_size = values->size();
  values->resize(original_size + count);

  // Sayeth Herb Sutter: Vectors are guaranteed to be contiguous.  So
  // get a pointer to where to read the data to.
  T* ptr = &((*values)[original_size]);
  if (!ReadArray(ptr, count, fp, context)) {
    values->resize(original_size);
    return false;
  }

  return true;
}

// Write all of |values| to |fp|, and fold the data into the checksum
// in |context|, if non-NULL.  Returns true on success.
template <class T>
bool WriteVector(const std::vector<T>& values, FILE* fp,
                 MD5Context* context) {
  // Pointers into empty vectors may not be valid.
  if (values.empty())
    return true;

  // Sayeth Herb Sutter: Vectors are guaranteed to be contiguous.  So
  // get a pointer to where to write from.
  const T* ptr = &(values[0]);
  return WriteArray(ptr, values.size(), fp, context);
}

// Remove deleted items (|chunk_id| in |del_set|) from the vector
// starting at |offset| running to |end()|.
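// For example (illustrative values only): with |offset| == 0 and
// |del_set| == {2}, items with chunk_ids [1, 2, 2, 3] are compacted
// in place to [1, 3].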
template <class T>
void RemoveDeleted(std::vector<T>* vec, size_t offset,
                   const base::hash_set<int32>& del_set) {
  DCHECK(vec);

  // Scan through the items read, dropping the items in |del_set|.
  typename std::vector<T>::iterator add_iter = vec->begin() + offset;
  for (typename std::vector<T>::iterator iter = add_iter;
       iter != vec->end(); ++iter) {
    if (del_set.count(iter->chunk_id) == 0) {
      *add_iter = *iter;
      ++add_iter;
    }
  }
  vec->erase(add_iter, vec->end());
}

// Combine |ReadToVector()| and |RemoveDeleted()|.  Returns true on
// success.
template <class T>
bool ReadToVectorAndDelete(std::vector<T>* values, size_t count,
                           FILE* fp, MD5Context* context,
                           const base::hash_set<int32>& del_set) {
  const size_t original_size = values->size();
  if (!ReadToVector(values, count, fp, context))
    return false;

  RemoveDeleted(values, original_size, del_set);
  return true;
}

// Read an array of |count| integers and add them to |values|.
// Returns true on success.
bool ReadToChunkSet(std::set<int32>* values, size_t count,
                    FILE* fp, MD5Context* context) {
  if (!count)
    return true;

  std::vector<int32> flat_values;
  if (!ReadToVector(&flat_values, count, fp, context))
    return false;

  values->insert(flat_values.begin(), flat_values.end());
  return true;
}

// Write the contents of |values| as an array of integers.  Returns
// true on success.
bool WriteChunkSet(const std::set<int32>& values,
                   FILE* fp, MD5Context* context) {
  if (values.empty())
    return true;

  const std::vector<int32> flat_values(values.begin(), values.end());
  return WriteVector(flat_values, fp, context);
}

// Delete the chunks in |deleted| from |chunks|.
void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
                         std::set<int32>* chunks) {
  for (std::set<int32>::iterator iter = chunks->begin();
       iter != chunks->end();) {
    std::set<int32>::iterator prev = iter++;
    if (deleted.count(*prev) > 0)
      chunks->erase(prev);
  }
}

}  // namespace

SafeBrowsingStoreFile::SafeBrowsingStoreFile()
    : chunks_written_(0),
      file_(NULL) {
}

SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
  Close();
}

bool SafeBrowsingStoreFile::Delete() {
  // The database should not be open at this point.  But, just in
  // case, close everything before deleting.
  if (!Close()) {
    NOTREACHED();
    return false;
  }

  if (!file_util::Delete(filename_, false) &&
      file_util::PathExists(filename_)) {
    NOTREACHED();
    return false;
  }

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!file_util::Delete(new_filename, false) &&
      file_util::PathExists(new_filename)) {
    NOTREACHED();
    return false;
  }

  // Also make sure any SQLite data is deleted.  This should only be
  // needed if a journal file is left from a crash and the database is
  // reset before SQLite gets a chance to straighten things out.
  // TODO(shess): Remove after migration.
  SafeBrowsingStoreSqlite old_store;
  old_store.Init(
      filename_,
      NewCallback(this, &SafeBrowsingStoreFile::HandleCorruptDatabase));
  if (!old_store.Delete())
    return false;

  return true;
}

void SafeBrowsingStoreFile::Init(const FilePath& filename,
                                 Callback0::Type* corruption_callback) {
  filename_ = filename;
  corruption_callback_.reset(corruption_callback);
}

bool SafeBrowsingStoreFile::OnCorruptDatabase() {
  if (corruption_callback_.get())
    corruption_callback_->Run();

  // Return false as a convenience to callers.
  return false;
}

bool SafeBrowsingStoreFile::Close() {
  ClearUpdateBuffers();

  // Make sure the files are closed.
  file_.reset();
  new_file_.reset();
  old_store_.reset();
  return true;
}
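
// An update proceeds as follows: BeginUpdate() opens the existing
// store (or the legacy SQLite store) plus a temporary file for
// accumulating chunk data, repeated FinishChunk() calls append the
// buffered chunk data to the temporary file, and FinishUpdate()
// merges everything, writes out the new store, and renames it into
// place.  CancelUpdate() abandons the in-progress state.  (Summary
// comment restating the flow implemented below.)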
bool SafeBrowsingStoreFile::BeginUpdate() {
  DCHECK(!file_.get() && !new_file_.get() && !old_store_.get());

  // Structures should all be clear unless something bad happened.
  DCHECK(add_chunks_cache_.empty());
  DCHECK(sub_chunks_cache_.empty());
  DCHECK(add_del_cache_.empty());
  DCHECK(sub_del_cache_.empty());
  DCHECK(add_prefixes_.empty());
  DCHECK(sub_prefixes_.empty());
  DCHECK(add_hashes_.empty());
  DCHECK(sub_hashes_.empty());
  DCHECK_EQ(chunks_written_, 0);

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  file_util::ScopedFILE new_file(file_util::OpenFile(new_filename, "wb+"));
  if (new_file.get() == NULL)
    return false;

  file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
  empty_ = (file.get() == NULL);
  if (empty_) {
    // If the file exists but cannot be opened, try to delete it (not
    // deleting directly, the bloom filter needs to be deleted, too).
    if (file_util::PathExists(filename_))
      return OnCorruptDatabase();

    new_file_.swap(new_file);
    return true;
  }

  FileHeader header;
  if (!ReadArray(&header, 1, file.get(), NULL))
    return OnCorruptDatabase();

  if (header.magic != kFileMagic || header.version != kFileVersion) {
    // Something about having the file open causes a problem with
    // SQLite opening it.  Perhaps PRAGMA locking_mode = EXCLUSIVE?
    file.reset();

    // Magic numbers didn't match, maybe it's a SQLite database.
    scoped_ptr<SafeBrowsingStoreSqlite>
        sqlite_store(new SafeBrowsingStoreSqlite());
    sqlite_store->Init(
        filename_,
        NewCallback(this, &SafeBrowsingStoreFile::HandleCorruptDatabase));
    if (!sqlite_store->BeginUpdate())
      return OnCorruptDatabase();

    // Pull chunks-seen data into local structures, rather than
    // optionally wiring various calls through to the SQLite store.
    std::vector<int32> chunks;
    sqlite_store->GetAddChunks(&chunks);
    add_chunks_cache_.insert(chunks.begin(), chunks.end());

    sqlite_store->GetSubChunks(&chunks);
    sub_chunks_cache_.insert(chunks.begin(), chunks.end());

    new_file_.swap(new_file);
    old_store_.swap(sqlite_store);

    return true;
  }

  // Check that the file size makes sense given the header.  This is a
  // cheap way to protect against header corruption while deferring
  // the checksum calculation until the end of the update.
  // TODO(shess): Under POSIX it is possible that this could size a
  // file different from the file which was opened.
  int64 size = 0;
  if (!file_util::GetFileSize(filename_, &size))
    return OnCorruptDatabase();

  int64 expected_size = sizeof(FileHeader);
  expected_size += header.add_chunk_count * sizeof(int32);
  expected_size += header.sub_chunk_count * sizeof(int32);
  expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
  expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
  expected_size += header.add_hash_count * sizeof(SBAddFullHash);
  expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
  expected_size += sizeof(MD5Digest);
  if (size != expected_size)
    return OnCorruptDatabase();

  // Pull in the chunks-seen data for purposes of implementing
  // |GetAddChunks()| and |GetSubChunks()|.  This data is sent up to
  // the server at the beginning of an update.
  if (!ReadToChunkSet(&add_chunks_cache_, header.add_chunk_count,
                      file.get(), NULL) ||
      !ReadToChunkSet(&sub_chunks_cache_, header.sub_chunk_count,
                      file.get(), NULL))
    return OnCorruptDatabase();

  file_.swap(file);
  new_file_.swap(new_file);
  return true;
}

bool SafeBrowsingStoreFile::FinishChunk() {
  if (!add_prefixes_.size() && !sub_prefixes_.size() &&
      !add_hashes_.size() && !sub_hashes_.size())
    return true;

  ChunkHeader header;
  header.add_prefix_count = add_prefixes_.size();
  header.sub_prefix_count = sub_prefixes_.size();
  header.add_hash_count = add_hashes_.size();
  header.sub_hash_count = sub_hashes_.size();
  if (!WriteArray(&header, 1, new_file_.get(), NULL))
    return false;

  if (!WriteVector(add_prefixes_, new_file_.get(), NULL) ||
      !WriteVector(sub_prefixes_, new_file_.get(), NULL) ||
      !WriteVector(add_hashes_, new_file_.get(), NULL) ||
      !WriteVector(sub_hashes_, new_file_.get(), NULL))
    return false;

  ++chunks_written_;

  // Clear everything to save memory.
  return ClearChunkBuffers();
}
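
// DoUpdate() does the heavy lifting for FinishUpdate(): gather the
// existing data from |old_store_| or |file_| (applying deletions
// along the way), append the chunk data accumulated in |new_file_|
// plus |pending_adds|, knock subs from adds with SBProcessSubs(),
// then rewrite |new_file_| as the new store and rename it over
// |filename_|.  (Orientation comment summarizing the code below.)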
bool SafeBrowsingStoreFile::DoUpdate(
    const std::vector<SBAddFullHash>& pending_adds,
    std::vector<SBAddPrefix>* add_prefixes_result,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  DCHECK(old_store_.get() || file_.get() || empty_);
  DCHECK(new_file_.get());

  std::vector<SBAddPrefix> add_prefixes;
  std::vector<SBSubPrefix> sub_prefixes;
  std::vector<SBAddFullHash> add_full_hashes;
  std::vector<SBSubFullHash> sub_full_hashes;

  // Read |old_store_| into the vectors.
  if (old_store_.get()) {
    // Push deletions to |old_store_| so they can be applied to the
    // data being read.
    for (base::hash_set<int32>::const_iterator iter = add_del_cache_.begin();
         iter != add_del_cache_.end(); ++iter) {
      old_store_->DeleteAddChunk(*iter);
    }
    for (base::hash_set<int32>::const_iterator iter = sub_del_cache_.begin();
         iter != sub_del_cache_.end(); ++iter) {
      old_store_->DeleteSubChunk(*iter);
    }

    if (!old_store_->ReadAddPrefixes(&add_prefixes) ||
        !old_store_->ReadSubPrefixes(&sub_prefixes) ||
        !old_store_->ReadAddHashes(&add_full_hashes) ||
        !old_store_->ReadSubHashes(&sub_full_hashes))
      return OnCorruptDatabase();

    // Do not actually update the old store.
    if (!old_store_->CancelUpdate())
      return OnCorruptDatabase();
  } else if (!empty_) {
    // Read |file_| into the vectors.
    DCHECK(file_.get());

    if (!FileRewind(file_.get()))
      return OnCorruptDatabase();

    MD5Context context;
    MD5Init(&context);

    // Read the file header and make sure it looks right.
    FileHeader header;
    if (!ReadArray(&header, 1, file_.get(), &context))
      return OnCorruptDatabase();

    if (header.magic != kFileMagic || header.version != kFileVersion)
      return OnCorruptDatabase();

    // Re-read the chunks-seen data to get to the later data in the
    // file and calculate the checksum.  No new elements should be
    // added to the sets.
    if (!ReadToChunkSet(&add_chunks_cache_, header.add_chunk_count,
                        file_.get(), &context) ||
        !ReadToChunkSet(&sub_chunks_cache_, header.sub_chunk_count,
                        file_.get(), &context))
      return OnCorruptDatabase();

    if (!ReadToVectorAndDelete(&add_prefixes, header.add_prefix_count,
                               file_.get(), &context, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_prefixes, header.sub_prefix_count,
                               file_.get(), &context, sub_del_cache_) ||
        !ReadToVectorAndDelete(&add_full_hashes, header.add_hash_count,
                               file_.get(), &context, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_full_hashes, header.sub_hash_count,
                               file_.get(), &context, sub_del_cache_))
      return OnCorruptDatabase();

    // Calculate the digest to this point.
    MD5Digest calculated_digest;
    MD5Final(&calculated_digest, &context);

    // Read the stored checksum and verify it.
    MD5Digest file_digest;
    if (!ReadArray(&file_digest, 1, file_.get(), NULL))
      return OnCorruptDatabase();

    if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
      return OnCorruptDatabase();

    // Close the file so we can later rename over it.
    file_.reset();
  }
  DCHECK(!file_.get());

  // Rewind the temporary storage.
  if (!FileRewind(new_file_.get()))
    return false;

  // Append the accumulated chunks onto the vectors read from |file_|.
  for (int i = 0; i < chunks_written_; ++i) {
    ChunkHeader header;

    if (!ReadArray(&header, 1, new_file_.get(), NULL))
      return false;

    // TODO(shess): If the vectors were kept sorted, then this code
    // could use std::inplace_merge() to merge everything together in
    // sorted order.  That might still be slower than just sorting at
    // the end if there were a large number of chunks.  In that case
    // some sort of recursive binary merge might be in order (merge
    // chunks pairwise, merge those chunks pairwise, and so on, then
    // merge the result with the main list).
    if (!ReadToVectorAndDelete(&add_prefixes, header.add_prefix_count,
                               new_file_.get(), NULL, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_prefixes, header.sub_prefix_count,
                               new_file_.get(), NULL, sub_del_cache_) ||
        !ReadToVectorAndDelete(&add_full_hashes, header.add_hash_count,
                               new_file_.get(), NULL, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_full_hashes, header.sub_hash_count,
                               new_file_.get(), NULL, sub_del_cache_))
      return false;
  }

  // Append items from |pending_adds| which haven't been deleted.
  for (std::vector<SBAddFullHash>::const_iterator iter = pending_adds.begin();
       iter != pending_adds.end(); ++iter) {
    if (add_del_cache_.count(iter->chunk_id) == 0)
      add_full_hashes.push_back(*iter);
  }

  // Knock the subs from the adds.
  SBProcessSubs(&add_prefixes, &sub_prefixes,
                &add_full_hashes, &sub_full_hashes);

  // We no longer need to track deleted chunks.
  DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
  DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);

  // Write the new data to new_file_.
  if (!FileRewind(new_file_.get()))
    return false;

  MD5Context context;
  MD5Init(&context);

  // Write a file header.
  FileHeader header;
  header.magic = kFileMagic;
  header.version = kFileVersion;
  header.add_chunk_count = add_chunks_cache_.size();
  header.sub_chunk_count = sub_chunks_cache_.size();
  header.add_prefix_count = add_prefixes.size();
  header.sub_prefix_count = sub_prefixes.size();
  header.add_hash_count = add_full_hashes.size();
  header.sub_hash_count = sub_full_hashes.size();
  if (!WriteArray(&header, 1, new_file_.get(), &context))
    return false;

  // Write all the chunk data.
  if (!WriteChunkSet(add_chunks_cache_, new_file_.get(), &context) ||
      !WriteChunkSet(sub_chunks_cache_, new_file_.get(), &context) ||
      !WriteVector(add_prefixes, new_file_.get(), &context) ||
      !WriteVector(sub_prefixes, new_file_.get(), &context) ||
      !WriteVector(add_full_hashes, new_file_.get(), &context) ||
      !WriteVector(sub_full_hashes, new_file_.get(), &context))
    return false;

  // Write the checksum at the end.
  MD5Digest digest;
  MD5Final(&digest, &context);
  if (!WriteArray(&digest, 1, new_file_.get(), NULL))
    return false;

  // Trim any excess left over from the temporary chunk data.
  if (!file_util::TruncateFile(new_file_.get()))
    return false;

  // Close the file handle and swizzle the file into place.
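  // The new store is complete in the temporary file at this point.
  // If a crash lands between deleting the old store and the rename,
  // no file exists at |filename_|, which BeginUpdate() treats as an
  // empty store, so the next update starts from scratch rather than
  // from corrupt data.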
  new_file_.reset();
  if (old_store_.get()) {
    const bool deleted = old_store_->Delete();
    old_store_.reset();
    if (!deleted)
      return false;
  } else {
    if (!file_util::Delete(filename_, false) &&
        file_util::PathExists(filename_))
      return false;
  }

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!file_util::Move(new_filename, filename_))
    return false;

  // Pass the resulting data off to the caller.
  add_prefixes_result->swap(add_prefixes);
  add_full_hashes_result->swap(add_full_hashes);

  return true;
}

bool SafeBrowsingStoreFile::FinishUpdate(
    const std::vector<SBAddFullHash>& pending_adds,
    std::vector<SBAddPrefix>* add_prefixes_result,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  bool ret = DoUpdate(pending_adds,
                      add_prefixes_result, add_full_hashes_result);

  if (!ret) {
    CancelUpdate();
    return false;
  }

  DCHECK(!new_file_.get());
  DCHECK(!file_.get());
  DCHECK(!old_store_.get());

  return Close();
}

bool SafeBrowsingStoreFile::CancelUpdate() {
  old_store_.reset();
  return Close();
}