// Source: chrome/browser/safe_browsing/safe_browsing_database_bloom.h
// (blob df3e1a60873b058ae441a5c33665e9f9d70e5d87)
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_BLOOM_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_BLOOM_H_
#pragma once

#include <list>
#include <set>

#include "base/hash_tables.h"
#include "base/lock.h"
#include "base/ref_counted.h"
#include "base/scoped_ptr.h"
#include "base/task.h"
#include "base/time.h"
#include "chrome/browser/safe_browsing/safe_browsing_database.h"
#include "testing/gtest/include/gtest/gtest_prod.h"

class BloomFilter;
struct sqlite3;
class SqliteCompiledStatement;
class SqliteStatementCache;
class SQLTransaction;

// The reference implementation database using SQLite.
//
// Implements the SafeBrowsingDatabase interface on top of a SQLite connection
// (|db_|) and keeps an in-memory bloom filter (|bloom_filter_|) together with
// caches of full-hash results.  Members that may be used on the IO thread are
// guarded by |lookup_lock_| (see the comment on that member).
class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
 public:
  SafeBrowsingDatabaseBloom();
  virtual ~SafeBrowsingDatabaseBloom();

  // Implement SafeBrowsingDatabase interface.  The contracts of these methods
  // are defined by the base class in safe_browsing_database.h.
  virtual void Init(const FilePath& filename);
  virtual bool ResetDatabase();
  virtual bool ContainsUrl(const GURL& url,
                           std::string* matching_list,
                           std::vector<SBPrefix>* prefix_hits,
                           std::vector<SBFullHashResult>* full_hits,
                           base::Time last_update);
  virtual void InsertChunks(const std::string& list_name,
                            const SBChunkList& chunks);
  virtual void DeleteChunks(const std::vector<SBChunkDelete>& chunk_deletes);
  virtual void CacheHashResults(
      const std::vector<SBPrefix>& prefixes,
      const std::vector<SBFullHashResult>& full_hits);
  virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists);
  virtual void UpdateFinished(bool update_succeeded);

 private:
  // Give the unit-test fixture and the HashCaching test access to private
  // members.
  friend class SafeBrowsingDatabaseBloomTest;
  FRIEND_TEST(SafeBrowsingDatabaseBloomTest, HashCaching);

  // One cached full-hash record.
  // NOTE(review): field meanings below are inferred from their names; confirm
  // against the implementation file.
  struct HashCacheEntry {
    SBFullHash full_hash;
    int list_id;
    int add_chunk_id;
    int sub_chunk_id;
    base::Time received;  // Presumably when the hash was received.
  };

  // A list of cached full-hash records, and a map from a prefix to the list of
  // records stored for it.
  typedef std::list<HashCacheEntry> HashList;
  typedef base::hash_map<SBPrefix, HashList> HashCache;

  // Loads the bloom filter off disk, or generates one if it doesn't exist.
  virtual void LoadBloomFilter();

  // Deletes the on-disk bloom filter, e.g. because it's stale.
  virtual void DeleteBloomFilter();

  // Writes the current bloom filter to disk.
  virtual void WriteBloomFilter();

  // A chunk id paired with a prefix.  Arrays of these are passed to the
  // bloom-filter building helpers below.
  struct SBPair {
    int chunk_id;
    SBPrefix prefix;
  };

  // Distinguishes add chunks from sub chunks in the chunk helpers below.
  enum ChunkType {
    ADD_CHUNK = 0,
    SUB_CHUNK = 1,
  };

  // Opens the database.
  bool Open();

  // Closes the database.
  bool Close();

  // Creates the SQL tables.
  bool CreateTables();

  // Checks the database version and if it's incompatible with the current one,
  // resets the database.
  bool CheckCompatibleVersion();

  // Checks whether any of the given prefixes exist for the given host.
  // NOTE(review): this function returns void, so it cannot "return true" as
  // an earlier version of this comment claimed; matches are presumably
  // reported only through |prefix_hits| -- confirm against the implementation.
  void CheckUrl(const std::string& host,
                SBPrefix host_key,
                const std::vector<std::string>& paths,
                std::vector<SBPrefix>* prefix_hits);

  // Checks if a chunk is in the database.
  bool ChunkExists(int list_id, ChunkType type, int chunk_id);

  // Return a comma separated list of chunk ids that are in the database for
  // the given list and chunk type.  The result is written to |list|.
  void GetChunkIds(int list_id, ChunkType type, std::string* list);

  // Old implementation methods which have been consolidated into new
  // |UpdateStarted()| interface.  Retained to minimize changes to
  // this code.  Note that the zero-argument |UpdateStarted()| here is a
  // private overload of the public virtual one declared above.
  void GetListsInfo(std::vector<SBListChunkRanges>* lists);
  bool UpdateStarted();

  // Generate a bloom filter.
  virtual void BuildBloomFilter();

  // Helpers for building the bloom filter.
  // NOTE(review): |PairCompare| has a qsort()-style comparator signature;
  // presumably it orders SBPair elements -- confirm in the .cc file.
  static int PairCompare(const void* arg1, const void* arg2);

  bool BuildAddPrefixList(SBPair* adds);
  bool BuildAddFullHashCache(HashCache* add_cache);
  bool BuildSubFullHashCache(HashCache* sub_cache);
  bool RemoveSubs(SBPair* adds,
                  std::vector<bool>* adds_removed,
                  HashCache* add_cache,
                  HashCache* sub_cache,
                  int* subs);

  // Helpers for writing data back to the database.
  // NOTE(review): exact semantics are not visible from this header; see the
  // implementation file.
  bool UpdateTables();
  bool WritePrefixes(SBPair* adds, const std::vector<bool>& adds_removed,
                     int* new_add_count, scoped_refptr<BloomFilter>* filter);
  void WriteFullHashes(HashCache* hash_cache, bool is_add);
  void WriteFullHashList(const HashList& hash_list, bool is_add);

  // Looks up any cached full hashes we may have.
  void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits,
                           std::vector<SBFullHashResult>* full_hits,
                           base::Time last_update);

  // Remove cached entries that have prefixes contained in the entry.
  // The first (unnamed) parameter is the prefix.
  bool ClearCachedEntry(SBPrefix, int add_chunk_id, HashCache* hash_cache);

  // Corruption handling.
  // NOTE(review): presumably HandleCorruptDatabase() schedules
  // OnHandleCorruptDatabase() through |reset_factory_| -- confirm.
  void HandleCorruptDatabase();
  void OnHandleCorruptDatabase();

  // Adding add entries to the database.
  void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
  void InsertAddPrefix(SBPrefix prefix, int encoded_chunk);
  void InsertAddFullHash(SBPrefix prefix,
                         int encoded_chunk,
                         base::Time received_time,
                         SBFullHash full_prefix);

  // Adding sub entries to the database.
  void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
  void InsertSubPrefix(SBPrefix prefix,
                       int encoded_chunk,
                       int encoded_add_chunk);
  void InsertSubFullHash(SBPrefix prefix,
                         int encoded_chunk,
                         int encoded_add_chunk,
                         SBFullHash full_prefix,
                         bool use_temp_table);

  // Used for reading full hashes from the database.  Reads from |column| of
  // |statement| into |full_hash|.
  void ReadFullHash(SqliteCompiledStatement* statement,
                    int column,
                    SBFullHash* full_hash);

  // Returns the number of chunk + prefix pairs in the add prefix table.
  int GetAddPrefixCount();

  // Reads and writes chunk numbers to and from persistent store.
  void ReadChunkNumbers();
  bool WriteChunkNumbers();

  // Flush in-memory temporary caches.  |lookup_lock_| must be locked
  // by caller.
  void ClearUpdateCaches();

  // Encode the list id in the lower bit of the chunk: the result is
  // (chunk << 1) | list_id.  |list_id| must be 0 or 1 (asserted via DCHECK).
  // NOTE(review): |chunk| is shifted as a signed int, so a negative or very
  // large chunk id would overflow; callers are presumably expected to pass
  // small non-negative ids -- confirm.
  static inline int EncodeChunkId(int chunk, int list_id) {
    DCHECK(list_id == 0 || list_id == 1);
    chunk = chunk << 1;
    chunk |= list_id;
    return chunk;
  }

  // Split an encoded chunk id and return the original chunk id and list id.
  // This is the inverse of EncodeChunkId(): the low bit goes to |*list_id| and
  // the remaining bits (shifted right by one) go to |*chunk|.
  static inline void DecodeChunkId(int encoded, int* chunk, int* list_id) {
    *list_id = encoded & 0x1;
    *chunk = encoded >> 1;
  }

  // The database connection.
  sqlite3* db_;

  // Cache of compiled statements for our database.
  scoped_ptr<SqliteStatementCache> statement_cache_;

  // Used to schedule resetting the database because of corruption.
  ScopedRunnableMethodFactory<SafeBrowsingDatabaseBloom> reset_factory_;

  // Caches for all of the existing add and sub chunks.
  std::set<int> add_chunk_cache_;
  std::set<int> sub_chunk_cache_;

  // Caches for the AddDel and SubDel commands.
  base::hash_set<int> add_del_cache_;
  base::hash_set<int> sub_del_cache_;

  // The number of entries in the add_prefix table. Used to pick the correct
  // size for the bloom filter and stats gathering.
  int add_count_;

  // Transaction for protecting database integrity during updates.
  scoped_ptr<SQLTransaction> insert_transaction_;

  // Lock for protecting access to variables that may be used on the IO thread.
  // This includes |bloom_filter_|, |hash_cache_| and |prefix_miss_cache_|.
  Lock lookup_lock_;

  // True if we're in the middle of a reset.  This is used to prevent possible
  // infinite recursion.
  bool performing_reset_;

  // A store for GetHash results that have not yet been written to the database.
  HashList pending_full_hashes_;

  // Cache of full-hash results keyed by prefix, plus a raw-pointer accessor.
  // Guarded by |lookup_lock_| (see above).
  scoped_ptr<HashCache> hash_cache_;
  HashCache* hash_cache() { return hash_cache_.get(); }

  // Cache of prefixes that returned empty results (no full hash match).
  // Guarded by |lookup_lock_| (see above).
  typedef std::set<SBPrefix> PrefixCache;
  PrefixCache prefix_miss_cache_;
  PrefixCache* prefix_miss_cache() { return &prefix_miss_cache_; }

  // Paths of the database file and of the on-disk bloom filter.
  // NOTE(review): how |bloom_filter_filename_| is derived is not visible here.
  FilePath filename_;
  FilePath bloom_filter_filename_;

  // The in-memory bloom filter.  Guarded by |lookup_lock_| (see above).
  scoped_refptr<BloomFilter> bloom_filter_;

  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseBloom);
};

#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_BLOOM_H_