summaryrefslogtreecommitdiffstats
path: root/chrome/browser/safe_browsing/safe_browsing_database.h
blob: 872f76ef229322b85c13a970f27f8838e9689daf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__

#include <deque>
#include <list>
#include <queue>
#include <set>
#include <string>
#include <vector>

#include "base/hash_tables.h"
#include "base/scoped_ptr.h"
#include "base/task.h"
#include "base/time.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"
#include "chrome/common/sqlite_compiled_statement.h"
#include "chrome/common/sqlite_utils.h"

class BloomFilter;
class GURL;

// Encapsulates the database that stores information about phishing and malware
// sites.  There is one on-disk database for all profiles, as it doesn't
// contain user-specific data.  This object is not thread-safe, i.e. all its
// methods should be used on the same thread that it was created on, with the
// exception of NeedToCheckUrl.
class SafeBrowsingDatabase {
 public:
  SafeBrowsingDatabase();
  ~SafeBrowsingDatabase();

  // Initializes the database with the given filename.
  // |chunk_inserted_callback| is executed after an add/sub chunk has been
  // processed.  Returns a bool status.
  // NOTE(review): ownership of the callback is not visible from this header;
  // it is stored as a raw pointer -- confirm who deletes it.
  bool Init(const std::wstring& filename,
            Callback0::Type* chunk_inserted_callback);

  // Deletes the current database and creates a new one.
  bool ResetDatabase();

  // This function can be called on any thread to check if the given url may be
  // in the database.  If this function returns false, it is definitely not in
  // the database and ContainsUrl doesn't need to be called.  If it returns
  // true, then the url might be in the database and ContainsUrl needs to be
  // called.  This function can only be called after Init succeeded.
  bool NeedToCheckUrl(const GURL& url);

  // Returns false if the given url is not in the database.  If it returns
  // true, then either |matching_list| is the name of the matching list, or
  // |prefix_hits| contains the matching hash prefixes.
  // NOTE(review): |full_hits| and |last_update| have the same types as the
  // arguments of GetCachedFullHashes(), so they presumably carry cached
  // full-hash results and the time of the last update -- confirm in the .cc.
  bool ContainsUrl(const GURL& url,
                   std::string* matching_list,
                   std::vector<SBPrefix>* prefix_hits,
                   std::vector<SBFullHashResult>* full_hits,
                   Time last_update);

  // Processes add/sub commands for the list named |list_name|.  Database will
  // free the chunks when it's done.
  void InsertChunks(const std::string& list_name, std::deque<SBChunk>* chunks);

  // Processes adddel/subdel commands.  Database will free chunk_deletes when
  // it's done.
  void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes);

  // Returns the lists and their add/sub chunks through the |lists|
  // out-parameter.
  void GetListsInfo(std::vector<SBListChunkRanges>* lists);

  // Call this to make all database operations synchronous (clears
  // |asynchronous_|).  While useful for testing, this should never be called
  // in chrome.exe because it can lead to blocking user requests.
  void set_synchronous() { asynchronous_ = false; }

  // Store the results of a GetHash response. In the case of empty results, we
  // cache the prefixes until the next update so that we don't have to issue
  // further GetHash requests we know will be empty.
  void CacheHashResults(const std::vector<SBPrefix>& prefixes,
                        const std::vector<SBFullHashResult>& full_hits);

  // Called when the user's machine has resumed from a lower power state.
  void HandleResume();

 private:
  // Test fixture that needs access to the private hash cache.
  friend class SafeBrowsing_HashCaching_Test;

  // Opens the database.
  bool Open();

  // Closes the database.
  bool Close();

  // Creates the SQL tables.
  bool CreateTables();

  // Checks the database version and if it's incompatible with the current one,
  // resets the database.
  bool CheckCompatibleVersion();

  // Updates, or adds if new, a hostkey's record with the given add/sub entry.
  // If this is a sub, removes the given prefixes, or all if prefixes is empty,
  // from host_key's record.  If persist is true, then if the add_chunk_id isn't
  // found the entry will store this sub information for future reference.
  // Otherwise the entry will not be modified if there are no matches.
  void UpdateInfo(SBPrefix host, SBEntry* entry, bool persist);

  // Checks whether any of the given prefixes exist for the given host.  This
  // function returns nothing; the matching list name and any prefix matches
  // are reported through the |matching_list| and |prefix_hits|
  // out-parameters.
  void CheckUrl(const std::string& host,
                SBPrefix host_key,
                const std::vector<std::string>& paths,
                std::string* matching_list,
                std::vector<SBPrefix>* prefix_hits);

  // Distinguishes the two kinds of chunk stored in the chunks table.
  enum ChunkType {
    ADD_CHUNK = 0,
    SUB_CHUNK = 1,
  };

  // Adds information about the given chunk to the chunks table.
  void AddChunkInformation(int list_id,
                           ChunkType type,
                           int chunk_id,
                           const std::string& hostkeys);  // only used for add

  // Writes to |list| a comma separated list of chunk ids that are in the
  // database for the given list and chunk type.
  void GetChunkIds(int list_id, ChunkType type, std::string* list);

  // Checks if a chunk is in the database.
  bool ChunkExists(int list_id, ChunkType type, int chunk_id);

  // Removes the given id from our list of chunk ids.
  void RemoveChunkId(int list_id, ChunkType type, int chunk_id);

  // Reads the host's information from the database.  Returns true if it was
  // found, or false otherwise.
  // NOTE(review): |id| is presumably the row id of the record -- confirm.
  bool ReadInfo(int host_key, SBHostInfo* info, int* id);

  // Writes the host's information to the database, overwriting any existing
  // information for that host_key if it existed.
  void WriteInfo(int host_key, const SBHostInfo& info, int id);

  // Deletes existing information for the given hostkey.
  void DeleteInfo(int host_key);

  // Adds the given list to the database.  Returns its row id.
  int AddList(const std::string& name);

  // Given a list name, returns its internal id.  If we haven't seen it before,
  // an id is created and stored in the database.  On error, returns 0.
  int GetListID(const std::string& name);

  // Given a list id, returns its name.
  std::string GetListName(int id);

  // NOTE(review): presumably derives the bloom filter's file name from the
  // database file name -- confirm in the .cc.
  static std::wstring BloomFilterFilename(const std::wstring& db_filename);

  // Load the bloom filter off disk.  Generates one if it can't find it.
  void LoadBloomFilter();

  // Deletes the on-disk bloom filter, i.e. because it's stale.
  void DeleteBloomFilter();

  // Writes the current bloom filter to disk.
  void WriteBloomFilter();

  // Adds the host to the bloom filter.
  void AddHostToBloomFilter(int host_key);

  // Generate a bloom filter.
  void BuildBloomFilter();

  // Used when generating the bloom filter.  Reads a small number of hostkeys
  // starting at the given row id.
  void OnReadHostKeys(int start_id);

  // Called when we finished reading all the hostkeys from the database during
  // bloom filter generation.
  void OnDoneReadingHostKeys();

  // NOTE(review): undocumented in the original; presumably these schedule and
  // run the batched ("throttled") write work described below -- confirm.
  void StartThrottledWork();
  void RunThrottledWork();

  // Used when processing an add-del, add chunk and sub chunk commands in small
  // batches so that the db thread is never blocked.  They return true if
  // complete, or false if there's still more work to do.
  bool ProcessChunks();
  bool ProcessAddDel();

  bool ProcessAddChunks(std::deque<SBChunk>* chunks);
  bool ProcessSubChunks(std::deque<SBChunk>* chunks);

  // NOTE(review): undocumented in the original; presumably paired calls that
  // use |transaction_count_| and |transaction_| -- confirm in the .cc.
  void BeginTransaction();
  void EndTransaction();

  // Processes an add-del command, which deletes all the prefixes that came
  // from that add chunk id.
  void AddDel(const std::string& list_name, int add_chunk_id);

  // Processes a sub-del command, which just removes the sub chunk id from
  // our list.
  void SubDel(const std::string& list_name, int sub_chunk_id);

  // Looks up any cached full hashes we may have for |prefix_hits| and reports
  // them through |full_hits|.
  void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits,
                           std::vector<SBFullHashResult>* full_hits,
                           Time last_update);

  // Remove cached entries that have prefixes contained in the entry.
  void ClearCachedHashes(const SBEntry* entry);

  // Remove all GetHash entries that match the list and chunk id from an AddDel.
  void ClearCachedHashesForChunk(int list_id, int add_chunk_id);

  // NOTE(review): undocumented in the original; presumably the first schedules
  // the second through |reset_factory_| to reset a corrupt database --
  // confirm in the .cc.
  void HandleCorruptDatabase();
  void OnHandleCorruptDatabase();

  // Runs a small amount of time after the machine has resumed operation from
  // a low power state.
  void OnResumeDone();

  // The database connection.
  sqlite3* db_;

  // Cache of compiled statements for our database.
  scoped_ptr<SqliteStatementCache> statement_cache_;

  // Transaction bookkeeping (see BeginTransaction / EndTransaction).
  int transaction_count_;
  scoped_ptr<SQLTransaction> transaction_;

  // True iff the database has been opened successfully.
  bool init_;

  // NOTE(review): presumably the filename passed to Init() -- confirm.
  std::wstring filename_;

  // Used by the bloom filter.
  // NOTE(review): the two counters look like read / false-positive statistics
  // judging by their names -- confirm in the .cc.
  std::wstring bloom_filter_filename_;
  scoped_ptr<BloomFilter> bloom_filter_;
  int bloom_filter_read_count_;
  int bloom_filter_fp_count_;

  // These are temp variables used when rebuilding the bloom filter.
  bool bloom_filter_building_;
  std::vector<int> bloom_filter_temp_hostkeys_;
  int bloom_filter_last_hostkey_;
  Time bloom_filter_rebuild_time_;

  // Used to store throttled work for commands that write to the database.
  std::queue<std::deque<SBChunk>*> pending_chunks_;

  // Used during processing of an add chunk.
  std::string add_chunk_modified_hosts_;

  // One queued unit of add-del work: the list and add chunk concerned, plus
  // a set of host keys.
  struct AddDelWork {
    int list_id;
    int add_chunk_id;
    std::vector<std::string> hostkeys;
  };

  // Queue of add-del work items awaiting processing.
  std::queue<AddDelWork> pending_add_del_;

  // Controls whether database writes are done synchronously in one go or
  // asynchronously in small chunks.
  bool asynchronous_;

  // Called after an add/sub chunk is processed.
  Callback0::Type* chunk_inserted_callback_;

  // Used to schedule small bits of work when writing to the database.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> process_factory_;

  // Used to schedule reading the database to rebuild the bloom filter.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_read_factory_;

  // Used to schedule writing the bloom filter after an update.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_write_factory_;

  // Used to schedule resetting the database because of corruption.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> reset_factory_;

  // Used to schedule resuming from a lower power state.
  ScopedRunnableMethodFactory<SafeBrowsingDatabase> resume_factory_;

  // Used for caching GetHash results.  Declared as a plain struct: in C++ the
  // struct name is already a type name, so no typedef wrapper is needed.
  struct HashCacheEntry {
    SBFullHash full_hash;
    int list_id;
    int add_chunk_id;
    Time received;
  };

  // Maps a hash prefix to the cached full-hash entries stored for it.
  typedef std::list<HashCacheEntry> HashList;
  typedef base::hash_map<SBPrefix, HashList> HashCache;
  HashCache hash_cache_;

  // Cache of prefixes that returned empty results (no full hash match).
  std::set<SBPrefix> prefix_miss_cache_;

  // The amount of time, in milliseconds, to wait before the next disk write.
  int disk_delay_;

  DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingDatabase);
};

#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__