summaryrefslogtreecommitdiffstats
path: root/chrome/browser/safe_browsing/safe_browsing_util.h
blob: f43d85b2c0960cad737e9ab6a582a4e565e34280 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Utilities for the SafeBrowsing code.

#ifndef CHROME_BROWSER_SAFE_BROWSING_UTIL_H__
#define CHROME_BROWSER_SAFE_BROWSING_UTIL_H__

#include <string.h>

#include <deque>
#include <string>
#include <vector>

#include "base/scoped_ptr.h"
#include "chrome/browser/safe_browsing/chunk_range.h"

class GURL;

// Compile-time switch for SafeBrowsing debug logging.  Uncomment the define
// below (or define SB_LOGGING_ENABLED before this point) to turn SB_DLOG
// output on.
//#define SB_LOGGING_ENABLED
#ifdef SB_LOGGING_ENABLED
// Logging enabled: DLOG_IF is given the constant 1 as its second argument.
// NOTE(review): the |severity| argument is not used by either expansion; both
// always pass INFO to DLOG_IF.  Confirm whether that is intended.
// NOTE(review): DLOG_IF is presumably provided by base/logging.h, which this
// header does not include directly -- confirm.
#define SB_DLOG(severity) DLOG_IF(INFO, 1)
#else
// Logging disabled: DLOG_IF is given the constant 0 as its second argument.
#define SB_DLOG(severity) DLOG_IF(INFO, 0)
#endif

// forward declaration
class SBEntry;

// Widely used typedefs -------------------------------------------------------

// Container for holding a chunk URL and the MAC of the contents of the URL.
// Declared as a named struct (rather than a typedef of an anonymous struct) so
// that it is consistent with the other containers in this file and can be
// forward-declared by other headers.
struct ChunkUrl {
  std::string url;  // The chunk URL.
  std::string mac;  // MAC of the contents of |url|.
};

// A truncated hash's type.  The SBEntry documentation in this file describes
// truncated-hash prefixes as 4 bytes.
// NOTE(review): this presumably relies on int being 32 bits wide on every
// supported platform; the width is not enforced here -- confirm.
typedef int SBPrefix;

// A full hash: 32 raw bytes.  Declared as a named struct (rather than a
// typedef of an anonymous struct) so that it is consistent with the other
// containers in this file and can be forward-declared by other headers.
struct SBFullHash {
  char full_hash[32];
};

// Two full hashes compare equal when every byte of |full_hash| is identical.
inline bool operator==(const SBFullHash& lhs, const SBFullHash& rhs) {
  // Take the length from the array being compared (not from the enclosing
  // struct) so the comparison stays within |full_hash| even if SBFullHash ever
  // gains padding or additional members.
  return memcmp(lhs.full_hash, rhs.full_hash, sizeof(lhs.full_hash)) == 0;
}

// Container for information about a specific host in an add/sub chunk.
struct SBChunkHost {
  // NOTE(review): presumably the truncated hash of the host key -- confirm.
  SBPrefix host;
  // The entry that goes with |host|.
  // NOTE(review): raw pointer; presumably created with SBEntry::Create and
  // released with SBEntry::Destroy (e.g. from safe_browsing_util::FreeChunks)
  // -- confirm ownership against the callers.
  SBEntry* entry;
};

// Container for an add/sub chunk.
struct SBChunk {
  int chunk_number;               // The number of this chunk.
  std::deque<SBChunkHost> hosts;  // The per-host records held by this chunk.
};

// Used when we get a gethash response.
struct SBFullHashResult {
  SBFullHash hash;        // The 32-byte full hash.
  // NOTE(review): presumably the name of the list that |hash| belongs to --
  // confirm.
  std::string list_name;
  // NOTE(review): presumably the id of the add chunk that |hash| came from --
  // confirm.
  int add_chunk_id;
};

// Describes one list held in the database: the list's name together with its
// add-chunk and sub-chunk ranges, each stored as a string.
struct SBListChunkRanges {
  // Name of the list.
  std::string name;
  // Ranges for the add chunks.
  std::string adds;
  // Ranges for the sub chunks.
  std::string subs;

  // Builds the description of the list called |n|; |adds| and |subs| start out
  // empty.  The constructor is not marked explicit, so a std::string converts
  // to SBListChunkRanges implicitly.
  SBListChunkRanges(const std::string& n)
      : name(n),
        adds(),
        subs() {
  }
};

// Container for deleting chunks from the database.
struct SBChunkDelete {
  // NOTE(review): presumably the name of the list the deletion applies to --
  // confirm.
  std::string list_name;
  // NOTE(review): presumably true when the ranges refer to sub chunks and
  // false when they refer to add chunks -- confirm.
  bool is_sub_del;
  // The chunk ranges to delete.
  std::vector<ChunkRange> chunk_del;
};


// Holds information about the prefixes for a hostkey.  Prefixes can either be
// 4 bytes (truncated hash) or 32 bytes (full hash).
// For adds:
//   [list id][chunk id][prefix count (0..n)][prefix1][prefix2]
// For subs:
//   [list id][chunk id (only used if prefix count is 0)][prefix count (0..n)]
//       [add chunk][prefix][add chunk][prefix]
//
// NOTE(review): the diagrams above do not show the |type| field that the Data
// struct stores between the chunk id and the prefix count.
//
// The constructor and destructor are private: instances are obtained from
// Create() and released with Destroy().
class SBEntry {
 public:
  // The kind of prefixes stored in an entry.  All prefixes in one entry share
  // the same type.
  enum Type {
    ADD_PREFIX,     // 4 byte add entry.
    SUB_PREFIX,     // 4 byte sub entry.
    ADD_FULL_HASH,  // 32 byte add entry.
    SUB_FULL_HASH,  // 32 byte sub entry.
  };

  // The minimum size of an SBEntry.
  // NOTE(review): presumably in bytes, like Size() -- the value is defined
  // outside this header; confirm.
  static const int kMinSize;

  // Creates a SBEntry with the necessary size for the given number of prefixes.
  // Caller owns the object and needs to free it by calling Destroy.
  static SBEntry* Create(Type type, int prefix_count);

  // Frees the entry's memory.
  void Destroy();

  // Returns whether this entry is internally consistent.
  bool IsValid() const;

  // Returns how many bytes this entry is.
  int Size() const;

  // Helper to return how much memory a given Entry would require.
  static int Size(Type type, int prefix_count);

  // Simple accessors for the fixed-size header fields stored in |data_|.  The
  // prefix count and type have public getters only; their setters are private.
  void set_list_id(int list_id) { data_.list_id = list_id; }
  int list_id() const { return data_.list_id; }
  void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
  int chunk_id() const { return data_.chunk_id; }
  int prefix_count() const { return data_.prefix_count; }
  Type type() const { return data_.type; }

  // Returns a new entry that is larger by the given number of prefixes, with
  // all the existing data already copied over.  The old entry is destroyed.
  SBEntry* Enlarge(int extra_prefixes);

  // Removes the prefix at the given index.
  void RemovePrefix(int index);

  // Returns true if the prefix/hash at the given index is equal to a
  // prefix/hash at another entry's index.  Works with all combinations of
  // add/subs as long as they're the same size.  Also checks chunk_ids.
  bool PrefixesMatch(int index, const SBEntry* that, int that_index) const;

  // Returns true if the add prefix/hash at the given index is equal to the
  // given full hash.
  bool AddPrefixMatches(int index, const SBFullHash& full_hash) const;

  // Returns true if this is an add entry.
  bool IsAdd() const;

  // Returns true if this is a sub entry.
  bool IsSub() const;

  // Helper to return the size of the prefixes.
  int HashLen() const;

  // Helper to return the size of each prefix entry (i.e. for subs this
  // includes an add chunk id).
  static int PrefixSize(Type type);

  // For add entries, returns the add chunk id.  For sub entries, returns the
  // add_chunk id for the prefix at the given index.
  int ChunkIdAtPrefix(int index) const;

  // Used for sub chunks to set the chunk id at a given index.
  void SetChunkIdAtPrefix(int index, int chunk_id);

  // Return the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  const SBPrefix& PrefixAt(int index) const;
  const SBFullHash& FullHashAt(int index) const;

  // Set the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  void SetPrefixAt(int index, const SBPrefix& prefix);
  void SetFullHashAt(int index, const SBFullHash& full_hash);

 private:
  // Private so that entries can only be created through Create() and released
  // through Destroy().
  SBEntry();
  ~SBEntry();

  void set_prefix_count(int count) { data_.prefix_count = count; }
  void set_type(Type type) { data_.type = type; }

  // Container for a sub prefix.
  struct SBSubPrefix {
    int add_chunk;    // Add chunk id that this sub prefix refers to.
    SBPrefix prefix;  // The 4 byte prefix.
  };

  // Container for a sub full hash.
  struct SBSubFullHash {
    int add_chunk;      // Add chunk id that this sub full hash refers to.
    SBFullHash prefix;  // The 32 byte full hash.
  };

  // Keep the fixed data together in one struct so that we can get its size
  // easily.  If any of this is modified, the database will have to be cleared.
  struct Data {
    int list_id;
    // For adds, this is the add chunk number.
    // For subs: if prefix_count is 0 then this is the add chunk that this sub
    //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
    //     or sub_full_hashes is used for each corresponding prefix.
    int chunk_id;
    Type type;
    int prefix_count;
  };

  // The prefixes union must follow the fixed data so that they're contiguous
  // in memory.
  Data data_;
  // Each array is declared with a single element; which member is meaningful
  // depends on |data_.type|.
  // NOTE(review): presumably Create() allocates enough storage past the end of
  // the object for |prefix_count| elements -- confirm in the implementation.
  union {
    SBPrefix add_prefixes_[1];
    SBSubPrefix sub_prefixes_[1];
    SBFullHash add_full_hashes_[1];
    SBSubFullHash sub_full_hashes_[1];
  };
};


// Holds the hostkey specific information in the database.  This is basically a
// collection of SBEntry objects, kept in a single character buffer.
class SBHostInfo {
 public:
  SBHostInfo();
  // By default, an empty SBHostInfo is created.  Call this to deserialize from
  // the database.  Returns false if |data| is not internally consistent.
  bool Initialize(const void* data, int size);

  // Adds the given prefixes to the unsafe list.  Note that the prefixes array
  // might be modified internally.
  void AddPrefixes(SBEntry* entry);

  // Remove the given prefixes.  If |entry| has no prefixes, then all entries
  // from the add chunk it refers to are removed.  Otherwise the entry's own
  // chunk id is ignored and the add chunk id stored with each prefix is
  // checked.  If |persist| is true and no matches are found, then the sub
  // information will be stored and checked in case a future add comes in with
  // that chunk_id.
  void RemovePrefixes(SBEntry* entry, bool persist);

  // Returns true if the host entry contains any of the prefixes.  If a full
  // hash matched, then list_id contains the list id.  Otherwise list_id is -1
  // and prefix_hits contains the matching prefixes if any are matched, or is
  // empty if the entire host is blacklisted.
  bool Contains(const std::vector<SBFullHash>& prefixes,
                int* list_id,
                std::vector<SBPrefix>* prefix_hits);

  // Used for serialization: the raw buffer and its size.  The returned pointer
  // is owned by this object.
  // NOTE(review): |size_| is presumably the buffer length in bytes -- confirm.
  const void* data() const { return data_.get(); }
  // Returned by value, so no const qualifier on the return type (it would be
  // ignored and only triggers compiler warnings).
  int size() const { return size_; }

 private:
  // Checks data_ for internal consistency.
  bool IsValid();

  // Allows enumeration of Entry structs.  To start off, pass NULL for *entry,
  // and then afterwards pass back the pointer obtained from the previous call.
  // NOTE(review): presumably returns false once there are no more entries --
  // confirm in the implementation.
  bool GetNextEntry(const SBEntry** entry);

  // NOTE(review): presumably appends a copy of |entry| to |data_| -- confirm.
  void Add(const SBEntry* entry);

  // NOTE(review): presumably removes the stored sub entry matching |list_id|
  // and |chunk_id| -- confirm.
  void RemoveSubEntry(int list_id, int chunk_id);

  // Collection of SBEntry objects.
  scoped_array<char> data_;
  int size_;
};


// Helper functions -----------------------------------------------------------

namespace safe_browsing_util {

// NOTE(review): presumably releases the SBEntry objects referenced by the
// hosts of every chunk in |chunks| -- confirm against the implementation.
void FreeChunks(std::deque<SBChunk>* chunks);

// Given a URL, returns all the hosts we need to check.  They are returned
// in order of size (i.e. b.c is first, then a.b.c).  Results are written to
// |hosts|.
void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);

// Given a URL, returns all the paths we need to check.  Results are written to
// |paths|.
void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);

// Given a URL, compare all the possible host + path full hashes to the set of
// provided full hashes.  Returns the index of the match if one is found, or -1
// otherwise.
int CompareFullHashes(const GURL& url,
                      const std::vector<SBFullHashResult>& full_hashes);

// NOTE(review): judging by the names, these report whether |list_name|
// identifies a phishing list or a malware list respectively; the matching
// rules live in the implementation -- confirm.
bool IsPhishingList(const std::string& list_name);
bool IsMalwareList(const std::string& list_name);

// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
// NOTE(review): |data_length| is presumably the length of |data| in bytes --
// confirm.
bool VerifyMAC(const std::string& key,
               const std::string& mac,
               const char* data,
               int data_length);

// NOTE(review): presumably builds the URL used to report |url_to_report| as
// phishing, based on |report_page| -- confirm against the implementation.
GURL GeneratePhishingReportUrl(const std::string& report_page,
                               const std::string& url_to_report);

}  // namespace safe_browsing_util

#endif  // CHROME_BROWSER_SAFE_BROWSING_UTIL_H__