summaryrefslogtreecommitdiffstats
path: root/chrome/browser/safe_browsing/safe_browsing_util.h
blob: 5b01b44c3a8fa3437ce63d0f24a3909461a1688f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Utilities for the SafeBrowsing code.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
#pragma once

#include <cstring>
#include <deque>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "chrome/browser/safe_browsing/chunk_range.h"

class GURL;

class SBEntry;

// A truncated hash's type.
typedef int SBPrefix;

// Container for holding a chunk URL and the MAC of the contents of the URL.
struct ChunkUrl {
  std::string url;
  std::string mac;
  std::string list_name;
};

// A full hash.
union SBFullHash {
  char full_hash[32];
  SBPrefix prefix;
};

inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) {
  return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0;
}

// Container for information about a specific host in an add/sub chunk.
struct SBChunkHost {
  SBPrefix host;
  SBEntry* entry;
};

// Container for an add/sub chunk.
struct SBChunk {
  SBChunk();
  ~SBChunk();

  int chunk_number;
  int list_id;
  bool is_add;
  std::deque<SBChunkHost> hosts;
};

// Container for a set of chunks.  Interim wrapper to replace use of
// |std::deque<SBChunk>| with something having safer memory semantics.
// management.
// TODO(shess): |SBEntry| is currently a very roundabout way to hold
// things pending storage.  It could be replaced with the structures
// used in SafeBrowsingStore, then lots of bridging code could
// dissappear.
class SBChunkList {
 public:
  SBChunkList();
  ~SBChunkList();

  // Implement that subset of the |std::deque<>| interface which
  // callers expect.
  bool empty() const { return chunks_.empty(); }
  size_t size() { return chunks_.size(); }

  void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
  SBChunk& back() { return chunks_.back(); }
  SBChunk& front() { return chunks_.front(); }
  const SBChunk& front() const { return chunks_.front(); }

  typedef std::vector<SBChunk>::const_iterator const_iterator;
  const_iterator begin() const { return chunks_.begin(); }
  const_iterator end() const { return chunks_.end(); }

  typedef std::vector<SBChunk>::iterator iterator;
  iterator begin() { return chunks_.begin(); }
  iterator end() { return chunks_.end(); }

  SBChunk& operator[](size_t n) { return chunks_[n]; }
  const SBChunk& operator[](size_t n) const { return chunks_[n]; }

  // Calls |SBEvent::Destroy()| before clearing |chunks_|.
  void clear();

 private:
  std::vector<SBChunk> chunks_;

  DISALLOW_COPY_AND_ASSIGN(SBChunkList);
};

// Used when we get a gethash response.
struct SBFullHashResult {
  SBFullHash hash;
  std::string list_name;
  int add_chunk_id;
};

// Contains information about a list in the database.
struct SBListChunkRanges {
  explicit SBListChunkRanges(const std::string& n);

  std::string name;  // The list name.
  std::string adds;  // The ranges for add chunks.
  std::string subs;  // The ranges for sub chunks.
};

// Container for deleting chunks from the database.
struct SBChunkDelete {
  SBChunkDelete();
  ~SBChunkDelete();

  std::string list_name;
  bool is_sub_del;
  std::vector<ChunkRange> chunk_del;
};


// SBEntry ---------------------------------------------------------------------

// Holds information about the prefixes for a hostkey.  prefixes can either be
// 4 bytes (truncated hash) or 32 bytes (full hash).
// For adds:
//   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
// For subs:
//   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
//       [add chunk][prefix][add chunk][prefix]
class SBEntry {
 public:
  enum Type {
    ADD_PREFIX,     // 4 byte add entry.
    SUB_PREFIX,     // 4 byte sub entry.
    ADD_FULL_HASH,  // 32 byte add entry.
    SUB_FULL_HASH,  // 32 byte sub entry.
  };

  // Creates a SBEntry with the necessary size for the given number of prefixes.
  // Caller ownes the object and needs to free it by calling Destroy.
  static SBEntry* Create(Type type, int prefix_count);

  // Frees the entry's memory.
  void Destroy();

  void set_list_id(int list_id) { data_.list_id = list_id; }
  int list_id() const { return data_.list_id; }
  void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
  int chunk_id() const { return data_.chunk_id; }
  int prefix_count() const { return data_.prefix_count; }

  // Returns true if this is a prefix as opposed to a full hash.
  bool IsPrefix() const {
    return type() == ADD_PREFIX || type() == SUB_PREFIX;
  }

  // Returns true if this is an add entry.
  bool IsAdd() const {
    return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
  }

  // Returns true if this is a sub entry.
  bool IsSub() const {
    return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
  }

  // Helper to return the size of the prefixes.
  int HashLen() const {
    return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
  }

  // For add entries, returns the add chunk id.  For sub entries, returns the
  // add_chunk id for the prefix at the given index.
  int ChunkIdAtPrefix(int index) const;

  // Used for sub chunks to set the chunk id at a given index.
  void SetChunkIdAtPrefix(int index, int chunk_id);

  // Return the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  const SBPrefix& PrefixAt(int index) const;
  const SBFullHash& FullHashAt(int index) const;

  // Return the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  void SetPrefixAt(int index, const SBPrefix& prefix);
  void SetFullHashAt(int index, const SBFullHash& full_hash);

 private:
  // Container for a sub prefix.
  struct SBSubPrefix {
    int add_chunk;
    SBPrefix prefix;
  };

  // Container for a sub full hash.
  struct SBSubFullHash {
    int add_chunk;
    SBFullHash prefix;
  };

  // Keep the fixed data together in one struct so that we can get its size
  // easily.  If any of this is modified, the database will have to be cleared.
  struct Data {
    int list_id;
    // For adds, this is the add chunk number.
    // For subs: if prefix_count is 0 then this is the add chunk that this sub
    //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
    //     or sub_full_hashes is used for each corresponding prefix.
    int chunk_id;
    Type type;
    int prefix_count;
  };

  SBEntry();
  ~SBEntry();

  // Helper to return the size of each prefix entry (i.e. for subs this
  // includes an add chunk id).
  static int PrefixSize(Type type);

  // Helper to return how much memory a given Entry would require.
  static int Size(Type type, int prefix_count);

  // Returns how many bytes this entry is.
  int Size() const;

  Type type() const { return data_.type; }

  void set_prefix_count(int count) { data_.prefix_count = count; }
  void set_type(Type type) { data_.type = type; }

  // The prefixes union must follow the fixed data so that they're contiguous
  // in memory.
  Data data_;
  union {
    SBPrefix add_prefixes_[1];
    SBSubPrefix sub_prefixes_[1];
    SBFullHash add_full_hashes_[1];
    SBSubFullHash sub_full_hashes_[1];
  };
};


// Utility functions -----------------------------------------------------------

namespace safe_browsing_util {

// SafeBrowsing list names.
extern const char kMalwareList[];
extern const char kPhishingList[];
// Binary Download list names.
extern const char kBinUrlList[];
extern const char kBinHashList[];

enum ListType {
  INVALID = -1,
  MALWARE = 0,
  PHISH = 1,
  BINURL = 2,
  BINHASH = 3,
};

// Maps a list name to ListType.
int GetListId(const std::string& name);
// Maps a ListId to list name. Return false if fails.
bool GetListName(int list_id, std::string* list);


// Canonicalizes url as per Google Safe Browsing Specification.
// See section 6.1 in
// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
                     std::string* canonicalized_path,
                     std::string* canonicalized_query);

// Given a URL, returns all the hosts we need to check.  They are returned
// in order of size (i.e. b.c is first, then a.b.c).
void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);

// Given a URL, returns all the paths we need to check.
void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);

int GetHashIndex(const SBFullHash& hash,
                 const std::vector<SBFullHashResult>& full_hashes);

// Given a URL, compare all the possible host + path full hashes to the set of
// provided full hashes.  Returns the index of the match if one is found, or -1
// otherwise.
int GetUrlHashIndex(const GURL& url,
                    const std::vector<SBFullHashResult>& full_hashes);

bool IsPhishingList(const std::string& list_name);
bool IsMalwareList(const std::string& list_name);
bool IsBadbinurlList(const std::string& list_name);
bool IsBadbinhashList(const std::string& list_name);

// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
bool VerifyMAC(const std::string& key,
               const std::string& mac,
               const char* data,
               int data_length);

GURL GeneratePhishingReportUrl(const std::string& report_page,
                               const std::string& url_to_report);

void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out);

}  // namespace safe_browsing_util

#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_