summaryrefslogtreecommitdiffstats
path: root/chrome/common/visitedlink_common.h
blob: 546bd4fa3cbaeadaae5a07f014bf070fb929f9a3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__
#define CHROME_COMMON_VISITEDLINK_COMMON_H__

#include <vector>

#include "base/basictypes.h"

class GURL;

// number of bytes in the salt
#define LINK_SALT_LENGTH 8

// A multiprocess-safe database of the visited links for the browser. There
// should be exactly one process that has write access (implemented by
// VisitedLinkMaster), while all other processes should be read-only
// (implemented by VisitedLinkSlave). These other processes add links by calling
// the writer process to add them for it. The writer may also notify the readers
// to replace their table when the table is resized.
//
// IPC is not implemented in these classes. This is done through callback
// functions supplied by the creator of these objects to allow more flexibility,
// especially for testing.
//
// This class defines the common base for these others. We implement accessors
// for looking things up in the hash table, and for computing hash values and
// fingerprints. Both the master and the slave inherit from this, and add their
// own code to set up and change these values as their design requires. The
// slave pretty much just sets up the shared memory and saves the pointer. The
// master does a lot of work to manage the table, reading and writing it to and
// from disk, and resizing it when it gets too full.
//
// To ask whether a page is in history, we compute a 64-bit fingerprint of the
// URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
// we consider it visited. Otherwise, it is unvisited. Note that it is possible
// to get collisions, which is the penalty for not storing all URL strings in
// memory (which could get to be more than we want to have in memory). We use
// a salt value for the links on one computer so that an attacker can not
// manually create a link that causes a collision.
class VisitedLinkCommon {
 public:
  // A number that identifies the URL.
  typedef uint64 Fingerprint;
  typedef std::vector<Fingerprint> Fingerprints;

  // A hash value of a fingerprint
  typedef int32 Hash;

  // A fingerprint or hash value that does not exist
  static const Fingerprint null_fingerprint_;
  static const Hash null_hash_;

  VisitedLinkCommon();
  virtual ~VisitedLinkCommon();

  // Returns the fingerprint for the given URL.
  Fingerprint ComputeURLFingerprint(const char* canonical_url,
                                    size_t url_len) const {
    return ComputeURLFingerprint(canonical_url, url_len, salt_);
  }

  // Looks up the given key in the table. The fingerprint for the URL is
  // computed if you call one with the string argument. Returns true if found.
  // Does not modify the hastable.
  bool IsVisited(const char* canonical_url, size_t url_len) const;
  bool IsVisited(const GURL& url) const;
  bool IsVisited(Fingerprint fingerprint) const;

#ifdef UNIT_TEST
  // Returns statistics about DB usage
  void GetUsageStatistics(int32* table_size,
                          VisitedLinkCommon::Fingerprint** fingerprints) {
    *table_size = table_length_;
    *fingerprints = hash_table_;
  }
#endif

 protected:
  // This structure is at the beginning of the shared memory so that the slaves
  // can get stats on the table
  struct SharedHeader {
    // see goes into table_length_
    uint32 length;

    // goes into salt_
    uint8 salt[LINK_SALT_LENGTH];
  };

  // Returns the fingerprint at the given index into the URL table. This
  // function should be called instead of accessing the table directly to
  // contain endian issues.
  Fingerprint FingerprintAt(int32 table_offset) const {
    if (!hash_table_)
      return null_fingerprint_;
    return hash_table_[table_offset];
  }

  // Computes the fingerprint of the given canonical URL. It is static so the
  // same algorithm can be re-used by the table rebuilder, so you will have to
  // pass the salt as a parameter. See the non-static version above if you
  // want to use the current class' salt.
  static Fingerprint ComputeURLFingerprint(const char* canonical_url,
                                           size_t url_len,
                                           const uint8 salt[LINK_SALT_LENGTH]);

  // Computes the hash value of the given fingerprint, this is used as a lookup
  // into the hashtable.
  static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
    if (table_length == 0)
      return null_hash_;
    return static_cast<Hash>(fingerprint % table_length);
  }
  // Uses the current hashtable.
  Hash HashFingerprint(Fingerprint fingerprint) const {
    return HashFingerprint(fingerprint, table_length_);
  }

  // pointer to the first item
  VisitedLinkCommon::Fingerprint* hash_table_;

  // the number of items in the hash table
  int32 table_length_;

  // salt used for each URL when computing the fingerprint
  uint8 salt_[LINK_SALT_LENGTH];

 private:
  DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
};

#endif  // CHROME_COMMON_VISITEDLINK_COMMON_H_