summaryrefslogtreecommitdiffstats
path: root/chrome/common/visitedlink_common.h
blob: 65f540b37ba42c31954c017e82ae12c2f00c6641 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__
#define CHROME_COMMON_VISITEDLINK_COMMON_H__

#include <string>

#include "base/basictypes.h"
#include "base/logging.h"
#include "googleurl/src/gurl.h"

// number of bytes in the salt
#define LINK_SALT_LENGTH 8

// A multiprocess-safe database of the visited links for the browser. There
// should be exactly one process that has write access (implemented by
// VisitedLinkMaster), while all other processes should be read-only
// (implemented by VisitedLinkSlave). These other processes add links by calling
// the writer process to add them for it. The writer may also notify the readers
// to replace their table when the table is resized.
//
// IPC is not implemented in these classes. This is done through callback
// functions supplied by the creator of these objects to allow more flexibility,
// especially for testing.
//
// This class defines the common base for these others. We implement accessors
// for looking things up in the hash table, and for computing hash values and
// fingerprints. Both the master and the slave inherit from this, and add their
// own code to set up and change these values as their design requires. The
// slave pretty much just sets up the shared memory and saves the pointer. The
// master does a lot of work to manage the table, reading and writing it to and
// from disk, and resizing it when it gets too full.
//
// To ask whether a page is in history, we compute a 64-bit fingerprint of the
// URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
// we consider it visited. Otherwise, it is unvisited. Note that it is possible
// to get collisions, which is the penalty for not storing all URL strings in
// memory (which could get to be more than we want to have in memory). We use
// a salt value for the links on one computer so that an attacker can not
// manually create a link that causes a collision.
class VisitedLinkCommon {
 public:
  // A number that identifies the URL.
  typedef uint64 Fingerprint;

  // A hash value of a fingerprint
  typedef int32 Hash;

  // A fingerprint or hash value that does not exist
  static const Fingerprint null_fingerprint_;
  static const Hash null_hash_;

  VisitedLinkCommon();
  virtual ~VisitedLinkCommon();

  // Computes the fingerprint of the key and looks it up in the table. We
  // return true if found. Does not modify the hastable. The input should be
  // the canonical 16-bit URL.
  bool IsVisited(const char* canonical_url, size_t url_len) const;
  bool IsVisited(const GURL& url) const {
    return IsVisited(url.spec().data(), url.spec().size());
  }

#ifdef UNIT_TEST
  // Returns statistics about DB usage
  void GetUsageStatistics(int32* table_size,
                          VisitedLinkCommon::Fingerprint** fingerprints) {
    *table_size = table_length_;
    *fingerprints = hash_table_;
  }
#endif

 protected:
  // This structure is at the beginning of the shared memory so that the slaves
  // can get stats on the table
  struct SharedHeader {
    // see goes into table_length_
    uint32 length;

    // goes into salt_
    uint8 salt[LINK_SALT_LENGTH];
  };

  // Returns the fingerprint at the given index into the URL table. This
  // function should be called instead of accessing the table directly to contain
  // endian issues.
  Fingerprint FingerprintAt(int32 table_offset) const {
    DCHECK(hash_table_);
    if (!hash_table_)
      return 0;
    return hash_table_[table_offset];
  }

  // Returns true if the given fingerprint is in the table.
  bool IsVisited(Fingerprint fingerprint) const;

  // Computes the fingerprint of the given canonical URL. It is static so the
  // same algorithm can be re-used by the table rebuilder, so you will have to
  // pass the salt as a parameter.
  static Fingerprint ComputeURLFingerprint(const char* canonical_url,
                                           size_t url_len,
                                           const uint8 salt[LINK_SALT_LENGTH]);

  // Computes the hash value of the given fingerprint, this is used as a lookup
  // into the hashtable.
  static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
    return static_cast<Hash>(fingerprint % table_length);
  }
  Hash HashFingerprint(Fingerprint fingerprint) const { // uses the current hashtable
    return HashFingerprint(fingerprint, table_length_);
  }

  // pointer to the first item
  VisitedLinkCommon::Fingerprint* hash_table_;

  // the number of items in the hash table
  int32 table_length_;

  // salt used for each URL when computing the fingerprint
  uint8 salt_[LINK_SALT_LENGTH];

 private:
  DISALLOW_EVIL_CONSTRUCTORS(VisitedLinkCommon);
};

#endif // WIN_COMMON_VISITEDLINK_COMMON_H__