summaryrefslogtreecommitdiffstats
path: root/base/containers/hash_tables.h
blob: 6f37c49c3e476f7ccf9f0685d8ada16befa1b03d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//

//
// Deal with the differences between Microsoft and GNU implemenations
// of hash_map. Allows all platforms to use |base::hash_map| and
// |base::hash_set|.
//  eg:
//   base::hash_map<int> my_map;
//   base::hash_set<int> my_set;
//
// NOTE: It is an explicit non-goal of this class to provide a generic hash
// function for pointers.  If you want to hash a pointers to a particular class,
// please define the template specialization elsewhere (for example, in its
// header file) and keep it specific to just pointers to that class.  This is
// because identity hashes are not desirable for all types that might show up
// in containers as pointers.

#ifndef BASE_CONTAINERS_HASH_TABLES_H_
#define BASE_CONTAINERS_HASH_TABLES_H_

#include <utility>

#include "base/basictypes.h"
#include "base/strings/string16.h"
#include "build/build_config.h"

#if defined(COMPILER_MSVC)
#include <hash_map>
#include <hash_set>

#define BASE_HASH_NAMESPACE stdext

#elif defined(COMPILER_GCC)
#if defined(OS_ANDROID)
#define BASE_HASH_NAMESPACE std
#else
#define BASE_HASH_NAMESPACE __gnu_cxx
#endif

// This is a hack to disable the gcc 4.4 warning about hash_map and hash_set
// being deprecated.  We can get rid of this when we upgrade to VS2008 and we
// can use <tr1/unordered_map> and <tr1/unordered_set>.
#ifdef __DEPRECATED
#define CHROME_OLD__DEPRECATED __DEPRECATED
#undef __DEPRECATED
#endif

#if defined(OS_ANDROID)
#include <hash_map>
#include <hash_set>
#else
#include <ext/hash_map>
#include <ext/hash_set>
#endif

#include <string>

#ifdef CHROME_OLD__DEPRECATED
#define __DEPRECATED CHROME_OLD__DEPRECATED
#undef CHROME_OLD__DEPRECATED
#endif

namespace BASE_HASH_NAMESPACE {

#if !defined(OS_ANDROID)
// The GNU C++ library provides identity hash functions for many integral types,
// but not for |long long|.  This hash function will truncate if |size_t| is
// narrower than |long long|.  This is probably good enough for what we will
// use it for.

#define DEFINE_TRIVIAL_HASH(integral_type) \
    template<> \
    struct hash<integral_type> { \
      std::size_t operator()(integral_type value) const { \
        return static_cast<std::size_t>(value); \
      } \
    }

DEFINE_TRIVIAL_HASH(long long);
DEFINE_TRIVIAL_HASH(unsigned long long);

#undef DEFINE_TRIVIAL_HASH
#endif  // !defined(OS_ANDROID)

// Implement string hash functions so that strings of various flavors can
// be used as keys in STL maps and sets.  The hash algorithm comes from the
// GNU C++ library, in <tr1/functional>.  It is duplicated here because GCC
// versions prior to 4.3.2 are unable to compile <tr1/functional> when RTTI
// is disabled, as it is in our build.

#define DEFINE_STRING_HASH(string_type) \
    template<> \
    struct hash<string_type> { \
      std::size_t operator()(const string_type& s) const { \
        std::size_t result = 0; \
        for (string_type::const_iterator i = s.begin(); i != s.end(); ++i) \
          result = (result * 131) + *i; \
        return result; \
      } \
    }

DEFINE_STRING_HASH(std::string);
DEFINE_STRING_HASH(base::string16);

#undef DEFINE_STRING_HASH

}  // namespace BASE_HASH_NAMESPACE

#else  // COMPILER
#error define BASE_HASH_NAMESPACE for your compiler
#endif  // COMPILER

namespace base {
using BASE_HASH_NAMESPACE::hash_map;
using BASE_HASH_NAMESPACE::hash_multimap;
using BASE_HASH_NAMESPACE::hash_multiset;
using BASE_HASH_NAMESPACE::hash_set;

// Implement hashing for pairs of at-most 32 bit integer values.
// When size_t is 32 bits, we turn the 64-bit hash code into 32 bits by using
// multiply-add hashing. This algorithm, as described in
// Theorem 4.3.3 of the thesis "Über die Komplexität der Multiplikation in
// eingeschränkten Branchingprogrammmodellen" by Woelfel, is:
//
//   h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
//
// Contact danakj@chromium.org for any questions.
inline std::size_t HashInts32(uint32 value1, uint32 value2) {
  uint64 value1_64 = value1;
  uint64 hash64 = (value1_64 << 32) | value2;

  if (sizeof(std::size_t) >= sizeof(uint64))
    return static_cast<std::size_t>(hash64);

  uint64 odd_random = 481046412LL << 32 | 1025306955LL;
  uint32 shift_random = 10121U << 16;

  hash64 = hash64 * odd_random + shift_random;
  std::size_t high_bits = static_cast<std::size_t>(
      hash64 >> (8 * (sizeof(uint64) - sizeof(std::size_t))));
  return high_bits;
}

// Implement hashing for pairs of up-to 64-bit integer values.
// We use the compound integer hash method to produce a 64-bit hash code, by
// breaking the two 64-bit inputs into 4 32-bit values:
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
// Then we reduce our result to 32 bits if required, similar to above.
inline std::size_t HashInts64(uint64 value1, uint64 value2) {
  uint32 short_random1 = 842304669U;
  uint32 short_random2 = 619063811U;
  uint32 short_random3 = 937041849U;
  uint32 short_random4 = 3309708029U;

  uint32 value1a = static_cast<uint32>(value1 & 0xffffffff);
  uint32 value1b = static_cast<uint32>((value1 >> 32) & 0xffffffff);
  uint32 value2a = static_cast<uint32>(value2 & 0xffffffff);
  uint32 value2b = static_cast<uint32>((value2 >> 32) & 0xffffffff);

  uint64 product1 = static_cast<uint64>(value1a) * short_random1;
  uint64 product2 = static_cast<uint64>(value1b) * short_random2;
  uint64 product3 = static_cast<uint64>(value2a) * short_random3;
  uint64 product4 = static_cast<uint64>(value2b) * short_random4;

  uint64 hash64 = product1 + product2 + product3 + product4;

  if (sizeof(std::size_t) >= sizeof(uint64))
    return static_cast<std::size_t>(hash64);

  uint64 odd_random = 1578233944LL << 32 | 194370989LL;
  uint32 shift_random = 20591U << 16;

  hash64 = hash64 * odd_random + shift_random;
  std::size_t high_bits = static_cast<std::size_t>(
      hash64 >> (8 * (sizeof(uint64) - sizeof(std::size_t))));
  return high_bits;
}

#define DEFINE_32BIT_PAIR_HASH(Type1, Type2) \
inline std::size_t HashPair(Type1 value1, Type2 value2) { \
  return HashInts32(value1, value2); \
}

DEFINE_32BIT_PAIR_HASH(int16, int16);
DEFINE_32BIT_PAIR_HASH(int16, uint16);
DEFINE_32BIT_PAIR_HASH(int16, int32);
DEFINE_32BIT_PAIR_HASH(int16, uint32);
DEFINE_32BIT_PAIR_HASH(uint16, int16);
DEFINE_32BIT_PAIR_HASH(uint16, uint16);
DEFINE_32BIT_PAIR_HASH(uint16, int32);
DEFINE_32BIT_PAIR_HASH(uint16, uint32);
DEFINE_32BIT_PAIR_HASH(int32, int16);
DEFINE_32BIT_PAIR_HASH(int32, uint16);
DEFINE_32BIT_PAIR_HASH(int32, int32);
DEFINE_32BIT_PAIR_HASH(int32, uint32);
DEFINE_32BIT_PAIR_HASH(uint32, int16);
DEFINE_32BIT_PAIR_HASH(uint32, uint16);
DEFINE_32BIT_PAIR_HASH(uint32, int32);
DEFINE_32BIT_PAIR_HASH(uint32, uint32);

#undef DEFINE_32BIT_PAIR_HASH

#define DEFINE_64BIT_PAIR_HASH(Type1, Type2) \
inline std::size_t HashPair(Type1 value1, Type2 value2) { \
  return HashInts64(value1, value2); \
}

DEFINE_64BIT_PAIR_HASH(int16, int64);
DEFINE_64BIT_PAIR_HASH(int16, uint64);
DEFINE_64BIT_PAIR_HASH(uint16, int64);
DEFINE_64BIT_PAIR_HASH(uint16, uint64);
DEFINE_64BIT_PAIR_HASH(int32, int64);
DEFINE_64BIT_PAIR_HASH(int32, uint64);
DEFINE_64BIT_PAIR_HASH(uint32, int64);
DEFINE_64BIT_PAIR_HASH(uint32, uint64);
DEFINE_64BIT_PAIR_HASH(int64, int16);
DEFINE_64BIT_PAIR_HASH(int64, uint16);
DEFINE_64BIT_PAIR_HASH(int64, int32);
DEFINE_64BIT_PAIR_HASH(int64, uint32);
DEFINE_64BIT_PAIR_HASH(int64, int64);
DEFINE_64BIT_PAIR_HASH(int64, uint64);
DEFINE_64BIT_PAIR_HASH(uint64, int16);
DEFINE_64BIT_PAIR_HASH(uint64, uint16);
DEFINE_64BIT_PAIR_HASH(uint64, int32);
DEFINE_64BIT_PAIR_HASH(uint64, uint32);
DEFINE_64BIT_PAIR_HASH(uint64, int64);
DEFINE_64BIT_PAIR_HASH(uint64, uint64);

#undef DEFINE_64BIT_PAIR_HASH
}  // namespace base

namespace BASE_HASH_NAMESPACE {

// Implement methods for hashing a pair of integers, so they can be used as
// keys in STL containers.

#if defined(COMPILER_MSVC)

template<typename Type1, typename Type2>
inline std::size_t hash_value(const std::pair<Type1, Type2>& value) {
  return base::HashPair(value.first, value.second);
}

#elif defined(COMPILER_GCC)
template<typename Type1, typename Type2>
struct hash<std::pair<Type1, Type2> > {
  std::size_t operator()(std::pair<Type1, Type2> value) const {
    return base::HashPair(value.first, value.second);
  }
};

#else
#error define hash<std::pair<Type1, Type2> > for your compiler
#endif  // COMPILER

}

#undef DEFINE_PAIR_HASH_FUNCTION_START
#undef DEFINE_PAIR_HASH_FUNCTION_END

#endif  // BASE_CONTAINERS_HASH_TABLES_H_