summaryrefslogtreecommitdiffstats
path: root/runtime/base
diff options
context:
space:
mode:
authorMathieu Chartier <mathieuc@google.com>2015-03-18 10:39:00 -0700
committerMathieu Chartier <mathieuc@google.com>2015-03-18 13:39:18 -0700
commit47f867a0ae34d743f6159c2261e5b11e39693e15 (patch)
tree3b9129e77ab3e99b211fccc8ab2eba06d0c12957 /runtime/base
parenta2b02f7bb474549ae356b5edfbb27a76e5460c58 (diff)
downloadart-47f867a0ae34d743f6159c2261e5b11e39693e15.zip
art-47f867a0ae34d743f6159c2261e5b11e39693e15.tar.gz
art-47f867a0ae34d743f6159c2261e5b11e39693e15.tar.bz2
Clean up hash set
Added vertical whitespace, const iterators, made some functions const. Change-Id: I188dc0384a98d6dae2822f0ac38b740f2356c23d
Diffstat (limited to 'runtime/base')
-rw-r--r--runtime/base/hash_set.h157
-rw-r--r--runtime/base/hash_set_test.cc4
2 files changed, 105 insertions, 56 deletions
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 992e5b1..ab63ddd 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -50,91 +50,101 @@ class DefaultEmptyFn<T*> {
};
// Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't
-// boxed. Uses linear probing.
-// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item)
+// boxed. Uses linear probing to resolve collisions.
+// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
+// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
+// and more complicated.
template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
class HashSet {
- public:
- static constexpr double kDefaultMinLoadFactor = 0.5;
- static constexpr double kDefaultMaxLoadFactor = 0.9;
- static constexpr size_t kMinBuckets = 1000;
-
- class Iterator {
+ template <class Elem, class HashSetType>
+ class BaseIterator {
public:
- Iterator(const Iterator&) = default;
- Iterator(HashSet* hash_set, size_t index) : hash_set_(hash_set), index_(index) {
+ BaseIterator(const BaseIterator&) = default;
+ BaseIterator(BaseIterator&&) = default;
+ BaseIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {
}
- Iterator& operator=(const Iterator&) = default;
- bool operator==(const Iterator& other) const {
- return hash_set_ == other.hash_set_ && index_ == other.index_;
+ BaseIterator& operator=(const BaseIterator&) = default;
+ BaseIterator& operator=(BaseIterator&&) = default;
+
+ bool operator==(const BaseIterator& other) const {
+ return hash_set_ == other.hash_set_ && this->index_ == other.index_;
}
- bool operator!=(const Iterator& other) const {
+
+ bool operator!=(const BaseIterator& other) const {
return !(*this == other);
}
- Iterator operator++() { // Value after modification.
- index_ = NextNonEmptySlot(index_);
+
+ BaseIterator operator++() { // Value after modification.
+ this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
return *this;
}
- Iterator operator++(int) {
+
+ BaseIterator operator++(int) {
Iterator temp = *this;
- index_ = NextNonEmptySlot(index_);
+ this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
return temp;
}
- T& operator*() {
- DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
- return hash_set_->ElementForIndex(index_);
- }
- const T& operator*() const {
- DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
- return hash_set_->ElementForIndex(index_);
- }
- T* operator->() {
- return &**this;
+
+ Elem& operator*() const {
+ DCHECK(!hash_set_->IsFreeSlot(this->index_));
+ return hash_set_->ElementForIndex(this->index_);
}
- const T* operator->() const {
+
+ Elem* operator->() const {
return &**this;
}
+
// TODO: Operator -- --(int)
private:
- HashSet* hash_set_;
size_t index_;
+ HashSetType* hash_set_;
- size_t GetIndex() const {
- return index_;
- }
- size_t NextNonEmptySlot(size_t index) const {
- const size_t num_buckets = hash_set_->NumBuckets();
+ size_t NextNonEmptySlot(size_t index, const HashSet* hash_set) const {
+ const size_t num_buckets = hash_set->NumBuckets();
DCHECK_LT(index, num_buckets);
do {
++index;
- } while (index < num_buckets && hash_set_->IsFreeSlot(index));
+ } while (index < num_buckets && hash_set->IsFreeSlot(index));
return index;
}
friend class HashSet;
};
+ public:
+ static constexpr double kDefaultMinLoadFactor = 0.5;
+ static constexpr double kDefaultMaxLoadFactor = 0.9;
+ static constexpr size_t kMinBuckets = 1000;
+
+ typedef BaseIterator<T, HashSet> Iterator;
+ typedef BaseIterator<const T, const HashSet> ConstIterator;
+
void Clear() {
DeallocateStorage();
AllocateStorage(1);
num_elements_ = 0;
elements_until_expand_ = 0;
}
+
HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr),
min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
Clear();
}
+
HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
*this = other;
}
+
HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
*this = std::move(other);
}
+
~HashSet() {
DeallocateStorage();
}
+
HashSet& operator=(HashSet&& other) {
std::swap(data_, other.data_);
std::swap(num_buckets_, other.num_buckets_);
@@ -144,6 +154,7 @@ class HashSet {
std::swap(max_load_factor_, other.max_load_factor_);
return *this;
}
+
HashSet& operator=(const HashSet& other) {
DeallocateStorage();
AllocateStorage(other.NumBuckets());
@@ -156,21 +167,25 @@ class HashSet {
max_load_factor_ = other.max_load_factor_;
return *this;
}
+
// Lower case for c++11 for each.
Iterator begin() {
Iterator ret(this, 0);
- if (num_buckets_ != 0 && IsFreeSlot(ret.GetIndex())) {
+ if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
++ret; // Skip all the empty slots.
}
return ret;
}
+
// Lower case for c++11 for each.
Iterator end() {
return Iterator(this, NumBuckets());
}
+
bool Empty() {
- return begin() == end();
+ return Size() == 0;
}
+
// Erase algorithm:
// Make an empty slot where the iterator is pointing.
// Scan fowards until we hit another empty slot.
@@ -181,7 +196,7 @@ class HashSet {
// element to its actual location/index.
Iterator Erase(Iterator it) {
// empty_index is the index that will become empty.
- size_t empty_index = it.GetIndex();
+ size_t empty_index = it.index_;
DCHECK(!IsFreeSlot(empty_index));
size_t next_index = empty_index;
bool filled = false; // True if we filled the empty index.
@@ -224,33 +239,36 @@ class HashSet {
}
return it;
}
+
// Find an element, returns end() if not found.
- // Allows custom K types, example of when this is useful.
+ // Allows custom key (K) types, example of when this is useful:
// Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
// object in the heap for performance solution.
template <typename K>
Iterator Find(const K& element) {
return FindWithHash(element, hashfn_(element));
}
+
+ template <typename K>
+ ConstIterator Find(const K& element) const {
+ return FindWithHash(element, hashfn_(element));
+ }
+
template <typename K>
Iterator FindWithHash(const K& element, size_t hash) {
- DCHECK_EQ(hashfn_(element), hash);
- size_t index = IndexForHash(hash);
- while (true) {
- T& slot = ElementForIndex(index);
- if (emptyfn_.IsEmpty(slot)) {
- return end();
- }
- if (pred_(slot, element)) {
- return Iterator(this, index);
- }
- index = NextIndex(index);
- }
+ return Iterator(this, FindIndex(element, hash));
+ }
+
+ template <typename K>
+ ConstIterator FindWithHash(const K& element, size_t hash) const {
+ return ConstIterator(this, FindIndex(element, hash));
}
+
// Insert an element, allows duplicates.
void Insert(const T& element) {
InsertWithHash(element, hashfn_(element));
}
+
void InsertWithHash(const T& element, size_t hash) {
DCHECK_EQ(hash, hashfn_(element));
if (num_elements_ >= elements_until_expand_) {
@@ -261,12 +279,15 @@ class HashSet {
data_[index] = element;
++num_elements_;
}
+
size_t Size() const {
return num_elements_;
}
+
void ShrinkToMaximumLoad() {
Resize(Size() / max_load_factor_);
}
+
// To distance that inserted elements were probed. Used for measuring how good hash functions
// are.
size_t TotalProbeDistance() const {
@@ -284,10 +305,12 @@ class HashSet {
}
return total;
}
+
// Calculate the current load factor and return it.
double CalculateLoadFactor() const {
return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
}
+
// Make sure that everything reinserts in the right spot. Returns the number of errors.
size_t Verify() {
size_t errors = 0;
@@ -314,14 +337,17 @@ class HashSet {
DCHECK(data_ != nullptr);
return data_[index];
}
+
const T& ElementForIndex(size_t index) const {
DCHECK_LT(index, NumBuckets());
DCHECK(data_ != nullptr);
return data_[index];
}
+
size_t IndexForHash(size_t hash) const {
return hash % num_buckets_;
}
+
size_t NextIndex(size_t index) const {
if (UNLIKELY(++index >= num_buckets_)) {
DCHECK_EQ(index, NumBuckets());
@@ -329,12 +355,33 @@ class HashSet {
}
return index;
}
+
+ // Find the hash table slot for an element, or return NumBuckets() if not found.
+ // This value for not found is important so that Iterator(this, FindIndex(...)) == end().
+ template <typename K>
+ size_t FindIndex(const K& element, size_t hash) const {
+ DCHECK_EQ(hashfn_(element), hash);
+ size_t index = IndexForHash(hash);
+ while (true) {
+ const T& slot = ElementForIndex(index);
+ if (emptyfn_.IsEmpty(slot)) {
+ return NumBuckets();
+ }
+ if (pred_(slot, element)) {
+ return index;
+ }
+ index = NextIndex(index);
+ }
+ }
+
bool IsFreeSlot(size_t index) const {
return emptyfn_.IsEmpty(ElementForIndex(index));
}
+
size_t NumBuckets() const {
return num_buckets_;
}
+
// Allocate a number of buckets.
void AllocateStorage(size_t num_buckets) {
num_buckets_ = num_buckets;
@@ -344,6 +391,7 @@ class HashSet {
emptyfn_.MakeEmpty(data_[i]);
}
}
+
void DeallocateStorage() {
if (num_buckets_ != 0) {
for (size_t i = 0; i < NumBuckets(); ++i) {
@@ -354,6 +402,7 @@ class HashSet {
num_buckets_ = 0;
}
}
+
// Expand the set based on the load factors.
void Expand() {
size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
@@ -365,6 +414,7 @@ class HashSet {
// When we hit elements_until_expand_, we are at the max load factor and must expand again.
elements_until_expand_ = NumBuckets() * max_load_factor_;
}
+
// Expand / shrink the table to the new specified size.
void Resize(size_t new_size) {
DCHECK_GE(new_size, Size());
@@ -381,6 +431,7 @@ class HashSet {
}
allocfn_.deallocate(old_data, old_num_buckets);
}
+
ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
while (!emptyfn_.IsEmpty(data_[index])) {
index = NextIndex(index);
@@ -398,8 +449,6 @@ class HashSet {
T* data_; // Backing storage.
double min_load_factor_;
double max_load_factor_;
-
- friend class Iterator;
};
} // namespace art
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index 5f498d9..e88637f 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -21,7 +21,7 @@
#include <string>
#include <unordered_set>
-#include "common_runtime_test.h"
+#include <gtest/gtest.h>
#include "hash_map.h"
namespace art {
@@ -35,7 +35,7 @@ struct IsEmptyFnString {
}
};
-class HashSetTest : public CommonRuntimeTest {
+class HashSetTest : public testing::Test {
public:
HashSetTest() : seed_(97421), unique_number_(0) {
}