diff options
author | Mathieu Chartier <mathieuc@google.com> | 2015-03-18 10:39:00 -0700 |
---|---|---|
committer | Mathieu Chartier <mathieuc@google.com> | 2015-03-18 13:39:18 -0700 |
commit | 47f867a0ae34d743f6159c2261e5b11e39693e15 (patch) | |
tree | 3b9129e77ab3e99b211fccc8ab2eba06d0c12957 /runtime/base | |
parent | a2b02f7bb474549ae356b5edfbb27a76e5460c58 (diff) | |
download | art-47f867a0ae34d743f6159c2261e5b11e39693e15.zip art-47f867a0ae34d743f6159c2261e5b11e39693e15.tar.gz art-47f867a0ae34d743f6159c2261e5b11e39693e15.tar.bz2 |
Clean up hash set
Added vertical whitespace, const iterators, made some functions
const.
Change-Id: I188dc0384a98d6dae2822f0ac38b740f2356c23d
Diffstat (limited to 'runtime/base')
-rw-r--r-- | runtime/base/hash_set.h | 157 | ||||
-rw-r--r-- | runtime/base/hash_set_test.cc | 4 |
2 files changed, 105 insertions, 56 deletions
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h index 992e5b1..ab63ddd 100644 --- a/runtime/base/hash_set.h +++ b/runtime/base/hash_set.h @@ -50,91 +50,101 @@ class DefaultEmptyFn<T*> { }; // Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't -// boxed. Uses linear probing. -// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item) +// boxed. Uses linear probing to resolve collisions. +// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item). +// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower +// and more complicated. template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>, class Pred = std::equal_to<T>, class Alloc = std::allocator<T>> class HashSet { - public: - static constexpr double kDefaultMinLoadFactor = 0.5; - static constexpr double kDefaultMaxLoadFactor = 0.9; - static constexpr size_t kMinBuckets = 1000; - - class Iterator { + template <class Elem, class HashSetType> + class BaseIterator { public: - Iterator(const Iterator&) = default; - Iterator(HashSet* hash_set, size_t index) : hash_set_(hash_set), index_(index) { + BaseIterator(const BaseIterator&) = default; + BaseIterator(BaseIterator&&) = default; + BaseIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) { } - Iterator& operator=(const Iterator&) = default; - bool operator==(const Iterator& other) const { - return hash_set_ == other.hash_set_ && index_ == other.index_; + BaseIterator& operator=(const BaseIterator&) = default; + BaseIterator& operator=(BaseIterator&&) = default; + + bool operator==(const BaseIterator& other) const { + return hash_set_ == other.hash_set_ && this->index_ == other.index_; } - bool operator!=(const Iterator& other) const { + + bool operator!=(const BaseIterator& other) const { return !(*this == other); } - Iterator operator++() { // Value after modification. - index_ = NextNonEmptySlot(index_); + + BaseIterator operator++() { // Value after modification. + this->index_ = this->NextNonEmptySlot(this->index_, hash_set_); return *this; } - Iterator operator++(int) { + + BaseIterator operator++(int) { Iterator temp = *this; - index_ = NextNonEmptySlot(index_); + this->index_ = this->NextNonEmptySlot(this->index_, hash_set_); return temp; } - T& operator*() { - DCHECK(!hash_set_->IsFreeSlot(GetIndex())); - return hash_set_->ElementForIndex(index_); - } - const T& operator*() const { - DCHECK(!hash_set_->IsFreeSlot(GetIndex())); - return hash_set_->ElementForIndex(index_); - } - T* operator->() { - return &**this; + + Elem& operator*() const { + DCHECK(!hash_set_->IsFreeSlot(this->index_)); + return hash_set_->ElementForIndex(this->index_); } - const T* operator->() const { + + Elem* operator->() const { return &**this; } + // TODO: Operator -- --(int) private: - HashSet* hash_set_; size_t index_; + HashSetType* hash_set_; - size_t GetIndex() const { - return index_; - } - size_t NextNonEmptySlot(size_t index) const { - const size_t num_buckets = hash_set_->NumBuckets(); + size_t NextNonEmptySlot(size_t index, const HashSet* hash_set) const { + const size_t num_buckets = hash_set->NumBuckets(); DCHECK_LT(index, num_buckets); do { ++index; - } while (index < num_buckets && hash_set_->IsFreeSlot(index)); + } while (index < num_buckets && hash_set->IsFreeSlot(index)); return index; } friend class HashSet; }; + public: + static constexpr double kDefaultMinLoadFactor = 0.5; + static constexpr double kDefaultMaxLoadFactor = 0.9; + static constexpr size_t kMinBuckets = 1000; + + typedef BaseIterator<T, HashSet> Iterator; + typedef BaseIterator<const T, const HashSet> ConstIterator; + void Clear() { DeallocateStorage(); AllocateStorage(1); num_elements_ = 0; elements_until_expand_ = 0; } + HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr), min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) { Clear(); } + HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { *this = other; } + HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { *this = std::move(other); } + ~HashSet() { DeallocateStorage(); } + HashSet& operator=(HashSet&& other) { std::swap(data_, other.data_); std::swap(num_buckets_, other.num_buckets_); @@ -144,6 +154,7 @@ class HashSet { std::swap(max_load_factor_, other.max_load_factor_); return *this; } + HashSet& operator=(const HashSet& other) { DeallocateStorage(); AllocateStorage(other.NumBuckets()); @@ -156,21 +167,25 @@ class HashSet { max_load_factor_ = other.max_load_factor_; return *this; } + // Lower case for c++11 for each. Iterator begin() { Iterator ret(this, 0); - if (num_buckets_ != 0 && IsFreeSlot(ret.GetIndex())) { + if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) { ++ret; // Skip all the empty slots. } return ret; } + // Lower case for c++11 for each. Iterator end() { return Iterator(this, NumBuckets()); } + bool Empty() { - return begin() == end(); + return Size() == 0; } + // Erase algorithm: // Make an empty slot where the iterator is pointing. // Scan fowards until we hit another empty slot. @@ -181,7 +196,7 @@ class HashSet { // element to its actual location/index. Iterator Erase(Iterator it) { // empty_index is the index that will become empty. - size_t empty_index = it.GetIndex(); + size_t empty_index = it.index_; DCHECK(!IsFreeSlot(empty_index)); size_t next_index = empty_index; bool filled = false; // True if we filled the empty index. @@ -224,33 +239,36 @@ class HashSet { } return it; } + // Find an element, returns end() if not found. - // Allows custom K types, example of when this is useful. + // Allows custom key (K) types, example of when this is useful: // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy // object in the heap for performance solution. template <typename K> Iterator Find(const K& element) { return FindWithHash(element, hashfn_(element)); } + + template <typename K> + ConstIterator Find(const K& element) const { + return FindWithHash(element, hashfn_(element)); + } + template <typename K> Iterator FindWithHash(const K& element, size_t hash) { - DCHECK_EQ(hashfn_(element), hash); - size_t index = IndexForHash(hash); - while (true) { - T& slot = ElementForIndex(index); - if (emptyfn_.IsEmpty(slot)) { - return end(); - } - if (pred_(slot, element)) { - return Iterator(this, index); - } - index = NextIndex(index); - } + return Iterator(this, FindIndex(element, hash)); + } + + template <typename K> + ConstIterator FindWithHash(const K& element, size_t hash) const { + return ConstIterator(this, FindIndex(element, hash)); } + // Insert an element, allows duplicates. void Insert(const T& element) { InsertWithHash(element, hashfn_(element)); } + void InsertWithHash(const T& element, size_t hash) { DCHECK_EQ(hash, hashfn_(element)); if (num_elements_ >= elements_until_expand_) { @@ -261,12 +279,15 @@ class HashSet { data_[index] = element; ++num_elements_; } + size_t Size() const { return num_elements_; } + void ShrinkToMaximumLoad() { Resize(Size() / max_load_factor_); } + // To distance that inserted elements were probed. Used for measuring how good hash functions // are. size_t TotalProbeDistance() const { @@ -284,10 +305,12 @@ class HashSet { } return total; } + // Calculate the current load factor and return it. double CalculateLoadFactor() const { return static_cast<double>(Size()) / static_cast<double>(NumBuckets()); } + // Make sure that everything reinserts in the right spot. Returns the number of errors. size_t Verify() { size_t errors = 0; @@ -314,14 +337,17 @@ class HashSet { DCHECK(data_ != nullptr); return data_[index]; } + const T& ElementForIndex(size_t index) const { DCHECK_LT(index, NumBuckets()); DCHECK(data_ != nullptr); return data_[index]; } + size_t IndexForHash(size_t hash) const { return hash % num_buckets_; } + size_t NextIndex(size_t index) const { if (UNLIKELY(++index >= num_buckets_)) { DCHECK_EQ(index, NumBuckets()); @@ -329,12 +355,33 @@ class HashSet { } return index; } + + // Find the hash table slot for an element, or return NumBuckets() if not found. + // This value for not found is important so that Iterator(this, FindIndex(...)) == end(). + template <typename K> + size_t FindIndex(const K& element, size_t hash) const { + DCHECK_EQ(hashfn_(element), hash); + size_t index = IndexForHash(hash); + while (true) { + const T& slot = ElementForIndex(index); + if (emptyfn_.IsEmpty(slot)) { + return NumBuckets(); + } + if (pred_(slot, element)) { + return index; + } + index = NextIndex(index); + } + } + bool IsFreeSlot(size_t index) const { return emptyfn_.IsEmpty(ElementForIndex(index)); } + size_t NumBuckets() const { return num_buckets_; } + // Allocate a number of buckets. void AllocateStorage(size_t num_buckets) { num_buckets_ = num_buckets; @@ -344,6 +391,7 @@ class HashSet { emptyfn_.MakeEmpty(data_[i]); } } + void DeallocateStorage() { if (num_buckets_ != 0) { for (size_t i = 0; i < NumBuckets(); ++i) { @@ -354,6 +402,7 @@ class HashSet { num_buckets_ = 0; } } + // Expand the set based on the load factors. void Expand() { size_t min_index = static_cast<size_t>(Size() / min_load_factor_); @@ -365,6 +414,7 @@ class HashSet { // When we hit elements_until_expand_, we are at the max load factor and must expand again. elements_until_expand_ = NumBuckets() * max_load_factor_; } + // Expand / shrink the table to the new specified size. void Resize(size_t new_size) { DCHECK_GE(new_size, Size()); @@ -381,6 +431,7 @@ class HashSet { } allocfn_.deallocate(old_data, old_num_buckets); } + ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const { while (!emptyfn_.IsEmpty(data_[index])) { index = NextIndex(index); @@ -398,8 +449,6 @@ class HashSet { T* data_; // Backing storage. double min_load_factor_; double max_load_factor_; - - friend class Iterator; }; } // namespace art diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc index 5f498d9..e88637f 100644 --- a/runtime/base/hash_set_test.cc +++ b/runtime/base/hash_set_test.cc @@ -21,7 +21,7 @@ #include <string> #include <unordered_set> -#include "common_runtime_test.h" +#include <gtest/gtest.h> #include "hash_map.h" namespace art { @@ -35,7 +35,7 @@ struct IsEmptyFnString { } }; -class HashSetTest : public CommonRuntimeTest { +class HashSetTest : public testing::Test { public: HashSetTest() : seed_(97421), unique_number_(0) { } |