diff options
Diffstat (limited to 'runtime/base')
-rw-r--r-- | runtime/base/allocator.h | 2 | ||||
-rw-r--r-- | runtime/base/hash_set.h | 407 | ||||
-rw-r--r-- | runtime/base/hash_set_test.cc | 203 |
3 files changed, 611 insertions, 1 deletions
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h index 30f7f12..5a09c96 100644 --- a/runtime/base/allocator.h +++ b/runtime/base/allocator.h @@ -101,7 +101,7 @@ inline void RegisterFree(AllocatorTag tag, size_t bytes) { // Tracking allocator for use with STL types, tracks how much memory is used. template<class T, AllocatorTag kTag> -class TrackingAllocatorImpl { +class TrackingAllocatorImpl : public std::allocator<T> { public: typedef typename std::allocator<T>::value_type value_type; typedef typename std::allocator<T>::size_type size_type; diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h new file mode 100644 index 0000000..992e5b1 --- /dev/null +++ b/runtime/base/hash_set.h @@ -0,0 +1,407 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_BASE_HASH_SET_H_ +#define ART_RUNTIME_BASE_HASH_SET_H_ + +#include <functional> +#include <memory> +#include <stdint.h> +#include <utility> + +#include "logging.h" + +namespace art { + +// Returns true if an item is empty. +template <class T> +class DefaultEmptyFn { + public: + void MakeEmpty(T& item) const { + item = T(); + } + bool IsEmpty(const T& item) const { + return item == T(); + } +}; + +template <class T> +class DefaultEmptyFn<T*> { + public: + void MakeEmpty(T*& item) const { + item = nullptr; + } + bool IsEmpty(const T*& item) const { + return item == nullptr; + } +}; + +// Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't +// boxed. Uses linear probing. +// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item) +template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>, + class Pred = std::equal_to<T>, class Alloc = std::allocator<T>> +class HashSet { + public: + static constexpr double kDefaultMinLoadFactor = 0.5; + static constexpr double kDefaultMaxLoadFactor = 0.9; + static constexpr size_t kMinBuckets = 1000; + + class Iterator { + public: + Iterator(const Iterator&) = default; + Iterator(HashSet* hash_set, size_t index) : hash_set_(hash_set), index_(index) { + } + Iterator& operator=(const Iterator&) = default; + bool operator==(const Iterator& other) const { + return hash_set_ == other.hash_set_ && index_ == other.index_; + } + bool operator!=(const Iterator& other) const { + return !(*this == other); + } + Iterator operator++() { // Value after modification. + index_ = NextNonEmptySlot(index_); + return *this; + } + Iterator operator++(int) { + Iterator temp = *this; + index_ = NextNonEmptySlot(index_); + return temp; + } + T& operator*() { + DCHECK(!hash_set_->IsFreeSlot(GetIndex())); + return hash_set_->ElementForIndex(index_); + } + const T& operator*() const { + DCHECK(!hash_set_->IsFreeSlot(GetIndex())); + return hash_set_->ElementForIndex(index_); + } + T* operator->() { + return &**this; + } + const T* operator->() const { + return &**this; + } + // TODO: Operator -- --(int) + + private: + HashSet* hash_set_; + size_t index_; + + size_t GetIndex() const { + return index_; + } + size_t NextNonEmptySlot(size_t index) const { + const size_t num_buckets = hash_set_->NumBuckets(); + DCHECK_LT(index, num_buckets); + do { + ++index; + } while (index < num_buckets && hash_set_->IsFreeSlot(index)); + return index; + } + + friend class HashSet; + }; + + void Clear() { + DeallocateStorage(); + AllocateStorage(1); + num_elements_ = 0; + elements_until_expand_ = 0; + } + HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr), + min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) { + Clear(); + } + HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { + *this = other; + } + HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { + *this = std::move(other); + } + ~HashSet() { + DeallocateStorage(); + } + HashSet& operator=(HashSet&& other) { + std::swap(data_, other.data_); + std::swap(num_buckets_, other.num_buckets_); + std::swap(num_elements_, other.num_elements_); + std::swap(elements_until_expand_, other.elements_until_expand_); + std::swap(min_load_factor_, other.min_load_factor_); + std::swap(max_load_factor_, other.max_load_factor_); + return *this; + } + HashSet& operator=(const HashSet& other) { + DeallocateStorage(); + AllocateStorage(other.NumBuckets()); + for (size_t i = 0; i < num_buckets_; ++i) { + ElementForIndex(i) = other.data_[i]; + } + num_elements_ = other.num_elements_; + elements_until_expand_ = other.elements_until_expand_; + min_load_factor_ = other.min_load_factor_; + max_load_factor_ = other.max_load_factor_; + return *this; + } + // Lower case for c++11 for each. + Iterator begin() { + Iterator ret(this, 0); + if (num_buckets_ != 0 && IsFreeSlot(ret.GetIndex())) { + ++ret; // Skip all the empty slots. + } + return ret; + } + // Lower case for c++11 for each. + Iterator end() { + return Iterator(this, NumBuckets()); + } + bool Empty() { + return begin() == end(); + } + // Erase algorithm: + // Make an empty slot where the iterator is pointing. + // Scan fowards until we hit another empty slot. + // If an element inbetween doesn't rehash to the range from the current empty slot to the + // iterator. It must be before the empty slot, in that case we can move it to the empty slot + // and set the empty slot to be the location we just moved from. + // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an + // element to its actual location/index. + Iterator Erase(Iterator it) { + // empty_index is the index that will become empty. + size_t empty_index = it.GetIndex(); + DCHECK(!IsFreeSlot(empty_index)); + size_t next_index = empty_index; + bool filled = false; // True if we filled the empty index. + while (true) { + next_index = NextIndex(next_index); + T& next_element = ElementForIndex(next_index); + // If the next element is empty, we are done. Make sure to clear the current empty index. + if (emptyfn_.IsEmpty(next_element)) { + emptyfn_.MakeEmpty(ElementForIndex(empty_index)); + break; + } + // Otherwise try to see if the next element can fill the current empty index. + const size_t next_hash = hashfn_(next_element); + // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is + // nothing we can do. + size_t next_ideal_index = IndexForHash(next_hash); + // Loop around if needed for our check. + size_t unwrapped_next_index = next_index; + if (unwrapped_next_index < empty_index) { + unwrapped_next_index += NumBuckets(); + } + // Loop around if needed for our check. + size_t unwrapped_next_ideal_index = next_ideal_index; + if (unwrapped_next_ideal_index < empty_index) { + unwrapped_next_ideal_index += NumBuckets(); + } + if (unwrapped_next_ideal_index <= empty_index || + unwrapped_next_ideal_index > unwrapped_next_index) { + // If the target index isn't within our current range it must have been probed from before + // the empty index. + ElementForIndex(empty_index) = std::move(next_element); + filled = true; // TODO: Optimize + empty_index = next_index; + } + } + --num_elements_; + // If we didn't fill the slot then we need go to the next non free slot. + if (!filled) { + ++it; + } + return it; + } + // Find an element, returns end() if not found. + // Allows custom K types, example of when this is useful. + // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy + // object in the heap for performance solution. + template <typename K> + Iterator Find(const K& element) { + return FindWithHash(element, hashfn_(element)); + } + template <typename K> + Iterator FindWithHash(const K& element, size_t hash) { + DCHECK_EQ(hashfn_(element), hash); + size_t index = IndexForHash(hash); + while (true) { + T& slot = ElementForIndex(index); + if (emptyfn_.IsEmpty(slot)) { + return end(); + } + if (pred_(slot, element)) { + return Iterator(this, index); + } + index = NextIndex(index); + } + } + // Insert an element, allows duplicates. + void Insert(const T& element) { + InsertWithHash(element, hashfn_(element)); + } + void InsertWithHash(const T& element, size_t hash) { + DCHECK_EQ(hash, hashfn_(element)); + if (num_elements_ >= elements_until_expand_) { + Expand(); + DCHECK_LT(num_elements_, elements_until_expand_); + } + const size_t index = FirstAvailableSlot(IndexForHash(hash)); + data_[index] = element; + ++num_elements_; + } + size_t Size() const { + return num_elements_; + } + void ShrinkToMaximumLoad() { + Resize(Size() / max_load_factor_); + } + // To distance that inserted elements were probed. Used for measuring how good hash functions + // are. + size_t TotalProbeDistance() const { + size_t total = 0; + for (size_t i = 0; i < NumBuckets(); ++i) { + const T& element = ElementForIndex(i); + if (!emptyfn_.IsEmpty(element)) { + size_t ideal_location = IndexForHash(hashfn_(element)); + if (ideal_location > i) { + total += i + NumBuckets() - ideal_location; + } else { + total += i - ideal_location; + } + } + } + return total; + } + // Calculate the current load factor and return it. + double CalculateLoadFactor() const { + return static_cast<double>(Size()) / static_cast<double>(NumBuckets()); + } + // Make sure that everything reinserts in the right spot. Returns the number of errors. + size_t Verify() { + size_t errors = 0; + for (size_t i = 0; i < num_buckets_; ++i) { + T& element = data_[i]; + if (!emptyfn_.IsEmpty(element)) { + T temp; + emptyfn_.MakeEmpty(temp); + std::swap(temp, element); + size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp))); + if (i != first_slot) { + LOG(ERROR) << "Element " << i << " should be in slot " << first_slot; + ++errors; + } + std::swap(temp, element); + } + } + return errors; + } + + private: + T& ElementForIndex(size_t index) { + DCHECK_LT(index, NumBuckets()); + DCHECK(data_ != nullptr); + return data_[index]; + } + const T& ElementForIndex(size_t index) const { + DCHECK_LT(index, NumBuckets()); + DCHECK(data_ != nullptr); + return data_[index]; + } + size_t IndexForHash(size_t hash) const { + return hash % num_buckets_; + } + size_t NextIndex(size_t index) const { + if (UNLIKELY(++index >= num_buckets_)) { + DCHECK_EQ(index, NumBuckets()); + return 0; + } + return index; + } + bool IsFreeSlot(size_t index) const { + return emptyfn_.IsEmpty(ElementForIndex(index)); + } + size_t NumBuckets() const { + return num_buckets_; + } + // Allocate a number of buckets. + void AllocateStorage(size_t num_buckets) { + num_buckets_ = num_buckets; + data_ = allocfn_.allocate(num_buckets_); + for (size_t i = 0; i < num_buckets_; ++i) { + allocfn_.construct(allocfn_.address(data_[i])); + emptyfn_.MakeEmpty(data_[i]); + } + } + void DeallocateStorage() { + if (num_buckets_ != 0) { + for (size_t i = 0; i < NumBuckets(); ++i) { + allocfn_.destroy(allocfn_.address(data_[i])); + } + allocfn_.deallocate(data_, NumBuckets()); + data_ = nullptr; + num_buckets_ = 0; + } + } + // Expand the set based on the load factors. + void Expand() { + size_t min_index = static_cast<size_t>(Size() / min_load_factor_); + if (min_index < kMinBuckets) { + min_index = kMinBuckets; + } + // Resize based on the minimum load factor. + Resize(min_index); + // When we hit elements_until_expand_, we are at the max load factor and must expand again. + elements_until_expand_ = NumBuckets() * max_load_factor_; + } + // Expand / shrink the table to the new specified size. + void Resize(size_t new_size) { + DCHECK_GE(new_size, Size()); + T* old_data = data_; + size_t old_num_buckets = num_buckets_; + // Reinsert all of the old elements. + AllocateStorage(new_size); + for (size_t i = 0; i < old_num_buckets; ++i) { + T& element = old_data[i]; + if (!emptyfn_.IsEmpty(element)) { + data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element); + } + allocfn_.destroy(allocfn_.address(element)); + } + allocfn_.deallocate(old_data, old_num_buckets); + } + ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const { + while (!emptyfn_.IsEmpty(data_[index])) { + index = NextIndex(index); + } + return index; + } + + Alloc allocfn_; // Allocator function. + HashFn hashfn_; // Hashing function. + EmptyFn emptyfn_; // IsEmpty/SetEmpty function. + Pred pred_; // Equals function. + size_t num_elements_; // Number of inserted elements. + size_t num_buckets_; // Number of hash table buckets. + size_t elements_until_expand_; // Maxmimum number of elements until we expand the table. + T* data_; // Backing storage. + double min_load_factor_; + double max_load_factor_; + + friend class Iterator; +}; + +} // namespace art + +#endif // ART_RUNTIME_BASE_HASH_SET_H_ diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc new file mode 100644 index 0000000..885fc06 --- /dev/null +++ b/runtime/base/hash_set_test.cc @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hash_set.h" + +#include <map> +#include <sstream> +#include <string> +#include <unordered_set> + +#include "common_runtime_test.h" +#include "utils.h" + +namespace art { + +class IsEmptyFnString { + public: + void MakeEmpty(std::string& item) const { + item.clear(); + } + bool IsEmpty(const std::string& item) const { + return item.empty(); + } +}; + +class HashSetTest : public CommonRuntimeTest { + public: + HashSetTest() : seed_(97421), unique_number_(0) { + } + std::string RandomString(size_t len) { + std::ostringstream oss; + for (size_t i = 0; i < len; ++i) { + oss << static_cast<char>('A' + PRand() % 64); + } + static_assert(' ' < 'A', "space must be less than a"); + oss << " " << unique_number_++; // Relies on ' ' < 'A' + return oss.str(); + } + void SetSeed(size_t seed) { + seed_ = seed; + } + size_t PRand() { // Pseudo random. + seed_ = seed_ * 1103515245 + 12345; + return seed_; + } + + private: + size_t seed_; + size_t unique_number_; +}; + +TEST_F(HashSetTest, TestSmoke) { + HashSet<std::string, IsEmptyFnString> hash_set; + const std::string test_string = "hello world 1234"; + ASSERT_TRUE(hash_set.Empty()); + ASSERT_EQ(hash_set.Size(), 0U); + hash_set.Insert(test_string); + auto it = hash_set.Find(test_string); + ASSERT_EQ(*it, test_string); + auto after_it = hash_set.Erase(it); + ASSERT_TRUE(after_it == hash_set.end()); + ASSERT_TRUE(hash_set.Empty()); + ASSERT_EQ(hash_set.Size(), 0U); + it = hash_set.Find(test_string); + ASSERT_TRUE(it == hash_set.end()); +} + +TEST_F(HashSetTest, TestInsertAndErase) { + HashSet<std::string, IsEmptyFnString> hash_set; + static constexpr size_t count = 1000; + std::vector<std::string> strings; + for (size_t i = 0; i < count; ++i) { + // Insert a bunch of elements and make sure we can find them. + strings.push_back(RandomString(10)); + hash_set.Insert(strings[i]); + auto it = hash_set.Find(strings[i]); + ASSERT_TRUE(it != hash_set.end()); + ASSERT_EQ(*it, strings[i]); + } + ASSERT_EQ(strings.size(), hash_set.Size()); + // Try to erase the odd strings. + for (size_t i = 1; i < count; i += 2) { + auto it = hash_set.Find(strings[i]); + ASSERT_TRUE(it != hash_set.end()); + ASSERT_EQ(*it, strings[i]); + hash_set.Erase(it); + } + // Test removed. + for (size_t i = 1; i < count; i += 2) { + auto it = hash_set.Find(strings[i]); + ASSERT_TRUE(it == hash_set.end()); + } + for (size_t i = 0; i < count; i += 2) { + auto it = hash_set.Find(strings[i]); + ASSERT_TRUE(it != hash_set.end()); + ASSERT_EQ(*it, strings[i]); + } +} + +TEST_F(HashSetTest, TestIterator) { + HashSet<std::string, IsEmptyFnString> hash_set; + ASSERT_TRUE(hash_set.begin() == hash_set.end()); + static constexpr size_t count = 1000; + std::vector<std::string> strings; + for (size_t i = 0; i < count; ++i) { + // Insert a bunch of elements and make sure we can find them. + strings.push_back(RandomString(10)); + hash_set.Insert(strings[i]); + } + // Make sure we visit each string exactly once. + std::map<std::string, size_t> found_count; + for (const std::string& s : hash_set) { + ++found_count[s]; + } + for (size_t i = 0; i < count; ++i) { + ASSERT_EQ(found_count[strings[i]], 1U); + } + found_count.clear(); + // Remove all the elements with iterator erase. + for (auto it = hash_set.begin(); it != hash_set.end();) { + ++found_count[*it]; + it = hash_set.Erase(it); + ASSERT_EQ(hash_set.Verify(), 0U); + } + for (size_t i = 0; i < count; ++i) { + ASSERT_EQ(found_count[strings[i]], 1U); + } +} + +TEST_F(HashSetTest, TestSwap) { + HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb; + std::vector<std::string> strings; + static constexpr size_t count = 1000; + for (size_t i = 0; i < count; ++i) { + strings.push_back(RandomString(10)); + hash_seta.Insert(strings[i]); + } + std::swap(hash_seta, hash_setb); + hash_seta.Insert("TEST"); + hash_setb.Insert("TEST2"); + for (size_t i = 0; i < count; ++i) { + strings.push_back(RandomString(10)); + hash_seta.Insert(strings[i]); + } +} + +TEST_F(HashSetTest, TestStress) { + HashSet<std::string, IsEmptyFnString> hash_set; + std::unordered_multiset<std::string> std_set; + std::vector<std::string> strings; + static constexpr size_t string_count = 2000; + static constexpr size_t operations = 100000; + static constexpr size_t target_size = 5000; + for (size_t i = 0; i < string_count; ++i) { + strings.push_back(RandomString(i % 10 + 1)); + } + const size_t seed = time(nullptr); + SetSeed(seed); + LOG(INFO) << "Starting stress test with seed " << seed; + for (size_t i = 0; i < operations; ++i) { + ASSERT_EQ(hash_set.Size(), std_set.size()); + size_t delta = std::abs(static_cast<ssize_t>(target_size) - + static_cast<ssize_t>(hash_set.Size())); + size_t n = PRand(); + if (n % target_size == 0) { + hash_set.Clear(); + std_set.clear(); + ASSERT_TRUE(hash_set.Empty()); + ASSERT_TRUE(std_set.empty()); + } else if (n % target_size < delta) { + // Skew towards adding elements until we are at the desired size. + const std::string& s = strings[PRand() % string_count]; + hash_set.Insert(s); + std_set.insert(s); + ASSERT_EQ(*hash_set.Find(s), *std_set.find(s)); + } else { + const std::string& s = strings[PRand() % string_count]; + auto it1 = hash_set.Find(s); + auto it2 = std_set.find(s); + ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end()); + if (it1 != hash_set.end()) { + ASSERT_EQ(*it1, *it2); + hash_set.Erase(it1); + std_set.erase(it2); + } + } + } +} + +} // namespace art |