summaryrefslogtreecommitdiffstats
path: root/runtime/base
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/base')
-rw-r--r--runtime/base/allocator.h2
-rw-r--r--runtime/base/hash_set.h407
-rw-r--r--runtime/base/hash_set_test.cc203
3 files changed, 611 insertions, 1 deletions
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 30f7f12..5a09c96 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -101,7 +101,7 @@ inline void RegisterFree(AllocatorTag tag, size_t bytes) {
// Tracking allocator for use with STL types, tracks how much memory is used.
template<class T, AllocatorTag kTag>
-class TrackingAllocatorImpl {
+class TrackingAllocatorImpl : public std::allocator<T> {
public:
typedef typename std::allocator<T>::value_type value_type;
typedef typename std::allocator<T>::size_type size_type;
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
new file mode 100644
index 0000000..992e5b1
--- /dev/null
+++ b/runtime/base/hash_set.h
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_HASH_SET_H_
+#define ART_RUNTIME_BASE_HASH_SET_H_
+
+#include <functional>
+#include <memory>
+#include <stdint.h>
+#include <utility>
+
+#include "logging.h"
+
+namespace art {
+
+// Returns true if an item is empty.
+template <class T>
+class DefaultEmptyFn {
+ public:
+ void MakeEmpty(T& item) const {
+ item = T();
+ }
+ bool IsEmpty(const T& item) const {
+ return item == T();
+ }
+};
+
+template <class T>
+class DefaultEmptyFn<T*> {
+ public:
+ void MakeEmpty(T*& item) const {
+ item = nullptr;
+ }
+ bool IsEmpty(const T*& item) const {
+ return item == nullptr;
+ }
+};
+
+// Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't
+// boxed. Uses linear probing.
+// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item)
+template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
+ class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
+class HashSet {
+ public:
+ static constexpr double kDefaultMinLoadFactor = 0.5;
+ static constexpr double kDefaultMaxLoadFactor = 0.9;
+ static constexpr size_t kMinBuckets = 1000;
+
+ class Iterator {
+ public:
+ Iterator(const Iterator&) = default;
+ Iterator(HashSet* hash_set, size_t index) : hash_set_(hash_set), index_(index) {
+ }
+ Iterator& operator=(const Iterator&) = default;
+ bool operator==(const Iterator& other) const {
+ return hash_set_ == other.hash_set_ && index_ == other.index_;
+ }
+ bool operator!=(const Iterator& other) const {
+ return !(*this == other);
+ }
+ Iterator operator++() { // Value after modification.
+ index_ = NextNonEmptySlot(index_);
+ return *this;
+ }
+ Iterator operator++(int) {
+ Iterator temp = *this;
+ index_ = NextNonEmptySlot(index_);
+ return temp;
+ }
+ T& operator*() {
+ DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
+ return hash_set_->ElementForIndex(index_);
+ }
+ const T& operator*() const {
+ DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
+ return hash_set_->ElementForIndex(index_);
+ }
+ T* operator->() {
+ return &**this;
+ }
+ const T* operator->() const {
+ return &**this;
+ }
+ // TODO: Operator -- --(int)
+
+ private:
+ HashSet* hash_set_;
+ size_t index_;
+
+ size_t GetIndex() const {
+ return index_;
+ }
+ size_t NextNonEmptySlot(size_t index) const {
+ const size_t num_buckets = hash_set_->NumBuckets();
+ DCHECK_LT(index, num_buckets);
+ do {
+ ++index;
+ } while (index < num_buckets && hash_set_->IsFreeSlot(index));
+ return index;
+ }
+
+ friend class HashSet;
+ };
+
+ void Clear() {
+ DeallocateStorage();
+ AllocateStorage(1);
+ num_elements_ = 0;
+ elements_until_expand_ = 0;
+ }
+ HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr),
+ min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
+ Clear();
+ }
+ HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
+ *this = other;
+ }
+ HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
+ *this = std::move(other);
+ }
+ ~HashSet() {
+ DeallocateStorage();
+ }
+ HashSet& operator=(HashSet&& other) {
+ std::swap(data_, other.data_);
+ std::swap(num_buckets_, other.num_buckets_);
+ std::swap(num_elements_, other.num_elements_);
+ std::swap(elements_until_expand_, other.elements_until_expand_);
+ std::swap(min_load_factor_, other.min_load_factor_);
+ std::swap(max_load_factor_, other.max_load_factor_);
+ return *this;
+ }
+ HashSet& operator=(const HashSet& other) {
+ DeallocateStorage();
+ AllocateStorage(other.NumBuckets());
+ for (size_t i = 0; i < num_buckets_; ++i) {
+ ElementForIndex(i) = other.data_[i];
+ }
+ num_elements_ = other.num_elements_;
+ elements_until_expand_ = other.elements_until_expand_;
+ min_load_factor_ = other.min_load_factor_;
+ max_load_factor_ = other.max_load_factor_;
+ return *this;
+ }
+ // Lower case for c++11 for each.
+ Iterator begin() {
+ Iterator ret(this, 0);
+ if (num_buckets_ != 0 && IsFreeSlot(ret.GetIndex())) {
+ ++ret; // Skip all the empty slots.
+ }
+ return ret;
+ }
+ // Lower case for c++11 for each.
+ Iterator end() {
+ return Iterator(this, NumBuckets());
+ }
+ bool Empty() {
+ return begin() == end();
+ }
+ // Erase algorithm:
+ // Make an empty slot where the iterator is pointing.
+ // Scan fowards until we hit another empty slot.
+ // If an element inbetween doesn't rehash to the range from the current empty slot to the
+ // iterator. It must be before the empty slot, in that case we can move it to the empty slot
+ // and set the empty slot to be the location we just moved from.
+ // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
+ // element to its actual location/index.
+ Iterator Erase(Iterator it) {
+ // empty_index is the index that will become empty.
+ size_t empty_index = it.GetIndex();
+ DCHECK(!IsFreeSlot(empty_index));
+ size_t next_index = empty_index;
+ bool filled = false; // True if we filled the empty index.
+ while (true) {
+ next_index = NextIndex(next_index);
+ T& next_element = ElementForIndex(next_index);
+ // If the next element is empty, we are done. Make sure to clear the current empty index.
+ if (emptyfn_.IsEmpty(next_element)) {
+ emptyfn_.MakeEmpty(ElementForIndex(empty_index));
+ break;
+ }
+ // Otherwise try to see if the next element can fill the current empty index.
+ const size_t next_hash = hashfn_(next_element);
+ // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
+ // nothing we can do.
+ size_t next_ideal_index = IndexForHash(next_hash);
+ // Loop around if needed for our check.
+ size_t unwrapped_next_index = next_index;
+ if (unwrapped_next_index < empty_index) {
+ unwrapped_next_index += NumBuckets();
+ }
+ // Loop around if needed for our check.
+ size_t unwrapped_next_ideal_index = next_ideal_index;
+ if (unwrapped_next_ideal_index < empty_index) {
+ unwrapped_next_ideal_index += NumBuckets();
+ }
+ if (unwrapped_next_ideal_index <= empty_index ||
+ unwrapped_next_ideal_index > unwrapped_next_index) {
+ // If the target index isn't within our current range it must have been probed from before
+ // the empty index.
+ ElementForIndex(empty_index) = std::move(next_element);
+ filled = true; // TODO: Optimize
+ empty_index = next_index;
+ }
+ }
+ --num_elements_;
+ // If we didn't fill the slot then we need go to the next non free slot.
+ if (!filled) {
+ ++it;
+ }
+ return it;
+ }
+ // Find an element, returns end() if not found.
+ // Allows custom K types, example of when this is useful.
+ // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
+ // object in the heap for performance solution.
+ template <typename K>
+ Iterator Find(const K& element) {
+ return FindWithHash(element, hashfn_(element));
+ }
+ template <typename K>
+ Iterator FindWithHash(const K& element, size_t hash) {
+ DCHECK_EQ(hashfn_(element), hash);
+ size_t index = IndexForHash(hash);
+ while (true) {
+ T& slot = ElementForIndex(index);
+ if (emptyfn_.IsEmpty(slot)) {
+ return end();
+ }
+ if (pred_(slot, element)) {
+ return Iterator(this, index);
+ }
+ index = NextIndex(index);
+ }
+ }
+ // Insert an element, allows duplicates.
+ void Insert(const T& element) {
+ InsertWithHash(element, hashfn_(element));
+ }
+ void InsertWithHash(const T& element, size_t hash) {
+ DCHECK_EQ(hash, hashfn_(element));
+ if (num_elements_ >= elements_until_expand_) {
+ Expand();
+ DCHECK_LT(num_elements_, elements_until_expand_);
+ }
+ const size_t index = FirstAvailableSlot(IndexForHash(hash));
+ data_[index] = element;
+ ++num_elements_;
+ }
+ size_t Size() const {
+ return num_elements_;
+ }
+ void ShrinkToMaximumLoad() {
+ Resize(Size() / max_load_factor_);
+ }
+ // To distance that inserted elements were probed. Used for measuring how good hash functions
+ // are.
+ size_t TotalProbeDistance() const {
+ size_t total = 0;
+ for (size_t i = 0; i < NumBuckets(); ++i) {
+ const T& element = ElementForIndex(i);
+ if (!emptyfn_.IsEmpty(element)) {
+ size_t ideal_location = IndexForHash(hashfn_(element));
+ if (ideal_location > i) {
+ total += i + NumBuckets() - ideal_location;
+ } else {
+ total += i - ideal_location;
+ }
+ }
+ }
+ return total;
+ }
+ // Calculate the current load factor and return it.
+ double CalculateLoadFactor() const {
+ return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
+ }
+ // Make sure that everything reinserts in the right spot. Returns the number of errors.
+ size_t Verify() {
+ size_t errors = 0;
+ for (size_t i = 0; i < num_buckets_; ++i) {
+ T& element = data_[i];
+ if (!emptyfn_.IsEmpty(element)) {
+ T temp;
+ emptyfn_.MakeEmpty(temp);
+ std::swap(temp, element);
+ size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
+ if (i != first_slot) {
+ LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
+ ++errors;
+ }
+ std::swap(temp, element);
+ }
+ }
+ return errors;
+ }
+
+ private:
+ T& ElementForIndex(size_t index) {
+ DCHECK_LT(index, NumBuckets());
+ DCHECK(data_ != nullptr);
+ return data_[index];
+ }
+ const T& ElementForIndex(size_t index) const {
+ DCHECK_LT(index, NumBuckets());
+ DCHECK(data_ != nullptr);
+ return data_[index];
+ }
+ size_t IndexForHash(size_t hash) const {
+ return hash % num_buckets_;
+ }
+ size_t NextIndex(size_t index) const {
+ if (UNLIKELY(++index >= num_buckets_)) {
+ DCHECK_EQ(index, NumBuckets());
+ return 0;
+ }
+ return index;
+ }
+ bool IsFreeSlot(size_t index) const {
+ return emptyfn_.IsEmpty(ElementForIndex(index));
+ }
+ size_t NumBuckets() const {
+ return num_buckets_;
+ }
+ // Allocate a number of buckets.
+ void AllocateStorage(size_t num_buckets) {
+ num_buckets_ = num_buckets;
+ data_ = allocfn_.allocate(num_buckets_);
+ for (size_t i = 0; i < num_buckets_; ++i) {
+ allocfn_.construct(allocfn_.address(data_[i]));
+ emptyfn_.MakeEmpty(data_[i]);
+ }
+ }
+ void DeallocateStorage() {
+ if (num_buckets_ != 0) {
+ for (size_t i = 0; i < NumBuckets(); ++i) {
+ allocfn_.destroy(allocfn_.address(data_[i]));
+ }
+ allocfn_.deallocate(data_, NumBuckets());
+ data_ = nullptr;
+ num_buckets_ = 0;
+ }
+ }
+ // Expand the set based on the load factors.
+ void Expand() {
+ size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
+ if (min_index < kMinBuckets) {
+ min_index = kMinBuckets;
+ }
+ // Resize based on the minimum load factor.
+ Resize(min_index);
+ // When we hit elements_until_expand_, we are at the max load factor and must expand again.
+ elements_until_expand_ = NumBuckets() * max_load_factor_;
+ }
+ // Expand / shrink the table to the new specified size.
+ void Resize(size_t new_size) {
+ DCHECK_GE(new_size, Size());
+ T* old_data = data_;
+ size_t old_num_buckets = num_buckets_;
+ // Reinsert all of the old elements.
+ AllocateStorage(new_size);
+ for (size_t i = 0; i < old_num_buckets; ++i) {
+ T& element = old_data[i];
+ if (!emptyfn_.IsEmpty(element)) {
+ data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
+ }
+ allocfn_.destroy(allocfn_.address(element));
+ }
+ allocfn_.deallocate(old_data, old_num_buckets);
+ }
+ ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
+ while (!emptyfn_.IsEmpty(data_[index])) {
+ index = NextIndex(index);
+ }
+ return index;
+ }
+
+ Alloc allocfn_; // Allocator function.
+ HashFn hashfn_; // Hashing function.
+ EmptyFn emptyfn_; // IsEmpty/SetEmpty function.
+ Pred pred_; // Equals function.
+ size_t num_elements_; // Number of inserted elements.
+ size_t num_buckets_; // Number of hash table buckets.
+ size_t elements_until_expand_; // Maxmimum number of elements until we expand the table.
+ T* data_; // Backing storage.
+ double min_load_factor_;
+ double max_load_factor_;
+
+ friend class Iterator;
+};
+
+} // namespace art
+
+#endif // ART_RUNTIME_BASE_HASH_SET_H_
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
new file mode 100644
index 0000000..885fc06
--- /dev/null
+++ b/runtime/base/hash_set_test.cc
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hash_set.h"
+
+#include <map>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+
+#include "common_runtime_test.h"
+#include "utils.h"
+
+namespace art {
+
+class IsEmptyFnString {
+ public:
+ void MakeEmpty(std::string& item) const {
+ item.clear();
+ }
+ bool IsEmpty(const std::string& item) const {
+ return item.empty();
+ }
+};
+
+class HashSetTest : public CommonRuntimeTest {
+ public:
+ HashSetTest() : seed_(97421), unique_number_(0) {
+ }
+ std::string RandomString(size_t len) {
+ std::ostringstream oss;
+ for (size_t i = 0; i < len; ++i) {
+ oss << static_cast<char>('A' + PRand() % 64);
+ }
+ static_assert(' ' < 'A', "space must be less than a");
+ oss << " " << unique_number_++; // Relies on ' ' < 'A'
+ return oss.str();
+ }
+ void SetSeed(size_t seed) {
+ seed_ = seed;
+ }
+ size_t PRand() { // Pseudo random.
+ seed_ = seed_ * 1103515245 + 12345;
+ return seed_;
+ }
+
+ private:
+ size_t seed_;
+ size_t unique_number_;
+};
+
+TEST_F(HashSetTest, TestSmoke) {
+ HashSet<std::string, IsEmptyFnString> hash_set;
+ const std::string test_string = "hello world 1234";
+ ASSERT_TRUE(hash_set.Empty());
+ ASSERT_EQ(hash_set.Size(), 0U);
+ hash_set.Insert(test_string);
+ auto it = hash_set.Find(test_string);
+ ASSERT_EQ(*it, test_string);
+ auto after_it = hash_set.Erase(it);
+ ASSERT_TRUE(after_it == hash_set.end());
+ ASSERT_TRUE(hash_set.Empty());
+ ASSERT_EQ(hash_set.Size(), 0U);
+ it = hash_set.Find(test_string);
+ ASSERT_TRUE(it == hash_set.end());
+}
+
+TEST_F(HashSetTest, TestInsertAndErase) {
+ HashSet<std::string, IsEmptyFnString> hash_set;
+ static constexpr size_t count = 1000;
+ std::vector<std::string> strings;
+ for (size_t i = 0; i < count; ++i) {
+ // Insert a bunch of elements and make sure we can find them.
+ strings.push_back(RandomString(10));
+ hash_set.Insert(strings[i]);
+ auto it = hash_set.Find(strings[i]);
+ ASSERT_TRUE(it != hash_set.end());
+ ASSERT_EQ(*it, strings[i]);
+ }
+ ASSERT_EQ(strings.size(), hash_set.Size());
+ // Try to erase the odd strings.
+ for (size_t i = 1; i < count; i += 2) {
+ auto it = hash_set.Find(strings[i]);
+ ASSERT_TRUE(it != hash_set.end());
+ ASSERT_EQ(*it, strings[i]);
+ hash_set.Erase(it);
+ }
+ // Test removed.
+ for (size_t i = 1; i < count; i += 2) {
+ auto it = hash_set.Find(strings[i]);
+ ASSERT_TRUE(it == hash_set.end());
+ }
+ for (size_t i = 0; i < count; i += 2) {
+ auto it = hash_set.Find(strings[i]);
+ ASSERT_TRUE(it != hash_set.end());
+ ASSERT_EQ(*it, strings[i]);
+ }
+}
+
+TEST_F(HashSetTest, TestIterator) {
+ HashSet<std::string, IsEmptyFnString> hash_set;
+ ASSERT_TRUE(hash_set.begin() == hash_set.end());
+ static constexpr size_t count = 1000;
+ std::vector<std::string> strings;
+ for (size_t i = 0; i < count; ++i) {
+ // Insert a bunch of elements and make sure we can find them.
+ strings.push_back(RandomString(10));
+ hash_set.Insert(strings[i]);
+ }
+ // Make sure we visit each string exactly once.
+ std::map<std::string, size_t> found_count;
+ for (const std::string& s : hash_set) {
+ ++found_count[s];
+ }
+ for (size_t i = 0; i < count; ++i) {
+ ASSERT_EQ(found_count[strings[i]], 1U);
+ }
+ found_count.clear();
+ // Remove all the elements with iterator erase.
+ for (auto it = hash_set.begin(); it != hash_set.end();) {
+ ++found_count[*it];
+ it = hash_set.Erase(it);
+ ASSERT_EQ(hash_set.Verify(), 0U);
+ }
+ for (size_t i = 0; i < count; ++i) {
+ ASSERT_EQ(found_count[strings[i]], 1U);
+ }
+}
+
+TEST_F(HashSetTest, TestSwap) {
+ HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
+ std::vector<std::string> strings;
+ static constexpr size_t count = 1000;
+ for (size_t i = 0; i < count; ++i) {
+ strings.push_back(RandomString(10));
+ hash_seta.Insert(strings[i]);
+ }
+ std::swap(hash_seta, hash_setb);
+ hash_seta.Insert("TEST");
+ hash_setb.Insert("TEST2");
+ for (size_t i = 0; i < count; ++i) {
+ strings.push_back(RandomString(10));
+ hash_seta.Insert(strings[i]);
+ }
+}
+
+TEST_F(HashSetTest, TestStress) {
+ HashSet<std::string, IsEmptyFnString> hash_set;
+ std::unordered_multiset<std::string> std_set;
+ std::vector<std::string> strings;
+ static constexpr size_t string_count = 2000;
+ static constexpr size_t operations = 100000;
+ static constexpr size_t target_size = 5000;
+ for (size_t i = 0; i < string_count; ++i) {
+ strings.push_back(RandomString(i % 10 + 1));
+ }
+ const size_t seed = time(nullptr);
+ SetSeed(seed);
+ LOG(INFO) << "Starting stress test with seed " << seed;
+ for (size_t i = 0; i < operations; ++i) {
+ ASSERT_EQ(hash_set.Size(), std_set.size());
+ size_t delta = std::abs(static_cast<ssize_t>(target_size) -
+ static_cast<ssize_t>(hash_set.Size()));
+ size_t n = PRand();
+ if (n % target_size == 0) {
+ hash_set.Clear();
+ std_set.clear();
+ ASSERT_TRUE(hash_set.Empty());
+ ASSERT_TRUE(std_set.empty());
+ } else if (n % target_size < delta) {
+ // Skew towards adding elements until we are at the desired size.
+ const std::string& s = strings[PRand() % string_count];
+ hash_set.Insert(s);
+ std_set.insert(s);
+ ASSERT_EQ(*hash_set.Find(s), *std_set.find(s));
+ } else {
+ const std::string& s = strings[PRand() % string_count];
+ auto it1 = hash_set.Find(s);
+ auto it2 = std_set.find(s);
+ ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
+ if (it1 != hash_set.end()) {
+ ASSERT_EQ(*it1, *it2);
+ hash_set.Erase(it1);
+ std_set.erase(it2);
+ }
+ }
+ }
+}
+
+} // namespace art