3 files changed, 611 insertions, 1 deletions
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 30f7f12..5a09c96 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -101,7 +101,7 @@ inline void RegisterFree(AllocatorTag tag, size_t bytes) {
 
 // Tracking allocator for use with STL types, tracks how much memory is used.
 template<class T, AllocatorTag kTag>
-class TrackingAllocatorImpl {
+class TrackingAllocatorImpl : public std::allocator<T> {
  public:
   typedef typename std::allocator<T>::value_type value_type;
   typedef typename std::allocator<T>::size_type size_type;
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
new file mode 100644
index 0000000..992e5b1
--- /dev/null
+++ b/runtime/base/hash_set.h
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_HASH_SET_H_
+#define ART_RUNTIME_BASE_HASH_SET_H_
+
+#include <functional>
+#include <memory>
+#include <stdint.h>
+#include <utility>
+
+#include "logging.h"
+
+namespace art {
+
+// Returns true if an item is empty.
+template <class T>
+class DefaultEmptyFn {
+ public:
+  void MakeEmpty(T& item) const {
+    item = T();
+  }
+  bool IsEmpty(const T& item) const {
+    return item == T();
+  }
+};
+
+template <class T>
+class DefaultEmptyFn<T*> {
+ public:
+  void MakeEmpty(T*& item) const {
+    item = nullptr;
+  }
+  bool IsEmpty(const T*& item) const {
+    return item == nullptr;
+  }
+};
+
+// Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't
+// boxed. Uses linear probing.
+// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item)
+template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
+    class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
+class HashSet {
+ public:
+  static constexpr double kDefaultMinLoadFactor = 0.5;
+  static constexpr double kDefaultMaxLoadFactor = 0.9;
+  static constexpr size_t kMinBuckets = 1000;
+
+  class Iterator {
+   public:
+    Iterator(const Iterator&) = default;
+    Iterator(HashSet* hash_set, size_t index) : hash_set_(hash_set), index_(index) {
+    }
+    Iterator& operator=(const Iterator&) = default;
+    bool operator==(const Iterator& other) const {
+      return hash_set_ == other.hash_set_ && index_ == other.index_;
+    }
+    bool operator!=(const Iterator& other) const {
+      return !(*this == other);
+    }
+    Iterator operator++() {  // Value after modification.
+      index_ = NextNonEmptySlot(index_);
+      return *this;
+    }
+    Iterator operator++(int) {
+      Iterator temp = *this;
+      index_ = NextNonEmptySlot(index_);
+      return temp;
+    }
+    T& operator*() {
+      DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
+      return hash_set_->ElementForIndex(index_);
+    }
+    const T& operator*() const {
+      DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
+      return hash_set_->ElementForIndex(index_);
+    }
+    T* operator->() {
+      return &**this;
+    }
+    const T* operator->() const {
+      return &**this;
+    }
+    // TODO: Operator -- --(int)
+
+   private:
+    HashSet* hash_set_;
+    size_t index_;
+
+    size_t GetIndex() const {
+      return index_;
+    }
+    size_t NextNonEmptySlot(size_t index) const {
+      const size_t num_buckets = hash_set_->NumBuckets();
+      DCHECK_LT(index, num_buckets);
+      do {
+        ++index;
+      } while (index < num_buckets && hash_set_->IsFreeSlot(index));
+      return index;
+    }
+
+    friend class HashSet;
+  };
+
+  void Clear() {
+    DeallocateStorage();
+    AllocateStorage(1);
+    num_elements_ = 0;
+    elements_until_expand_ = 0;
+  }
+  HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr),
+      min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
+    Clear();
+  }
+  HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
+    *this = other;
+  }
+  HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
+    *this = std::move(other);
+  }
+  ~HashSet() {
+    DeallocateStorage();
+  }
+  HashSet& operator=(HashSet&& other) {
+    std::swap(data_, other.data_);
+    std::swap(num_buckets_, other.num_buckets_);
+    std::swap(num_elements_, other.num_elements_);
+    std::swap(elements_until_expand_, other.elements_until_expand_);
+    std::swap(min_load_factor_, other.min_load_factor_);
+    std::swap(max_load_factor_, other.max_load_factor_);
+    return *this;
+  }
+  HashSet& operator=(const HashSet& other) {
+    DeallocateStorage();
+    AllocateStorage(other.NumBuckets());
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      ElementForIndex(i) = other.data_[i];
+    }
+    num_elements_ = other.num_elements_;
+    elements_until_expand_ = other.elements_until_expand_;
+    min_load_factor_ = other.min_load_factor_;
+    max_load_factor_ = other.max_load_factor_;
+    return *this;
+  }
+  // Lower case for c++11 for each.
+  Iterator begin() {
+    Iterator ret(this, 0);
+    if (num_buckets_ != 0 && IsFreeSlot(ret.GetIndex())) {
+      ++ret;  // Skip all the empty slots.
+    }
+    return ret;
+  }
+  // Lower case for c++11 for each.
+  Iterator end() {
+    return Iterator(this, NumBuckets());
+  }
+  bool Empty() {
+    return begin() == end();
+  }
+  // Erase algorithm:
+  // Make an empty slot where the iterator is pointing.
+  // Scan fowards until we hit another empty slot.
+  // If an element inbetween doesn't rehash to the range from the current empty slot to the
+  // iterator. It must be before the empty slot, in that case we can move it to the empty slot
+  // and set the empty slot to be the location we just moved from.
+  // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
+  // element to its actual location/index.
+  Iterator Erase(Iterator it) {
+    // empty_index is the index that will become empty.
+    size_t empty_index = it.GetIndex();
+    DCHECK(!IsFreeSlot(empty_index));
+    size_t next_index = empty_index;
+    bool filled = false;  // True if we filled the empty index.
+    while (true) {
+      next_index = NextIndex(next_index);
+      T& next_element = ElementForIndex(next_index);
+      // If the next element is empty, we are done. Make sure to clear the current empty index.
+      if (emptyfn_.IsEmpty(next_element)) {
+        emptyfn_.MakeEmpty(ElementForIndex(empty_index));
+        break;
+      }
+      // Otherwise try to see if the next element can fill the current empty index.
+      const size_t next_hash = hashfn_(next_element);
+      // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
+      // nothing we can do.
+      size_t next_ideal_index = IndexForHash(next_hash);
+      // Loop around if needed for our check.
+      size_t unwrapped_next_index = next_index;
+      if (unwrapped_next_index < empty_index) {
+        unwrapped_next_index += NumBuckets();
+      }
+      // Loop around if needed for our check.
+      size_t unwrapped_next_ideal_index = next_ideal_index;
+      if (unwrapped_next_ideal_index < empty_index) {
+        unwrapped_next_ideal_index += NumBuckets();
+      }
+      if (unwrapped_next_ideal_index <= empty_index ||
+          unwrapped_next_ideal_index > unwrapped_next_index) {
+        // If the target index isn't within our current range it must have been probed from before
+        // the empty index.
+        ElementForIndex(empty_index) = std::move(next_element);
+        filled = true;  // TODO: Optimize
+        empty_index = next_index;
+      }
+    }
+    --num_elements_;
+    // If we didn't fill the slot then we need go to the next non free slot.
+    if (!filled) {
+      ++it;
+    }
+    return it;
+  }
+  // Find an element, returns end() if not found.
+  // Allows custom K types, example of when this is useful.
+  // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
+  // object in the heap for performance solution.
+  template <typename K>
+  Iterator Find(const K& element) {
+    return FindWithHash(element, hashfn_(element));
+  }
+  template <typename K>
+  Iterator FindWithHash(const K& element, size_t hash) {
+    DCHECK_EQ(hashfn_(element), hash);
+    size_t index = IndexForHash(hash);
+    while (true) {
+      T& slot = ElementForIndex(index);
+      if (emptyfn_.IsEmpty(slot)) {
+        return end();
+      }
+      if (pred_(slot, element)) {
+        return Iterator(this, index);
+      }
+      index = NextIndex(index);
+    }
+  }
+  // Insert an element, allows duplicates.
+  void Insert(const T& element) {
+    InsertWithHash(element, hashfn_(element));
+  }
+  void InsertWithHash(const T& element, size_t hash) {
+    DCHECK_EQ(hash, hashfn_(element));
+    if (num_elements_ >= elements_until_expand_) {
+      Expand();
+      DCHECK_LT(num_elements_, elements_until_expand_);
+    }
+    const size_t index = FirstAvailableSlot(IndexForHash(hash));
+    data_[index] = element;
+    ++num_elements_;
+  }
+  size_t Size() const {
+    return num_elements_;
+  }
+  void ShrinkToMaximumLoad() {
+    Resize(Size() / max_load_factor_);
+  }
+  // To distance that inserted elements were probed. Used for measuring how good hash functions
+  // are.
+  size_t TotalProbeDistance() const {
+    size_t total = 0;
+    for (size_t i = 0; i < NumBuckets(); ++i) {
+      const T& element = ElementForIndex(i);
+      if (!emptyfn_.IsEmpty(element)) {
+        size_t ideal_location = IndexForHash(hashfn_(element));
+        if (ideal_location > i) {
+          total += i + NumBuckets() - ideal_location;
+        } else {
+          total += i - ideal_location;
+        }
+      }
+    }
+    return total;
+  }
+  // Calculate the current load factor and return it.
+  double CalculateLoadFactor() const {
+    return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
+  }
+  // Make sure that everything reinserts in the right spot. Returns the number of errors.
+  size_t Verify() {
+    size_t errors = 0;
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      T& element = data_[i];
+      if (!emptyfn_.IsEmpty(element)) {
+        T temp;
+        emptyfn_.MakeEmpty(temp);
+        std::swap(temp, element);
+        size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
+        if (i != first_slot) {
+          LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
+          ++errors;
+        }
+        std::swap(temp, element);
+      }
+    }
+    return errors;
+  }
+
+ private:
+  T& ElementForIndex(size_t index) {
+    DCHECK_LT(index, NumBuckets());
+    DCHECK(data_ != nullptr);
+    return data_[index];
+  }
+  const T& ElementForIndex(size_t index) const {
+    DCHECK_LT(index, NumBuckets());
+    DCHECK(data_ != nullptr);
+    return data_[index];
+  }
+  size_t IndexForHash(size_t hash) const {
+    return hash % num_buckets_;
+  }
+  size_t NextIndex(size_t index) const {
+    if (UNLIKELY(++index >= num_buckets_)) {
+      DCHECK_EQ(index, NumBuckets());
+      return 0;
+    }
+    return index;
+  }
+  bool IsFreeSlot(size_t index) const {
+    return emptyfn_.IsEmpty(ElementForIndex(index));
+  }
+  size_t NumBuckets() const {
+    return num_buckets_;
+  }
+  // Allocate a number of buckets.
+  void AllocateStorage(size_t num_buckets) {
+    num_buckets_ = num_buckets;
+    data_ = allocfn_.allocate(num_buckets_);
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      allocfn_.construct(allocfn_.address(data_[i]));
+      emptyfn_.MakeEmpty(data_[i]);
+    }
+  }
+  void DeallocateStorage() {
+    if (num_buckets_ != 0) {
+      for (size_t i = 0; i < NumBuckets(); ++i) {
+        allocfn_.destroy(allocfn_.address(data_[i]));
+      }
+      allocfn_.deallocate(data_, NumBuckets());
+      data_ = nullptr;
+      num_buckets_ = 0;
+    }
+  }
+  // Expand the set based on the load factors.
+  void Expand() {
+    size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
+    if (min_index < kMinBuckets) {
+      min_index = kMinBuckets;
+    }
+    // Resize based on the minimum load factor.
+    Resize(min_index);
+    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
+    elements_until_expand_ = NumBuckets() * max_load_factor_;
+  }
+  // Expand / shrink the table to the new specified size.
+  void Resize(size_t new_size) {
+    DCHECK_GE(new_size, Size());
+    T* old_data = data_;
+    size_t old_num_buckets = num_buckets_;
+    // Reinsert all of the old elements.
+    AllocateStorage(new_size);
+    for (size_t i = 0; i < old_num_buckets; ++i) {
+      T& element = old_data[i];
+      if (!emptyfn_.IsEmpty(element)) {
+        data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
+      }
+      allocfn_.destroy(allocfn_.address(element));
+    }
+    allocfn_.deallocate(old_data, old_num_buckets);
+  }
+  ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
+    while (!emptyfn_.IsEmpty(data_[index])) {
+      index = NextIndex(index);
+    }
+    return index;
+  }
+
+  Alloc allocfn_;  // Allocator function.
+  HashFn hashfn_;  // Hashing function.
+  EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
+  Pred pred_;  // Equals function.
+  size_t num_elements_;  // Number of inserted elements.
+  size_t num_buckets_;  // Number of hash table buckets.
+  size_t elements_until_expand_;  // Maxmimum number of elements until we expand the table.
+  T* data_;  // Backing storage.
+  double min_load_factor_;
+  double max_load_factor_;
+
+  friend class Iterator;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_HASH_SET_H_
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
new file mode 100644
index 0000000..885fc06
--- /dev/null
+++ b/runtime/base/hash_set_test.cc
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hash_set.h"
+
+#include <map>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+
+#include "common_runtime_test.h"
+#include "utils.h"
+
+namespace art {
+
+class IsEmptyFnString {
+ public:
+  void MakeEmpty(std::string& item) const {
+    item.clear();
+  }
+  bool IsEmpty(const std::string& item) const {
+    return item.empty();
+  }
+};
+
+class HashSetTest : public CommonRuntimeTest {
+ public:
+  HashSetTest() : seed_(97421), unique_number_(0) {
+  }
+  std::string RandomString(size_t len) {
+    std::ostringstream oss;
+    for (size_t i = 0; i < len; ++i) {
+      oss << static_cast<char>('A' + PRand() % 64);
+    }
+    static_assert(' ' < 'A', "space must be less than a");
+    oss << " " << unique_number_++;  // Relies on ' ' < 'A'
+    return oss.str();
+  }
+  void SetSeed(size_t seed) {
+    seed_ = seed;
+  }
+  size_t PRand() {  // Pseudo random.
+    seed_ = seed_ * 1103515245 + 12345;
+    return seed_;
+  }
+
+ private:
+  size_t seed_;
+  size_t unique_number_;
+};
+
+TEST_F(HashSetTest, TestSmoke) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  const std::string test_string = "hello world 1234";
+  ASSERT_TRUE(hash_set.Empty());
+  ASSERT_EQ(hash_set.Size(), 0U);
+  hash_set.Insert(test_string);
+  auto it = hash_set.Find(test_string);
+  ASSERT_EQ(*it, test_string);
+  auto after_it = hash_set.Erase(it);
+  ASSERT_TRUE(after_it == hash_set.end());
+  ASSERT_TRUE(hash_set.Empty());
+  ASSERT_EQ(hash_set.Size(), 0U);
+  it = hash_set.Find(test_string);
+  ASSERT_TRUE(it == hash_set.end());
+}
+
+TEST_F(HashSetTest, TestInsertAndErase) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  static constexpr size_t count = 1000;
+  std::vector<std::string> strings;
+  for (size_t i = 0; i < count; ++i) {
+    // Insert a bunch of elements and make sure we can find them.
+    strings.push_back(RandomString(10));
+    hash_set.Insert(strings[i]);
+    auto it = hash_set.Find(strings[i]);
+    ASSERT_TRUE(it != hash_set.end());
+    ASSERT_EQ(*it, strings[i]);
+  }
+  ASSERT_EQ(strings.size(), hash_set.Size());
+  // Try to erase the odd strings.
+  for (size_t i = 1; i < count; i += 2) {
+    auto it = hash_set.Find(strings[i]);
+    ASSERT_TRUE(it != hash_set.end());
+    ASSERT_EQ(*it, strings[i]);
+    hash_set.Erase(it);
+  }
+  // Test removed.
+  for (size_t i = 1; i < count; i += 2) {
+    auto it = hash_set.Find(strings[i]);
+    ASSERT_TRUE(it == hash_set.end());
+  }
+  for (size_t i = 0; i < count; i += 2) {
+    auto it = hash_set.Find(strings[i]);
+    ASSERT_TRUE(it != hash_set.end());
+    ASSERT_EQ(*it, strings[i]);
+  }
+}
+
+TEST_F(HashSetTest, TestIterator) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  ASSERT_TRUE(hash_set.begin() == hash_set.end());
+  static constexpr size_t count = 1000;
+  std::vector<std::string> strings;
+  for (size_t i = 0; i < count; ++i) {
+    // Insert a bunch of elements and make sure we can find them.
+    strings.push_back(RandomString(10));
+    hash_set.Insert(strings[i]);
+  }
+  // Make sure we visit each string exactly once.
+  std::map<std::string, size_t> found_count;
+  for (const std::string& s : hash_set) {
+    ++found_count[s];
+  }
+  for (size_t i = 0; i < count; ++i) {
+    ASSERT_EQ(found_count[strings[i]], 1U);
+  }
+  found_count.clear();
+  // Remove all the elements with iterator erase.
+  for (auto it = hash_set.begin(); it != hash_set.end();) {
+    ++found_count[*it];
+    it = hash_set.Erase(it);
+    ASSERT_EQ(hash_set.Verify(), 0U);
+  }
+  for (size_t i = 0; i < count; ++i) {
+    ASSERT_EQ(found_count[strings[i]], 1U);
+  }
+}
+
+TEST_F(HashSetTest, TestSwap) {
+  HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
+  std::vector<std::string> strings;
+  static constexpr size_t count = 1000;
+  for (size_t i = 0; i < count; ++i) {
+    strings.push_back(RandomString(10));
+    hash_seta.Insert(strings[i]);
+  }
+  std::swap(hash_seta, hash_setb);
+  hash_seta.Insert("TEST");
+  hash_setb.Insert("TEST2");
+  for (size_t i = 0; i < count; ++i) {
+    strings.push_back(RandomString(10));
+    hash_seta.Insert(strings[i]);
+  }
+}
+
+TEST_F(HashSetTest, TestStress) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  std::unordered_multiset<std::string> std_set;
+  std::vector<std::string> strings;
+  static constexpr size_t string_count = 2000;
+  static constexpr size_t operations = 100000;
+  static constexpr size_t target_size = 5000;
+  for (size_t i = 0; i < string_count; ++i) {
+    strings.push_back(RandomString(i % 10 + 1));
+  }
+  const size_t seed = time(nullptr);
+  SetSeed(seed);
+  LOG(INFO) << "Starting stress test with seed " << seed;
+  for (size_t i = 0; i < operations; ++i) {
+    ASSERT_EQ(hash_set.Size(), std_set.size());
+    size_t delta = std::abs(static_cast<ssize_t>(target_size) -
+                            static_cast<ssize_t>(hash_set.Size()));
+    size_t n = PRand();
+    if (n % target_size == 0) {
+      hash_set.Clear();
+      std_set.clear();
+      ASSERT_TRUE(hash_set.Empty());
+      ASSERT_TRUE(std_set.empty());
+    } else  if (n % target_size < delta) {
+      // Skew towards adding elements until we are at the desired size.
+      const std::string& s = strings[PRand() % string_count];
+      hash_set.Insert(s);
+      std_set.insert(s);
+      ASSERT_EQ(*hash_set.Find(s), *std_set.find(s));
+    } else {
+      const std::string& s = strings[PRand() % string_count];
+      auto it1 = hash_set.Find(s);
+      auto it2 = std_set.find(s);
+      ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
+      if (it1 != hash_set.end()) {
+        ASSERT_EQ(*it1, *it2);
+        hash_set.Erase(it1);
+        std_set.erase(it2);
+      }
+    }
+  }
+}
+
+}  // namespace art