diff options
author | Hiroshi Yamauchi <yamauchi@google.com> | 2015-03-09 11:57:48 -0700 |
---|---|---|
committer | Hiroshi Yamauchi <yamauchi@google.com> | 2015-03-11 15:32:59 -0700 |
commit | 4460a84be92b5a94ecfb5c650aef4945ab849c93 (patch) | |
tree | 2167b79cf593d5ff686aaf0e3bca3b7c571c6d69 /runtime/gc/allocator/rosalloc-inl.h | |
parent | 4cfe74cb50b73f5f4b6dd32aabed55d044afe348 (diff) | |
download | art-4460a84be92b5a94ecfb5c650aef4945ab849c93.zip art-4460a84be92b5a94ecfb5c650aef4945ab849c93.tar.gz art-4460a84be92b5a94ecfb5c650aef4945ab849c93.tar.bz2 |
Rosalloc thread local allocation path without a cas.
Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees 4101 -> 2929 (~26% reduction)
Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
Diffstat (limited to 'runtime/gc/allocator/rosalloc-inl.h')
-rw-r--r-- | runtime/gc/allocator/rosalloc-inl.h | 121 |
1 files changed, 117 insertions, 4 deletions
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h index f6c9d3c..bba92a1 100644 --- a/runtime/gc/allocator/rosalloc-inl.h +++ b/runtime/gc/allocator/rosalloc-inl.h @@ -28,15 +28,19 @@ inline ALWAYS_INLINE bool RosAlloc::ShouldCheckZeroMemory() { } template<bool kThreadSafe> -inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) { +inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { if (UNLIKELY(size > kLargeSizeThreshold)) { - return AllocLargeObject(self, size, bytes_allocated); + return AllocLargeObject(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } void* m; if (kThreadSafe) { - m = AllocFromRun(self, size, bytes_allocated); + m = AllocFromRun(self, size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } else { - m = AllocFromRunThreadUnsafe(self, size, bytes_allocated); + m = AllocFromRunThreadUnsafe(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } // Check if the returned memory is really all zero. if (ShouldCheckZeroMemory() && m != nullptr) { @@ -48,6 +52,115 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by return m; } +inline bool RosAlloc::Run::IsFull() { + const size_t num_vec = NumberOfBitmapVectors(); + for (size_t v = 0; v < num_vec; ++v) { + if (~alloc_bit_map_[v] != 0) { + return false; + } + } + return true; +} + +inline bool RosAlloc::CanAllocFromThreadLocalRun(Thread* self, size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return false; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + DCHECK_EQ(idx, SizeToIndex(size)); + DCHECK_EQ(bracket_size, IndexToBracketSize(idx)); + DCHECK_EQ(bracket_size, bracketSizes[idx]); + DCHECK_LE(size, bracket_size); + DCHECK(size > 512 || bracket_size - size < 16); + DCHECK_LT(idx, kNumThreadLocalSizeBrackets); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. + MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + return !thread_local_run->IsFull(); +} + +inline void* RosAlloc::AllocFromThreadLocalRun(Thread* self, size_t size, + size_t* bytes_allocated) { + DCHECK(bytes_allocated != nullptr); + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return nullptr; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. + MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + void* slot_addr = thread_local_run->AllocSlot(); + if (LIKELY(slot_addr != nullptr)) { + *bytes_allocated = bracket_size; + } + return slot_addr; +} + +inline size_t RosAlloc::MaxBytesBulkAllocatedFor(size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return size; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + return numOfSlots[idx] * bracket_size; +} + +inline void* RosAlloc::Run::AllocSlot() { + const size_t idx = size_bracket_idx_; + while (true) { + if (kIsDebugBuild) { + // Make sure that no slots leaked, the bitmap should be full for all previous vectors. + for (size_t i = 0; i < first_search_vec_idx_; ++i) { + CHECK_EQ(~alloc_bit_map_[i], 0U); + } + } + uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; + uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); + if (LIKELY(ffz1 != 0)) { + const uint32_t ffz = ffz1 - 1; + const uint32_t slot_idx = ffz + + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; + const uint32_t mask = 1U << ffz; + DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; + // Found an empty slot. Set the bit. + DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); + *alloc_bitmap_ptr |= mask; + DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); + uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + + headerSizes[idx] + slot_idx * bracketSizes[idx]; + if (kTraceRosAlloc) { + LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) + << ", bracket_size=" << std::dec << bracketSizes[idx] + << ", slot_idx=" << slot_idx; + } + return slot_addr; + } + const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; + if (first_search_vec_idx_ + 1 >= num_words) { + DCHECK(IsFull()); + // Already at the last word, return null. + return nullptr; + } + // Increase the index to the next word and try again. + ++first_search_vec_idx_; + } +} + } // namespace allocator } // namespace gc } // namespace art |