diff options
author | Hiroshi Yamauchi <yamauchi@google.com> | 2015-03-09 11:57:48 -0700 |
---|---|---|
committer | Hiroshi Yamauchi <yamauchi@google.com> | 2015-03-11 15:32:59 -0700 |
commit | 4460a84be92b5a94ecfb5c650aef4945ab849c93 (patch) | |
tree | 2167b79cf593d5ff686aaf0e3bca3b7c571c6d69 | |
parent | 4cfe74cb50b73f5f4b6dd32aabed55d044afe348 (diff) | |
download | art-4460a84be92b5a94ecfb5c650aef4945ab849c93.zip art-4460a84be92b5a94ecfb5c650aef4945ab849c93.tar.gz art-4460a84be92b5a94ecfb5c650aef4945ab849c93.tar.bz2 |
Rosalloc thread local allocation path without a cas.
Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees 4101 -> 2929 (~26% reduction)
Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
35 files changed, 728 insertions, 279 deletions
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc index 8486597..e0d62d7 100644 --- a/runtime/common_runtime_test.cc +++ b/runtime/common_runtime_test.cc @@ -263,6 +263,8 @@ void CommonRuntimeTest::SetUp() { // pool is created by the runtime. runtime_->GetHeap()->CreateThreadPool(); runtime_->GetHeap()->VerifyHeap(); // Check for heap corruption before the test + // Reduce timinig-dependent flakiness in OOME behavior (eg StubTest.AllocObject). + runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U); // Get the boot class path from the runtime so it can be used in tests. boot_class_path_ = class_linker_->GetBootClassPath(); diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc index 87ce166..7780935 100644 --- a/runtime/gc/accounting/mod_union_table_test.cc +++ b/runtime/gc/accounting/mod_union_table_test.cc @@ -48,9 +48,9 @@ class ModUnionTableTest : public CommonRuntimeTest { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { auto* klass = GetObjectArrayClass(self, space); const size_t size = ComputeArraySize(self, klass, component_count, 2); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; auto* obj = down_cast<mirror::ObjectArray<mirror::Object>*>( - space->Alloc(self, size, &bytes_allocated, nullptr)); + space->Alloc(self, size, &bytes_allocated, nullptr, &bytes_tl_bulk_allocated)); if (obj != nullptr) { obj->SetClass(klass); obj->SetLength(static_cast<int32_t>(component_count)); @@ -77,9 +77,10 @@ class ModUnionTableTest : public CommonRuntimeTest { // copy of the class in the same space that we are allocating in. 
DCHECK(java_lang_object_array_ != nullptr); const size_t class_size = java_lang_object_array_->GetClassSize(); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; auto* klass = down_cast<mirror::Class*>(space->Alloc(self, class_size, &bytes_allocated, - nullptr)); + nullptr, + &bytes_tl_bulk_allocated)); DCHECK(klass != nullptr); memcpy(klass, java_lang_object_array_, class_size); Runtime::Current()->GetHeap()->GetCardTable()->MarkCard(klass); diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h index f6c9d3c..bba92a1 100644 --- a/runtime/gc/allocator/rosalloc-inl.h +++ b/runtime/gc/allocator/rosalloc-inl.h @@ -28,15 +28,19 @@ inline ALWAYS_INLINE bool RosAlloc::ShouldCheckZeroMemory() { } template<bool kThreadSafe> -inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) { +inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { if (UNLIKELY(size > kLargeSizeThreshold)) { - return AllocLargeObject(self, size, bytes_allocated); + return AllocLargeObject(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } void* m; if (kThreadSafe) { - m = AllocFromRun(self, size, bytes_allocated); + m = AllocFromRun(self, size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } else { - m = AllocFromRunThreadUnsafe(self, size, bytes_allocated); + m = AllocFromRunThreadUnsafe(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } // Check if the returned memory is really all zero. 
if (ShouldCheckZeroMemory() && m != nullptr) { @@ -48,6 +52,115 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by return m; } +inline bool RosAlloc::Run::IsFull() { + const size_t num_vec = NumberOfBitmapVectors(); + for (size_t v = 0; v < num_vec; ++v) { + if (~alloc_bit_map_[v] != 0) { + return false; + } + } + return true; +} + +inline bool RosAlloc::CanAllocFromThreadLocalRun(Thread* self, size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return false; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + DCHECK_EQ(idx, SizeToIndex(size)); + DCHECK_EQ(bracket_size, IndexToBracketSize(idx)); + DCHECK_EQ(bracket_size, bracketSizes[idx]); + DCHECK_LE(size, bracket_size); + DCHECK(size > 512 || bracket_size - size < 16); + DCHECK_LT(idx, kNumThreadLocalSizeBrackets); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. + MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + return !thread_local_run->IsFull(); +} + +inline void* RosAlloc::AllocFromThreadLocalRun(Thread* self, size_t size, + size_t* bytes_allocated) { + DCHECK(bytes_allocated != nullptr); + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return nullptr; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. 
+ MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + void* slot_addr = thread_local_run->AllocSlot(); + if (LIKELY(slot_addr != nullptr)) { + *bytes_allocated = bracket_size; + } + return slot_addr; +} + +inline size_t RosAlloc::MaxBytesBulkAllocatedFor(size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return size; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + return numOfSlots[idx] * bracket_size; +} + +inline void* RosAlloc::Run::AllocSlot() { + const size_t idx = size_bracket_idx_; + while (true) { + if (kIsDebugBuild) { + // Make sure that no slots leaked, the bitmap should be full for all previous vectors. + for (size_t i = 0; i < first_search_vec_idx_; ++i) { + CHECK_EQ(~alloc_bit_map_[i], 0U); + } + } + uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; + uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); + if (LIKELY(ffz1 != 0)) { + const uint32_t ffz = ffz1 - 1; + const uint32_t slot_idx = ffz + + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; + const uint32_t mask = 1U << ffz; + DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; + // Found an empty slot. Set the bit. 
+ DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); + *alloc_bitmap_ptr |= mask; + DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); + uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + + headerSizes[idx] + slot_idx * bracketSizes[idx]; + if (kTraceRosAlloc) { + LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) + << ", bracket_size=" << std::dec << bracketSizes[idx] + << ", slot_idx=" << slot_idx; + } + return slot_addr; + } + const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; + if (first_search_vec_idx_ + 1 >= num_words) { + DCHECK(IsFull()); + // Already at the last word, return null. + return nullptr; + } + // Increase the index to the next word and try again. + ++first_search_vec_idx_; + } +} + } // namespace allocator } // namespace gc } // namespace art diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index f51093a..f64a4ff 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -454,7 +454,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) { return byte_size; } -void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); DCHECK_GT(size, kLargeSizeThreshold); size_t num_pages = RoundUp(size, kPageSize) / kPageSize; void* r; @@ -470,6 +473,8 @@ void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_alloca } const size_t total_bytes = num_pages * kPageSize; *bytes_allocated = total_bytes; + *usable_size = total_bytes; + *bytes_tl_bulk_allocated = total_bytes; if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r) << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize) @@ -622,7 +627,12 @@ inline 
void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) { return slot_addr; } -void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); + DCHECK(bytes_tl_bulk_allocated != nullptr); DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); @@ -634,14 +644,19 @@ void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* byte Locks::mutator_lock_->AssertExclusiveHeld(self); void* slot_addr = AllocFromCurrentRunUnlocked(self, idx); if (LIKELY(slot_addr != nullptr)) { - DCHECK(bytes_allocated != nullptr); *bytes_allocated = bracket_size; - // Caller verifies that it is all 0. + *usable_size = bracket_size; + *bytes_tl_bulk_allocated = bracket_size; } + // Caller verifies that it is all 0. return slot_addr; } -void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); + DCHECK(bytes_tl_bulk_allocated != nullptr); DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); @@ -712,31 +727,43 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) self->SetRosAllocRun(idx, thread_local_run); DCHECK(!thread_local_run->IsFull()); } - DCHECK(thread_local_run != nullptr); DCHECK(!thread_local_run->IsFull()); DCHECK(thread_local_run->IsThreadLocal()); + // Account for all the free slots in the new or refreshed thread local run. 
+ *bytes_tl_bulk_allocated = thread_local_run->NumberOfFreeSlots() * bracket_size; slot_addr = thread_local_run->AllocSlot(); // Must succeed now with a new run. DCHECK(slot_addr != nullptr); + } else { + // The slot is already counted. Leave it as is. + *bytes_tl_bulk_allocated = 0; } + DCHECK(slot_addr != nullptr); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) + LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size) << "(" << std::dec << (bracket_size) << ")"; } + *bytes_allocated = bracket_size; + *usable_size = bracket_size; } else { // Use the (shared) current run. MutexLock mu(self, *size_bracket_locks_[idx]); slot_addr = AllocFromCurrentRunUnlocked(self, idx); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) + LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size) << "(" << std::dec << (bracket_size) << ")"; } + if (LIKELY(slot_addr != nullptr)) { + *bytes_allocated = bracket_size; + *usable_size = bracket_size; + *bytes_tl_bulk_allocated = bracket_size; + } } - DCHECK(bytes_allocated != nullptr); - *bytes_allocated = bracket_size; // Caller verifies that it is all 0. return slot_addr; } @@ -852,44 +879,6 @@ std::string RosAlloc::Run::Dump() { return stream.str(); } -inline void* RosAlloc::Run::AllocSlot() { - const size_t idx = size_bracket_idx_; - while (true) { - if (kIsDebugBuild) { - // Make sure that no slots leaked, the bitmap should be full for all previous vectors. 
- for (size_t i = 0; i < first_search_vec_idx_; ++i) { - CHECK_EQ(~alloc_bit_map_[i], 0U); - } - } - uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; - uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); - if (LIKELY(ffz1 != 0)) { - const uint32_t ffz = ffz1 - 1; - const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; - const uint32_t mask = 1U << ffz; - DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; - // Found an empty slot. Set the bit. - DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); - *alloc_bitmap_ptr |= mask; - DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); - uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx]; - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) - << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; - } - return slot_addr; - } - const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; - if (first_search_vec_idx_ + 1 >= num_words) { - DCHECK(IsFull()); - // Already at the last word, return null. - return nullptr; - } - // Increase the index to the next word and try again. - ++first_search_vec_idx_; - } -} - void RosAlloc::Run::FreeSlot(void* ptr) { DCHECK(!IsThreadLocal()); const uint8_t idx = size_bracket_idx_; @@ -920,6 +909,25 @@ void RosAlloc::Run::FreeSlot(void* ptr) { } } +size_t RosAlloc::Run::NumberOfFreeSlots() { + size_t num_alloc_slots = 0; + const size_t idx = size_bracket_idx_; + const size_t num_slots = numOfSlots[idx]; + const size_t num_vec = RoundUp(num_slots, 32) / 32; + DCHECK_NE(num_vec, 0U); + for (size_t v = 0; v < num_vec - 1; v++) { + num_alloc_slots += POPCOUNT(alloc_bit_map_[v]); + } + // Don't count the invalid bits in the last vector. 
+ uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] & + ~GetBitmapLastVectorMask(num_slots, num_vec); + num_alloc_slots += POPCOUNT(last_vec_masked); + size_t num_free_slots = num_slots - num_alloc_slots; + DCHECK_LE(num_alloc_slots, num_slots); + DCHECK_LE(num_free_slots, num_slots); + return num_free_slots; +} + inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) { DCHECK(IsThreadLocal()); // Free slots in the alloc bit map based on the thread local free bit map. @@ -1055,16 +1063,6 @@ inline bool RosAlloc::Run::IsAllFree() { return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec); } -inline bool RosAlloc::Run::IsFull() { - const size_t num_vec = NumberOfBitmapVectors(); - for (size_t v = 0; v < num_vec; ++v) { - if (~alloc_bit_map_[v] != 0) { - return false; - } - } - return true; -} - inline bool RosAlloc::Run::IsBulkFreeBitmapClean() { const size_t num_vec = NumberOfBitmapVectors(); for (size_t v = 0; v < num_vec; v++) { @@ -1654,10 +1652,11 @@ void RosAlloc::SetFootprintLimit(size_t new_capacity) { } } -void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { +size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) { Thread* self = Thread::Current(); // Avoid race conditions on the bulk free bit maps with BulkFree() (GC). ReaderMutexLock wmu(self, bulk_free_lock_); + size_t free_bytes = 0U; for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); @@ -1665,9 +1664,12 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { // Invalid means already revoked. DCHECK(thread_local_run->IsThreadLocal()); if (thread_local_run != dedicated_full_run_) { + // Note the thread local run may not be full here. thread->SetRosAllocRun(idx, dedicated_full_run_); DCHECK_EQ(thread_local_run->magic_num_, kMagicNum); - // Note the thread local run may not be full here. 
+ // Count the number of free slots left. + size_t num_free_slots = thread_local_run->NumberOfFreeSlots(); + free_bytes += num_free_slots * bracketSizes[idx]; bool dont_care; thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care); thread_local_run->SetIsThreadLocal(false); @@ -1677,6 +1679,7 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { RevokeRun(self, idx, thread_local_run); } } + return free_bytes; } void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) { @@ -1719,16 +1722,18 @@ void RosAlloc::RevokeThreadUnsafeCurrentRuns() { } } -void RosAlloc::RevokeAllThreadLocalRuns() { +size_t RosAlloc::RevokeAllThreadLocalRuns() { // This is called when a mutator thread won't allocate such as at // the Zygote creation time or during the GC pause. MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_); MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_); std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); + size_t free_bytes = 0U; for (Thread* thread : thread_list) { - RevokeThreadLocalRuns(thread); + free_bytes += RevokeThreadLocalRuns(thread); } RevokeThreadUnsafeCurrentRuns(); + return free_bytes; } void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index 3269e10..d1e7ad9 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -230,8 +230,10 @@ class RosAlloc { static uint32_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec); // Returns true if all the slots in the run are not in use. bool IsAllFree(); + // Returns the number of free slots. + size_t NumberOfFreeSlots(); // Returns true if all the slots in the run are in use. - bool IsFull(); + ALWAYS_INLINE bool IsFull(); // Returns true if the bulk free bit map is clean. bool IsBulkFreeBitmapClean(); // Returns true if the thread local free bit map is clean. 
@@ -309,6 +311,15 @@ class RosAlloc { DCHECK(bracketSizes[idx] == size); return idx; } + // Returns true if the given allocation size is for a thread local allocation. + static bool IsSizeForThreadLocal(size_t size) { + DCHECK_GT(kNumThreadLocalSizeBrackets, 0U); + size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1; + bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx]; + DCHECK(size > kLargeSizeThreshold || + (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets))); + return is_size_for_thread_local; + } // Rounds up the size up the nearest bracket size. static size_t RoundToBracketSize(size_t size) { DCHECK(size <= kLargeSizeThreshold); @@ -504,11 +515,13 @@ class RosAlloc { size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_); // Allocate/free a run slot. - void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) + void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); // Allocate/free a run slot without acquiring locks. // TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) - void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) + void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx); @@ -527,7 +540,9 @@ class RosAlloc { size_t FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_); // Allocates large objects. - void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); + void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + LOCKS_EXCLUDED(lock_); // Revoke a run by adding it to non_full_runs_ or freeing the pages. 
void RevokeRun(Thread* self, size_t idx, Run* run); @@ -551,13 +566,26 @@ class RosAlloc { // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization. // If used, this may cause race conditions if multiple threads are allocating at the same time. template<bool kThreadSafe = true> - void* Alloc(Thread* self, size_t size, size_t* bytes_allocated) + void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); size_t Free(Thread* self, void* ptr) LOCKS_EXCLUDED(bulk_free_lock_); size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs) LOCKS_EXCLUDED(bulk_free_lock_); + // Returns true if the given allocation request can be allocated in + // an existing thread local run without allocating a new run. + ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size); + // Allocate the given allocation request in an existing thread local + // run without allocating a new run. + ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated); + + // Returns the maximum bytes that could be allocated for the given + // size in bulk, that is the maximum value for the + // bytes_allocated_bulk out param returned by RosAlloc::Alloc(). + ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size); + // Returns the size of the allocated slot for a given allocated memory chunk. size_t UsableSize(const void* ptr); // Returns the size of the allocated slot for a given size. @@ -586,9 +614,13 @@ class RosAlloc { void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_); // Releases the thread-local runs assigned to the given thread back to the common set of runs. - void RevokeThreadLocalRuns(Thread* thread); + // Returns the total bytes of free slots in the revoked thread local runs. This is to be + // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. 
+ size_t RevokeThreadLocalRuns(Thread* thread); // Releases the thread-local runs assigned to all the threads back to the common set of runs. - void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_); + // Returns the total bytes of free slots in the revoked thread local runs. This is to be + // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. + size_t RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_); // Assert the thread local runs of a thread are revoked. void AssertThreadLocalRunsAreRevoked(Thread* thread); // Assert all the thread local runs are revoked. diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index dd45eca..db7a4ef 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1259,8 +1259,9 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { size_t region_space_bytes_allocated = 0U; size_t non_moving_space_bytes_allocated = 0U; size_t bytes_allocated = 0U; + size_t dummy; mirror::Object* to_ref = region_space_->AllocNonvirtual<true>( - region_space_alloc_size, &region_space_bytes_allocated, nullptr); + region_space_alloc_size, &region_space_bytes_allocated, nullptr, &dummy); bytes_allocated = region_space_bytes_allocated; if (to_ref != nullptr) { DCHECK_EQ(region_space_alloc_size, region_space_bytes_allocated); @@ -1286,7 +1287,7 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { } fall_back_to_non_moving = true; to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size, - &non_moving_space_bytes_allocated, nullptr); + &non_moving_space_bytes_allocated, nullptr, &dummy); CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed"; bytes_allocated = non_moving_space_bytes_allocated; // Mark it in the mark bitmap. 
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index 8be18be..eafcc45 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -48,6 +48,7 @@ void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) { gc_cause_ = gc_cause; freed_ = ObjectBytePair(); freed_los_ = ObjectBytePair(); + freed_bytes_revoke_ = 0; } uint64_t Iteration::GetEstimatedThroughput() const { diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index b809469..ed5207a 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -75,6 +75,12 @@ class Iteration { uint64_t GetFreedLargeObjects() const { return freed_los_.objects; } + uint64_t GetFreedRevokeBytes() const { + return freed_bytes_revoke_; + } + void SetFreedRevoke(uint64_t freed) { + freed_bytes_revoke_ = freed; + } void Reset(GcCause gc_cause, bool clear_soft_references); // Returns the estimated throughput of the iteration. uint64_t GetEstimatedThroughput() const; @@ -99,6 +105,7 @@ class Iteration { TimingLogger timings_; ObjectBytePair freed_; ObjectBytePair freed_los_; + uint64_t freed_bytes_revoke_; // see Heap::num_bytes_freed_revoke_. std::vector<uint64_t> pause_times_; friend class GarbageCollector; diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 8aac484..ee4e752 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -292,6 +292,7 @@ void MarkSweep::ReclaimPhase() { Runtime::Current()->AllowNewSystemWeaks(); { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); + GetHeap()->RecordFreeRevoke(); // Reclaim unmarked objects. Sweep(false); // Swap the live and mark bitmaps for each space which we modified space. 
This is an diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index c1ba5e3..b3d59f2 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -242,6 +242,7 @@ void SemiSpace::MarkingPhase() { // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked // before they are properly counted. RevokeAllThreadLocalBuffers(); + GetHeap()->RecordFreeRevoke(); // this is for the non-moving rosalloc space used by GSS. // Record freed memory. const int64_t from_bytes = from_space_->GetBytesAllocated(); const int64_t to_bytes = bytes_moved_; @@ -489,17 +490,18 @@ static inline size_t CopyAvoidingDirtyingPages(void* dest, const void* src, size mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { const size_t object_size = obj->SizeOf(); - size_t bytes_allocated; + size_t bytes_allocated, dummy; mirror::Object* forward_address = nullptr; if (generational_ && reinterpret_cast<uint8_t*>(obj) < last_gc_to_space_end_) { // If it's allocated before the last GC (older), move // (pseudo-promote) it to the main free list space (as sort // of an old generation.) forward_address = promo_dest_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, - nullptr); + nullptr, &dummy); if (UNLIKELY(forward_address == nullptr)) { // If out of space, fall back to the to-space. - forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr, + &dummy); // No logic for marking the bitmap, so it must be null. DCHECK(to_space_live_bitmap_ == nullptr); } else { @@ -544,7 +546,8 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { } } else { // If it's allocated after the last GC (younger), copy it to the to-space. 
- forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr, + &dummy); if (forward_address != nullptr && to_space_live_bitmap_ != nullptr) { to_space_live_bitmap_->Set(forward_address); } @@ -552,7 +555,7 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { // If it's still null, attempt to use the fallback space. if (UNLIKELY(forward_address == nullptr)) { forward_address = fallback_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, - nullptr); + nullptr, &dummy); CHECK(forward_address != nullptr) << "Out of memory in the to-space and fallback space."; accounting::ContinuousSpaceBitmap* bitmap = fallback_space_->GetLiveBitmap(); if (bitmap != nullptr) { diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index b8c2452..b770096 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -64,6 +64,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas // fragmentation. } AllocationTimer alloc_timer(this, &obj); + // bytes allocated for the (individual) object. 
size_t bytes_allocated; size_t usable_size; size_t new_num_bytes_allocated = 0; @@ -86,13 +87,29 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas usable_size = bytes_allocated; pre_fence_visitor(obj, usable_size); QuasiAtomic::ThreadFenceForConstructor(); + } else if (!kInstrumented && allocator == kAllocatorTypeRosAlloc && + (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) && + LIKELY(obj != nullptr)) { + DCHECK(!running_on_valgrind_); + obj->SetClass(klass); + if (kUseBakerOrBrooksReadBarrier) { + if (kUseBrooksReadBarrier) { + obj->SetReadBarrierPointer(obj); + } + obj->AssertReadBarrierPointer(); + } + usable_size = bytes_allocated; + pre_fence_visitor(obj, usable_size); + QuasiAtomic::ThreadFenceForConstructor(); } else { + // bytes allocated that takes bulk thread-local buffer allocations into account. + size_t bytes_tl_bulk_allocated = 0; obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated, - &usable_size); + &usable_size, &bytes_tl_bulk_allocated); if (UNLIKELY(obj == nullptr)) { bool is_current_allocator = allocator == GetCurrentAllocator(); obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size, - &klass); + &bytes_tl_bulk_allocated, &klass); if (obj == nullptr) { bool after_is_current_allocator = allocator == GetCurrentAllocator(); // If there is a pending exception, fail the allocation right away since the next one @@ -126,9 +143,9 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas WriteBarrierField(obj, mirror::Object::ClassOffset(), klass); } pre_fence_visitor(obj, usable_size); - new_num_bytes_allocated = - static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated)) - + bytes_allocated; + new_num_bytes_allocated = static_cast<size_t>( + num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_tl_bulk_allocated)) + + bytes_tl_bulk_allocated; } if 
(kIsDebugBuild && Runtime::Current()->IsStarted()) { CHECK_LE(obj->SizeOf(), usable_size); @@ -196,8 +213,10 @@ inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class** klas template <const bool kInstrumented, const bool kGrow> inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type, size_t alloc_size, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { if (allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegionTLAB && + allocator_type != kAllocatorTypeRosAlloc && UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { return nullptr; } @@ -210,35 +229,56 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator if (LIKELY(ret != nullptr)) { *bytes_allocated = alloc_size; *usable_size = alloc_size; + *bytes_tl_bulk_allocated = alloc_size; } break; } case kAllocatorTypeRosAlloc: { if (kInstrumented && UNLIKELY(running_on_valgrind_)) { // If running on valgrind, we should be using the instrumented path. 
- ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size); + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { + return nullptr; + } + ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(!running_on_valgrind_); - ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size); + size_t max_bytes_tl_bulk_allocated = + rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size); + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { + return nullptr; + } + if (!kInstrumented) { + DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size)); + } + ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } break; } case kAllocatorTypeDlMalloc: { if (kInstrumented && UNLIKELY(running_on_valgrind_)) { // If running on valgrind, we should be using the instrumented path. 
- ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(!running_on_valgrind_); - ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size); + ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } break; } case kAllocatorTypeNonMoving: { - ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); break; } case kAllocatorTypeLOS: { - ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Note that the bump pointer spaces aren't necessarily next to // the other continuous spaces like the non-moving alloc space or // the zygote space. @@ -257,20 +297,22 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) { return nullptr; } - *bytes_allocated = new_tlab_size; + *bytes_tl_bulk_allocated = new_tlab_size; } else { - *bytes_allocated = 0; + *bytes_tl_bulk_allocated = 0; } // The allocation can't fail. 
ret = self->AllocTlab(alloc_size); DCHECK(ret != nullptr); + *bytes_allocated = alloc_size; *usable_size = alloc_size; break; } case kAllocatorTypeRegion: { DCHECK(region_space_ != nullptr); alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment); - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); break; } case kAllocatorTypeRegionTLAB: { @@ -283,15 +325,17 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator // Try to allocate a tlab. if (!region_space_->AllocNewTlab(self)) { // Failed to allocate a tlab. Try non-tlab. - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } - *bytes_allocated = space::RegionSpace::kRegionSize; + *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize; // Fall-through. } else { // Check OOME for a non-tlab allocation. if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) { - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } else { // Neither tlab or non-tlab works. Give up. @@ -301,18 +345,20 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator } else { // Large. Check OOME. 
if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } else { return nullptr; } } } else { - *bytes_allocated = 0; + *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer. } // The allocation can't fail. ret = self->AllocTlab(alloc_size); DCHECK(ret != nullptr); + *bytes_allocated = alloc_size; *usable_size = alloc_size; break; } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 7534515..a41d65c 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -156,6 +156,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max total_objects_freed_ever_(0), num_bytes_allocated_(0), native_bytes_allocated_(0), + num_bytes_freed_revoke_(0), verify_missing_card_marks_(false), verify_system_weaks_(false), verify_pre_gc_heap_(verify_pre_gc_heap), @@ -1344,6 +1345,19 @@ void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) { } } +void Heap::RecordFreeRevoke() { + // Subtract num_bytes_freed_revoke_ from num_bytes_allocated_ to cancel out the + // ahead-of-time, bulk counting of bytes allocated in rosalloc thread-local buffers. + // If there's a concurrent revoke, ok to not necessarily reset num_bytes_freed_revoke_ + // all the way to zero exactly as the remainder will be subtracted at the next GC. 
+ size_t bytes_freed = num_bytes_freed_revoke_.LoadSequentiallyConsistent(); + CHECK_GE(num_bytes_freed_revoke_.FetchAndSubSequentiallyConsistent(bytes_freed), + bytes_freed) << "num_bytes_freed_revoke_ underflow"; + CHECK_GE(num_bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes_freed), + bytes_freed) << "num_bytes_allocated_ underflow"; + GetCurrentGcIteration()->SetFreedRevoke(bytes_freed); +} + space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const { for (const auto& space : continuous_spaces_) { if (space->AsContinuousSpace()->IsRosAllocSpace()) { @@ -1358,6 +1372,7 @@ space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t alloc_size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated, mirror::Class** klass) { bool was_default_allocator = allocator == GetCurrentAllocator(); // Make sure there is no pending exception since we may need to throw an OOME. @@ -1377,7 +1392,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } // A GC was in progress and we blocked, retry allocation now that memory has been freed. mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1391,7 +1406,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } if (gc_ran) { mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1411,7 +1426,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat if (plan_gc_ran) { // Did we free sufficient memory for the allocation to succeed? 
mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1420,7 +1435,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat // Allocations have failed after GCs; this is an exceptional state. // Try harder, growing the heap if necessary. mirror::Object* ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1437,7 +1452,8 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat if (was_default_allocator && allocator != GetCurrentAllocator()) { return nullptr; } - ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size); + ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (ptr == nullptr) { const uint64_t current_time = NanoTime(); switch (allocator) { @@ -1453,7 +1469,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat case HomogeneousSpaceCompactResult::kSuccess: // If the allocation succeeded, we delayed an oom. 
ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { count_delayed_oom_++; } @@ -1498,7 +1514,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } else { LOG(WARNING) << "Disabled moving GC due to the non moving space being full"; ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); } } break; @@ -1984,8 +2000,8 @@ class ZygoteCompactingCollector FINAL : public collector::SemiSpace { if (it == bins_.end()) { // No available space in the bins, place it in the target space instead (grows the zygote // space). - size_t bytes_allocated; - forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr); + size_t bytes_allocated, dummy; + forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr, &dummy); if (to_space_live_bitmap_ != nullptr) { to_space_live_bitmap_->Set(forward_address); } else { @@ -3084,7 +3100,8 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, SetIdealFootprint(target_size); if (IsGcConcurrent()) { const uint64_t freed_bytes = current_gc_iteration_.GetFreedBytes() + - current_gc_iteration_.GetFreedLargeObjectBytes(); + current_gc_iteration_.GetFreedLargeObjectBytes() + + current_gc_iteration_.GetFreedRevokeBytes(); // Bytes allocated will shrink by freed_bytes after the GC runs, so if we want to figure out // how many bytes were allocated during the GC we need to add freed_bytes back on. 
CHECK_GE(bytes_allocated + freed_bytes, bytes_allocated_before_gc); @@ -3290,31 +3307,43 @@ void Heap::RequestTrim(Thread* self) { void Heap::RevokeThreadLocalBuffers(Thread* thread) { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeThreadLocalBuffers(thread); + size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } if (bump_pointer_space_ != nullptr) { - bump_pointer_space_->RevokeThreadLocalBuffers(thread); + CHECK_EQ(bump_pointer_space_->RevokeThreadLocalBuffers(thread), 0U); } if (region_space_ != nullptr) { - region_space_->RevokeThreadLocalBuffers(thread); + CHECK_EQ(region_space_->RevokeThreadLocalBuffers(thread), 0U); } } void Heap::RevokeRosAllocThreadLocalBuffers(Thread* thread) { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeThreadLocalBuffers(thread); + size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } } void Heap::RevokeAllThreadLocalBuffers() { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeAllThreadLocalBuffers(); + size_t freed_bytes_revoke = rosalloc_space_->RevokeAllThreadLocalBuffers(); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } if (bump_pointer_space_ != nullptr) { - bump_pointer_space_->RevokeAllThreadLocalBuffers(); + CHECK_EQ(bump_pointer_space_->RevokeAllThreadLocalBuffers(), 0U); } if (region_space_ != nullptr) { - region_space_->RevokeAllThreadLocalBuffers(); + 
CHECK_EQ(region_space_->RevokeAllThreadLocalBuffers(), 0U); } } diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index d41e17f..959ff18 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -390,6 +390,9 @@ class Heap { // free-list backed space. void RecordFree(uint64_t freed_objects, int64_t freed_bytes); + // Record the bytes freed by thread-local buffer revoke. + void RecordFreeRevoke(); + // Must be called if a field of an Object in the heap changes, and before any GC safe-point. // The call is not needed if NULL is stored in the field. ALWAYS_INLINE void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/, @@ -664,6 +667,11 @@ class Heap { // Whether or not we may use a garbage collector, used so that we only create collectors we need. bool MayUseCollector(CollectorType type) const; + // Used by tests to reduce timing-dependent flakiness in OOME behavior. + void SetMinIntervalHomogeneousSpaceCompactionByOom(uint64_t interval) { + min_interval_homogeneous_space_compaction_by_oom_ = interval; + } + private: class ConcurrentGCTask; class CollectorTransitionTask; @@ -724,6 +732,7 @@ class Heap { // an initial allocation attempt failed. 
mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated, mirror::Class** klass) LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -742,7 +751,8 @@ class Heap { template <const bool kInstrumented, const bool kGrow> ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type, size_t alloc_size, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) @@ -998,6 +1008,13 @@ class Heap { // Bytes which are allocated and managed by native code but still need to be accounted for. Atomic<size_t> native_bytes_allocated_; + // Number of bytes freed by thread local buffer revokes. This will + // cancel out the ahead-of-time bulk counting of bytes allocated in + // rosalloc thread-local buffers. It is temporarily accumulated + // here to be subtracted from num_bytes_allocated_ later at the next + // GC. + Atomic<size_t> num_bytes_freed_revoke_; + // Info related to the current or previous GC iteration. 
collector::Iteration current_gc_iteration_; diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 9f1f953..14a93d1 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -24,7 +24,8 @@ namespace gc { namespace space { inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { num_bytes = RoundUp(num_bytes, kAlignment); mirror::Object* ret = AllocNonvirtual(num_bytes); if (LIKELY(ret != nullptr)) { @@ -32,13 +33,15 @@ inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t if (usable_size != nullptr) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; } return ret; } inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { Locks::mutator_lock_->AssertExclusiveHeld(self); num_bytes = RoundUp(num_bytes, kAlignment); uint8_t* end = end_.LoadRelaxed(); @@ -54,6 +57,7 @@ inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t if (UNLIKELY(usable_size != nullptr)) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; return obj; } diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index fbfc449..1303d77 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -93,12 +93,13 @@ mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) { return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment)); } -void BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { +size_t BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), block_lock_); RevokeThreadLocalBuffersLocked(thread); + 
return 0U; } -void BumpPointerSpace::RevokeAllThreadLocalBuffers() { +size_t BumpPointerSpace::RevokeAllThreadLocalBuffers() { Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -107,6 +108,7 @@ void BumpPointerSpace::RevokeAllThreadLocalBuffers() { for (Thread* thread : thread_list) { RevokeThreadLocalBuffers(thread); } + return 0U; } void BumpPointerSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h index 089ede4..c496a42 100644 --- a/runtime/gc/space/bump_pointer_space.h +++ b/runtime/gc/space/bump_pointer_space.h @@ -47,10 +47,10 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { // Allocate num_bytes, returns nullptr if the space is full. mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. 
mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* AllocNonvirtual(size_t num_bytes); @@ -103,9 +103,9 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { void Dump(std::ostream& os) const; - void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_); - void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, - Locks::thread_list_lock_); + size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_); + size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, + Locks::thread_list_lock_); void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(block_lock_); void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_); diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h index 4c8a35e..9eace89 100644 --- a/runtime/gc/space/dlmalloc_space-inl.h +++ b/runtime/gc/space/dlmalloc_space-inl.h @@ -27,11 +27,13 @@ namespace space { inline mirror::Object* DlMallocSpace::AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* obj; { MutexLock mu(self, lock_); - obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size); + obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != NULL)) { // Zero freshly allocated memory, done while not holding the space's lock. 
@@ -49,9 +51,11 @@ inline size_t DlMallocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_ return size + kChunkOverhead; } -inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes, - size_t* bytes_allocated, - size_t* usable_size) { +inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked( + Thread* /*self*/, size_t num_bytes, + size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes)); if (LIKELY(result != NULL)) { if (kDebugSpaces) { @@ -61,6 +65,7 @@ inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t allocation_size = AllocationSizeNonvirtual(result, usable_size); DCHECK(bytes_allocated != NULL); *bytes_allocated = allocation_size; + *bytes_tl_bulk_allocated = allocation_size; } return result; } diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index b8a9dd6..225861d 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -123,7 +123,8 @@ void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t ini } mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result; { MutexLock mu(self, lock_); @@ -131,7 +132,8 @@ mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t max_allowed = Capacity(); mspace_set_footprint_limit(mspace_, max_allowed); // Try the allocation. - result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size); + result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Shrink back down as small as possible. 
size_t footprint = mspace_footprint(mspace_); mspace_set_footprint_limit(mspace_, footprint); diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h index 6ce138c..1f80f1f 100644 --- a/runtime/gc/space/dlmalloc_space.h +++ b/runtime/gc/space/dlmalloc_space.h @@ -48,11 +48,15 @@ class DlMallocSpace : public MallocSpace { // Virtual to allow ValgrindMallocSpace to intercept. virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_); // Virtual to allow ValgrindMallocSpace to intercept. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_) { - return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_) { + return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } // Virtual to allow ValgrindMallocSpace to intercept. virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE { @@ -67,15 +71,22 @@ class DlMallocSpace : public MallocSpace { LOCKS_EXCLUDED(lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE { + return num_bytes; + } + // DlMallocSpaces don't have thread local state. - void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } // Faster non-virtual allocation path. 
mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) LOCKS_EXCLUDED(lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + LOCKS_EXCLUDED(lock_); // Faster non-virtual allocation size path. size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size); @@ -134,7 +145,8 @@ class DlMallocSpace : public MallocSpace { private: mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) EXCLUSIVE_LOCKS_REQUIRED(lock_); void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc index 7523de5..5c8e4b9 100644 --- a/runtime/gc/space/large_object_space.cc +++ b/runtime/gc/space/large_object_space.cc @@ -38,10 +38,11 @@ class ValgrindLargeObjectMapSpace FINAL : public LargeObjectMapSpace { } virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE { mirror::Object* obj = LargeObjectMapSpace::Alloc(self, num_bytes + kValgrindRedZoneBytes * 2, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); mirror::Object* object_without_rdz = reinterpret_cast<mirror::Object*>( reinterpret_cast<uintptr_t>(obj) + kValgrindRedZoneBytes); VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<void*>(obj), kValgrindRedZoneBytes); @@ -108,7 +109,8 @@ LargeObjectMapSpace* LargeObjectMapSpace::Create(const std::string& name) { } mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { std::string error_msg; MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", nullptr, num_bytes, 
PROT_READ | PROT_WRITE, true, false, &error_msg); @@ -131,6 +133,8 @@ mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, if (usable_size != nullptr) { *usable_size = allocation_size; } + DCHECK(bytes_tl_bulk_allocated != nullptr); + *bytes_tl_bulk_allocated = allocation_size; num_bytes_allocated_ += allocation_size; total_bytes_allocated_ += allocation_size; ++num_objects_allocated_; @@ -413,7 +417,7 @@ size_t FreeListSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) { } mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { MutexLock mu(self, lock_); const size_t allocation_size = RoundUp(num_bytes, kAlignment); AllocationInfo temp_info; @@ -451,6 +455,8 @@ mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* byt if (usable_size != nullptr) { *usable_size = allocation_size; } + DCHECK(bytes_tl_bulk_allocated != nullptr); + *bytes_tl_bulk_allocated = allocation_size; // Need to do these inside of the lock. ++num_objects_allocated_; ++total_objects_allocated_; diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h index 847f575..d1f9386 100644 --- a/runtime/gc/space/large_object_space.h +++ b/runtime/gc/space/large_object_space.h @@ -62,9 +62,11 @@ class LargeObjectSpace : public DiscontinuousSpace, public AllocSpace { } size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE; // LargeObjectSpaces don't have thread local state. 
- void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } bool IsAllocSpace() const OVERRIDE { return true; @@ -124,7 +126,7 @@ class LargeObjectMapSpace : public LargeObjectSpace { // Return the storage space required by obj. size_t AllocationSize(mirror::Object* obj, size_t* usable_size); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated); size_t Free(Thread* self, mirror::Object* ptr); void Walk(DlMallocSpace::WalkCallback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_); // TODO: disabling thread safety analysis as this may be called when we already hold lock_. @@ -153,7 +155,7 @@ class FreeListSpace FINAL : public LargeObjectSpace { size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(lock_); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; size_t Free(Thread* self, mirror::Object* obj) OVERRIDE; void Walk(DlMallocSpace::WalkCallback callback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_); void Dump(std::ostream& os) const; diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc index e17bad8..a261663 100644 --- a/runtime/gc/space/large_object_space_test.cc +++ b/runtime/gc/space/large_object_space_test.cc @@ -49,11 +49,13 @@ void LargeObjectSpaceTest::LargeObjectTest() { while (requests.size() < num_allocations) { size_t request_size = test_rand(&rand_seed) % max_allocation_size; size_t allocation_size = 0; + size_t bytes_tl_bulk_allocated; mirror::Object* obj = los->Alloc(Thread::Current(), request_size, &allocation_size, - nullptr); + nullptr, 
&bytes_tl_bulk_allocated); ASSERT_TRUE(obj != nullptr); ASSERT_EQ(allocation_size, los->AllocationSize(obj, nullptr)); ASSERT_GE(allocation_size, request_size); + ASSERT_EQ(allocation_size, bytes_tl_bulk_allocated); // Fill in our magic value. uint8_t magic = (request_size & 0xFF) | 1; memset(obj, magic, request_size); @@ -83,9 +85,10 @@ void LargeObjectSpaceTest::LargeObjectTest() { // Test that dump doesn't crash. los->Dump(LOG(INFO)); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; // Checks that the coalescing works. - mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated, nullptr); + mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated); EXPECT_TRUE(obj != nullptr); los->Free(Thread::Current(), obj); @@ -102,8 +105,9 @@ class AllocRaceTask : public Task { void Run(Thread* self) { for (size_t i = 0; i < iterations_ ; ++i) { - size_t alloc_size; - mirror::Object* ptr = los_->Alloc(self, size_, &alloc_size, nullptr); + size_t alloc_size, bytes_tl_bulk_allocated; + mirror::Object* ptr = los_->Alloc(self, size_, &alloc_size, nullptr, + &bytes_tl_bulk_allocated); NanoSleep((id_ + 3) * 1000); // (3+id) mu s diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h index 06239e5..bbf1bbb 100644 --- a/runtime/gc/space/malloc_space.h +++ b/runtime/gc/space/malloc_space.h @@ -55,10 +55,11 @@ class MallocSpace : public ContinuousMemMapAllocSpace { // Allocate num_bytes allowing the underlying space to grow. virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) = 0; + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) = 0; // Allocate num_bytes without allowing the underlying space to grow. 
virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) = 0; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) = 0; // Return the storage space required by obj. If usable_size isn't nullptr then it is set to the // amount of the storage space that may be used by obj. virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) = 0; @@ -67,6 +68,11 @@ class MallocSpace : public ContinuousMemMapAllocSpace { virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; + // Returns the maximum bytes that could be allocated for the given + // size in bulk, that is the maximum value for the + // bytes_tl_bulk_allocated out param returned by MallocSpace::Alloc(). + virtual size_t MaxBytesBulkAllocatedFor(size_t num_bytes) = 0; + #ifndef NDEBUG virtual void CheckMoreCoreForPrecondition() {} // to be overridden in the debug build. #else diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h index a4ed718..1cdf69d 100644 --- a/runtime/gc/space/region_space-inl.h +++ b/runtime/gc/space/region_space-inl.h @@ -24,30 +24,36 @@ namespace gc { namespace space { inline mirror::Object* RegionSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { num_bytes = RoundUp(num_bytes, kAlignment); - return AllocNonvirtual<false>(num_bytes, bytes_allocated, usable_size); + return AllocNonvirtual<false>(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } inline mirror::Object* RegionSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { Locks::mutator_lock_->AssertExclusiveHeld(self); - return Alloc(self, num_bytes, bytes_allocated, usable_size); + return Alloc(self, num_bytes, bytes_allocated, usable_size, 
bytes_tl_bulk_allocated); } template<bool kForEvac> inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAligned<kAlignment>(num_bytes)); mirror::Object* obj; if (LIKELY(num_bytes <= kRegionSize)) { // Non-large object. if (!kForEvac) { - obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(evac_region_ != nullptr); - obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != nullptr)) { return obj; @@ -55,9 +61,11 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by MutexLock mu(Thread::Current(), region_lock_); // Retry with current region since another thread may have updated it. 
if (!kForEvac) { - obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { - obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != nullptr)) { return obj; @@ -73,7 +81,7 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by r->Unfree(time_); r->SetNewlyAllocated(); ++num_non_free_regions_; - obj = r->Alloc(num_bytes, bytes_allocated, usable_size); + obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); CHECK(obj != nullptr); current_region_ = r; return obj; @@ -85,7 +93,7 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by if (r->IsFree()) { r->Unfree(time_); ++num_non_free_regions_; - obj = r->Alloc(num_bytes, bytes_allocated, usable_size); + obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); CHECK(obj != nullptr); evac_region_ = r; return obj; @@ -94,7 +102,8 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by } } else { // Large object. 
- obj = AllocLarge<kForEvac>(num_bytes, bytes_allocated, usable_size); + obj = AllocLarge<kForEvac>(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (LIKELY(obj != nullptr)) { return obj; } @@ -103,7 +112,8 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by } inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAllocated() && IsInToSpace()); DCHECK(IsAligned<kAlignment>(num_bytes)); Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_); @@ -124,6 +134,7 @@ inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* byte if (usable_size != nullptr) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; return reinterpret_cast<mirror::Object*>(old_top); } @@ -253,7 +264,8 @@ inline mirror::Object* RegionSpace::GetNextObject(mirror::Object* obj) { template<bool kForEvac> mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAligned<kAlignment>(num_bytes)); DCHECK_GT(num_bytes, kRegionSize); size_t num_regs = RoundUp(num_bytes, kRegionSize) / kRegionSize; @@ -300,6 +312,7 @@ mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocate if (usable_size != nullptr) { *usable_size = num_regs * kRegionSize; } + *bytes_tl_bulk_allocated = num_bytes; return reinterpret_cast<mirror::Object*>(first_reg->Begin()); } else { // right points to the non-free region. Start with the one after it. 
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index 8bb73d6..814ab6c 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -76,7 +76,7 @@ RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map) current_region_ = &full_region_; evac_region_ = nullptr; size_t ignored; - DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr) == nullptr); + DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr, &ignored) == nullptr); } size_t RegionSpace::FromSpaceSize() { @@ -356,9 +356,10 @@ bool RegionSpace::AllocNewTlab(Thread* self) { return false; } -void RegionSpace::RevokeThreadLocalBuffers(Thread* thread) { +size_t RegionSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), region_lock_); RevokeThreadLocalBuffersLocked(thread); + return 0U; } void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { @@ -377,7 +378,7 @@ void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { thread->SetTlab(nullptr, nullptr); } -void RegionSpace::RevokeAllThreadLocalBuffers() { +size_t RegionSpace::RevokeAllThreadLocalBuffers() { Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -385,6 +386,7 @@ void RegionSpace::RevokeAllThreadLocalBuffers() { for (Thread* thread : thread_list) { RevokeThreadLocalBuffers(thread); } + return 0U; } void RegionSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index 4160547..b88ce24 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -42,18 +42,20 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { // Allocate num_bytes, returns nullptr if the space is full. 
mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); // The main allocation routine. template<bool kForEvac> ALWAYS_INLINE mirror::Object* AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated); // Allocate/free large objects (objects that are larger than the region size.) template<bool kForEvac> - mirror::Object* AllocLarge(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size); + mirror::Object* AllocLarge(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated); void FreeLarge(mirror::Object* large_obj, size_t bytes_allocated); // Return the storage space required by obj. 
@@ -87,10 +89,10 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { void DumpRegions(std::ostream& os); void DumpNonFreeRegions(std::ostream& os); - void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(region_lock_); + size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(region_lock_); void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(region_lock_); - void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, - Locks::thread_list_lock_); + size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, + Locks::thread_list_lock_); void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(region_lock_); void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_); @@ -269,7 +271,8 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { } ALWAYS_INLINE mirror::Object* Alloc(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated); bool IsFree() const { bool is_free = state_ == RegionState::kRegionStateFree; diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h index 5d6642d..9d582a3 100644 --- a/runtime/gc/space/rosalloc_space-inl.h +++ b/runtime/gc/space/rosalloc_space-inl.h @@ -26,13 +26,19 @@ namespace art { namespace gc { namespace space { +template<bool kMaybeRunningOnValgrind> inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) { // obj is a valid object. Use its class in the header to get the size. // Don't use verification since the object may be dead if we are sweeping. 
size_t size = obj->SizeOf<kVerifyNone>(); - bool running_on_valgrind = RUNNING_ON_VALGRIND != 0; - if (running_on_valgrind) { - size += 2 * kDefaultValgrindRedZoneBytes; + bool running_on_valgrind = false; + if (kMaybeRunningOnValgrind) { + running_on_valgrind = RUNNING_ON_VALGRIND != 0; + if (running_on_valgrind) { + size += 2 * kDefaultValgrindRedZoneBytes; + } + } else { + DCHECK_EQ(RUNNING_ON_VALGRIND, 0U); } size_t size_by_size = rosalloc_->UsableSize(size); if (kIsDebugBuild) { @@ -55,28 +61,50 @@ inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_ template<bool kThreadSafe> inline mirror::Object* RosAllocSpace::AllocCommon(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { - size_t rosalloc_size = 0; + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { + size_t rosalloc_bytes_allocated = 0; + size_t rosalloc_usable_size = 0; + size_t rosalloc_bytes_tl_bulk_allocated = 0; if (!kThreadSafe) { Locks::mutator_lock_->AssertExclusiveHeld(self); } mirror::Object* result = reinterpret_cast<mirror::Object*>( - rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_size)); + rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_bytes_allocated, + &rosalloc_usable_size, + &rosalloc_bytes_tl_bulk_allocated)); if (LIKELY(result != NULL)) { if (kDebugSpaces) { CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result) << ") not in bounds of allocation space " << *this; } DCHECK(bytes_allocated != NULL); - *bytes_allocated = rosalloc_size; - DCHECK_EQ(rosalloc_size, rosalloc_->UsableSize(result)); + *bytes_allocated = rosalloc_bytes_allocated; + DCHECK_EQ(rosalloc_usable_size, rosalloc_->UsableSize(result)); if (usable_size != nullptr) { - *usable_size = rosalloc_size; + *usable_size = rosalloc_usable_size; } + DCHECK(bytes_tl_bulk_allocated != NULL); + *bytes_tl_bulk_allocated = rosalloc_bytes_tl_bulk_allocated; } return result; } +inline bool 
RosAllocSpace::CanAllocThreadLocal(Thread* self, size_t num_bytes) { + return rosalloc_->CanAllocFromThreadLocalRun(self, num_bytes); +} + +inline mirror::Object* RosAllocSpace::AllocThreadLocal(Thread* self, size_t num_bytes, + size_t* bytes_allocated) { + DCHECK(bytes_allocated != nullptr); + return reinterpret_cast<mirror::Object*>( + rosalloc_->AllocFromThreadLocalRun(self, num_bytes, bytes_allocated)); +} + +inline size_t RosAllocSpace::MaxBytesBulkAllocatedForNonvirtual(size_t num_bytes) { + return rosalloc_->MaxBytesBulkAllocatedFor(num_bytes); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc index ced25a4..f140021 100644 --- a/runtime/gc/space/rosalloc_space.cc +++ b/runtime/gc/space/rosalloc_space.cc @@ -154,7 +154,8 @@ allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_ } mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result; { MutexLock mu(self, lock_); @@ -162,7 +163,8 @@ mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t max_allowed = Capacity(); rosalloc_->SetFootprintLimit(max_allowed); // Try the allocation. - result = AllocCommon(self, num_bytes, bytes_allocated, usable_size); + result = AllocCommon(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Shrink back down as small as possible. 
size_t footprint = rosalloc_->Footprint(); rosalloc_->SetFootprintLimit(footprint); @@ -209,7 +211,7 @@ size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** p __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + kPrefetchLookAhead])); } if (kVerifyFreedBytes) { - verify_bytes += AllocationSizeNonvirtual(ptrs[i], nullptr); + verify_bytes += AllocationSizeNonvirtual<true>(ptrs[i], nullptr); } } @@ -338,12 +340,12 @@ void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, } } -void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) { - rosalloc_->RevokeThreadLocalRuns(thread); +size_t RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) { + return rosalloc_->RevokeThreadLocalRuns(thread); } -void RosAllocSpace::RevokeAllThreadLocalBuffers() { - rosalloc_->RevokeAllThreadLocalRuns(); +size_t RosAllocSpace::RevokeAllThreadLocalBuffers() { + return rosalloc_->RevokeAllThreadLocalRuns(); } void RosAllocSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h index c856e95..36268f7 100644 --- a/runtime/gc/space/rosalloc_space.h +++ b/runtime/gc/space/rosalloc_space.h @@ -47,18 +47,21 @@ class RosAllocSpace : public MallocSpace { bool low_memory_mode, bool can_move_objects); mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE { - return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE { + return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* 
bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { - return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size); + return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE { - return AllocationSizeNonvirtual(obj, usable_size); + return AllocationSizeNonvirtual<true>(obj, usable_size); } size_t Free(Thread* self, mirror::Object* ptr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -66,17 +69,33 @@ class RosAllocSpace : public MallocSpace { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { // RosAlloc zeroes memory internally. - return AllocCommon(self, num_bytes, bytes_allocated, usable_size); + return AllocCommon(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } mirror::Object* AllocNonvirtualThreadUnsafe(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { // RosAlloc zeroes memory internally. Pass in false for thread unsafe. - return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size); + return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } + // Returns true if the given allocation request can be allocated in + // an existing thread local run without allocating a new run. + ALWAYS_INLINE bool CanAllocThreadLocal(Thread* self, size_t num_bytes); + // Allocate the given allocation request in an existing thread local + // run without allocating a new run. 
+ ALWAYS_INLINE mirror::Object* AllocThreadLocal(Thread* self, size_t num_bytes, + size_t* bytes_allocated); + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE { + return MaxBytesBulkAllocatedForNonvirtual(num_bytes); + } + ALWAYS_INLINE size_t MaxBytesBulkAllocatedForNonvirtual(size_t num_bytes); + // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held. + template<bool kMaybeRunningOnValgrind> size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) NO_THREAD_SAFETY_ANALYSIS; @@ -99,8 +118,8 @@ class RosAllocSpace : public MallocSpace { uint64_t GetBytesAllocated() OVERRIDE; uint64_t GetObjectsAllocated() OVERRIDE; - void RevokeThreadLocalBuffers(Thread* thread); - void RevokeAllThreadLocalBuffers(); + size_t RevokeThreadLocalBuffers(Thread* thread); + size_t RevokeAllThreadLocalBuffers(); void AssertThreadLocalBuffersAreRevoked(Thread* thread); void AssertAllThreadLocalBuffersAreRevoked(); @@ -134,7 +153,7 @@ class RosAllocSpace : public MallocSpace { private: template<bool kThreadSafe = true> mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated); void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, size_t maximum_size, bool low_memory_mode) OVERRIDE { diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h index d24650b..f2378d9 100644 --- a/runtime/gc/space/space.h +++ b/runtime/gc/space/space.h @@ -203,14 +203,24 @@ class AllocSpace { // succeeds, the output parameter bytes_allocated will be set to the // actually allocated bytes which is >= num_bytes. // Alloc can be called from multiple threads at the same time and must be thread-safe. + // + // bytes_tl_bulk_allocated - bytes allocated in bulk ahead of time for a thread local allocation, + // if applicable. 
It can be + // 1) equal to bytes_allocated if it's not a thread local allocation, + // 2) greater than bytes_allocated if it's a thread local + // allocation that required a new buffer, or + // 3) zero if it's a thread local allocation in an existing + // buffer. + // This is what is to be added to Heap::num_bytes_allocated_. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) = 0; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) = 0; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. virtual mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { - return Alloc(self, num_bytes, bytes_allocated, usable_size); + return Alloc(self, num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } // Return the storage space required by obj. @@ -224,11 +234,15 @@ class AllocSpace { // Revoke any sort of thread-local buffers that are used to speed up allocations for the given // thread, if the alloc space implementation uses any. - virtual void RevokeThreadLocalBuffers(Thread* thread) = 0; + // Returns the total free bytes in the revoked thread local runs that's to be subtracted + // from Heap::num_bytes_allocated_ or zero if unnecessary. + virtual size_t RevokeThreadLocalBuffers(Thread* thread) = 0; // Revoke any sort of thread-local buffers that are used to speed up allocations for all the // threads, if the alloc space implementation uses any. - virtual void RevokeAllThreadLocalBuffers() = 0; + // Returns the total free bytes in the revoked thread local runs that's to be subtracted + // from Heap::num_bytes_allocated_ or zero if unnecessary. 
+ virtual size_t RevokeAllThreadLocalBuffers() = 0; virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0; diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h index 09d10dd..3e9e9f7 100644 --- a/runtime/gc/space/space_test.h +++ b/runtime/gc/space/space_test.h @@ -61,11 +61,13 @@ class SpaceTest : public CommonRuntimeTest { } mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes, - size_t* bytes_allocated, size_t* usable_size) + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { StackHandleScope<1> hs(self); Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self))); - mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size); + mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (obj != nullptr) { InstallClass(obj, byte_array_class.Get(), bytes); } @@ -73,11 +75,13 @@ class SpaceTest : public CommonRuntimeTest { } mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes, - size_t* bytes_allocated, size_t* usable_size) + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { StackHandleScope<1> hs(self); Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self))); - mirror::Object* obj = alloc_space->AllocWithGrowth(self, bytes, bytes_allocated, usable_size); + mirror::Object* obj = alloc_space->AllocWithGrowth(self, bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (obj != nullptr) { InstallClass(obj, byte_array_class.Get(), bytes); } @@ -182,34 +186,38 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { ScopedObjectAccess soa(self); // Succeeds, fits without adjusting the footprint limit. 
- size_t ptr1_bytes_allocated, ptr1_usable_size; + size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated; StackHandleScope<3> hs(soa.Self()); MutableHandle<mirror::Object> ptr1( - hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size))); + hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - size_t ptr3_bytes_allocated, ptr3_usable_size; + size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated; MutableHandle<mirror::Object> ptr3( - hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(8U * MB, ptr3_bytes_allocated); EXPECT_LE(8U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr4 == nullptr); // Also fails, requires a higher allowed footprint. 
- mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr5 == nullptr); // Release some memory. @@ -219,13 +227,15 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. - size_t ptr6_bytes_allocated, ptr6_usable_size; + size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated; Handle<mirror::Object> ptr6( - hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size, + &ptr6_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr6.Get() != nullptr); EXPECT_LE(9U * MB, ptr6_bytes_allocated); EXPECT_LE(9U * MB, ptr6_usable_size); EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated); + EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated); // Final clean up. size_t free1 = space->AllocationSize(ptr1.Get(), nullptr); @@ -233,7 +243,7 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { EXPECT_LE(1U * MB, free1); // Make sure that the zygote space isn't directly at the start of the space. - EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr) != nullptr); + EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr); gc::Heap* heap = Runtime::Current()->GetHeap(); space::Space* old_space = space; @@ -250,22 +260,26 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { AddSpace(space, false); // Succeeds, fits without adjusting the footprint limit. 
- ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size)); + ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated)); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size)); + ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated)); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(2U * MB, ptr3_bytes_allocated); EXPECT_LE(2U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); space->Free(self, ptr3.Assign(nullptr)); // Final clean up. @@ -285,34 +299,38 @@ void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) { AddSpace(space); // Succeeds, fits without adjusting the footprint limit. 
- size_t ptr1_bytes_allocated, ptr1_usable_size; + size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated; StackHandleScope<3> hs(soa.Self()); MutableHandle<mirror::Object> ptr1( - hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size))); + hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - size_t ptr3_bytes_allocated, ptr3_usable_size; + size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated; MutableHandle<mirror::Object> ptr3( - hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(8U * MB, ptr3_bytes_allocated); EXPECT_LE(8U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr4 == nullptr); // Also fails, requires a higher allowed footprint. 
- mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr5 == nullptr); // Release some memory. @@ -322,13 +340,15 @@ void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) { EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. - size_t ptr6_bytes_allocated, ptr6_usable_size; + size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated; Handle<mirror::Object> ptr6( - hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size, + &ptr6_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr6.Get() != nullptr); EXPECT_LE(9U * MB, ptr6_bytes_allocated); EXPECT_LE(9U * MB, ptr6_usable_size); EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated); + EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated); // Final clean up. size_t free1 = space->AllocationSize(ptr1.Get(), nullptr); @@ -348,14 +368,16 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) { // Succeeds, fits without adjusting the max allowed footprint. mirror::Object* lots_of_objects[1024]; for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - size_t allocation_size, usable_size; + size_t allocation_size, usable_size, bytes_tl_bulk_allocated; size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray(); lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size, - &usable_size); + &usable_size, &bytes_tl_bulk_allocated); EXPECT_TRUE(lots_of_objects[i] != nullptr); size_t computed_usable_size; EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size)); EXPECT_EQ(usable_size, computed_usable_size); + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); } // Release memory. 
@@ -363,12 +385,15 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) { // Succeeds, fits by adjusting the max allowed footprint. for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - size_t allocation_size, usable_size; - lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size); + size_t allocation_size, usable_size, bytes_tl_bulk_allocated; + lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size, + &bytes_tl_bulk_allocated); EXPECT_TRUE(lots_of_objects[i] != nullptr); size_t computed_usable_size; EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size)); EXPECT_EQ(usable_size, computed_usable_size); + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); } // Release memory. @@ -425,10 +450,13 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t StackHandleScope<1> hs(soa.Self()); auto object(hs.NewHandle<mirror::Object>(nullptr)); size_t bytes_allocated = 0; + size_t bytes_tl_bulk_allocated; if (round <= 1) { - object.Assign(Alloc(space, self, alloc_size, &bytes_allocated, nullptr)); + object.Assign(Alloc(space, self, alloc_size, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } else { - object.Assign(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr)); + object.Assign(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } footprint = space->GetFootprint(); EXPECT_GE(space->Size(), footprint); // invariant @@ -441,6 +469,8 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t } else { EXPECT_GE(allocation_size, 8u); } + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); amount_allocated += allocation_size; break; } @@ -518,11 +548,13 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t auto 
large_object(hs.NewHandle<mirror::Object>(nullptr)); size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4); size_t bytes_allocated = 0; + size_t bytes_tl_bulk_allocated; if (round <= 1) { - large_object.Assign(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr)); + large_object.Assign(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } else { large_object.Assign(AllocWithGrowth(space, self, three_quarters_space, &bytes_allocated, - nullptr)); + nullptr, &bytes_tl_bulk_allocated)); } EXPECT_TRUE(large_object.Get() != nullptr); diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h index ae8e892..bc329e1 100644 --- a/runtime/gc/space/valgrind_malloc_space-inl.h +++ b/runtime/gc/space/valgrind_malloc_space-inl.h @@ -32,10 +32,15 @@ namespace valgrind_details { template <size_t kValgrindRedZoneBytes, bool kUseObjSizeForUsable> inline mirror::Object* AdjustForValgrind(void* obj_with_rdz, size_t num_bytes, size_t bytes_allocated, size_t usable_size, - size_t* bytes_allocated_out, size_t* usable_size_out) { + size_t bytes_tl_bulk_allocated, + size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { if (bytes_allocated_out != nullptr) { *bytes_allocated_out = bytes_allocated; } + if (bytes_tl_bulk_allocated_out != nullptr) { + *bytes_tl_bulk_allocated_out = bytes_tl_bulk_allocated; + } // This cuts over-provision and is a trade-off between testing the over-provisioning code paths // vs checking overflows in the regular paths. 
@@ -82,20 +87,25 @@ ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::AllocWithGrowth( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, + &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } - return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, - kUseObjSizeForUsable>(obj_with_rdz, num_bytes, - bytes_allocated, usable_size, - bytes_allocated_out, - usable_size_out); + return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>( + obj_with_rdz, num_bytes, + bytes_allocated, usable_size, + bytes_tl_bulk_allocated, + bytes_allocated_out, + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -106,11 +116,13 @@ mirror::Object* ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::Alloc( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } @@ -118,8 +130,10 @@ mirror::Object* ValgrindMallocSpace<S, return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>(obj_with_rdz, num_bytes, bytes_allocated, usable_size, + 
bytes_tl_bulk_allocated, bytes_allocated_out, - usable_size_out); + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -130,20 +144,25 @@ mirror::Object* ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::AllocThreadUnsafe( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::AllocThreadUnsafe(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, + &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } - return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, - kUseObjSizeForUsable>(obj_with_rdz, num_bytes, - bytes_allocated, usable_size, - bytes_allocated_out, - usable_size_out); + return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>( + obj_with_rdz, num_bytes, + bytes_allocated, usable_size, + bytes_tl_bulk_allocated, + bytes_allocated_out, + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -226,6 +245,17 @@ ValgrindMallocSpace<S, mem_map->Size() - initial_size); } +template <typename S, + size_t kValgrindRedZoneBytes, + bool kAdjustForRedzoneInAllocSize, + bool kUseObjSizeForUsable> +size_t ValgrindMallocSpace<S, + kValgrindRedZoneBytes, + kAdjustForRedzoneInAllocSize, + kUseObjSizeForUsable>::MaxBytesBulkAllocatedFor(size_t num_bytes) { + return S::MaxBytesBulkAllocatedFor(num_bytes + 2 * kValgrindRedZoneBytes); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/valgrind_malloc_space.h b/runtime/gc/space/valgrind_malloc_space.h index 707ea69..a6b010a 100644 --- a/runtime/gc/space/valgrind_malloc_space.h +++ 
b/runtime/gc/space/valgrind_malloc_space.h @@ -34,12 +34,13 @@ template <typename BaseMallocSpaceType, class ValgrindMallocSpace FINAL : public BaseMallocSpaceType { public: mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE; mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE - EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE; @@ -53,6 +54,8 @@ class ValgrindMallocSpace FINAL : public BaseMallocSpaceType { UNUSED(ptr); } + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE; + template <typename... Params> explicit ValgrindMallocSpace(MemMap* mem_map, size_t initial_size, Params... 
params); virtual ~ValgrindMallocSpace() {} diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc index a868e68..9e882a8 100644 --- a/runtime/gc/space/zygote_space.cc +++ b/runtime/gc/space/zygote_space.cc @@ -77,7 +77,7 @@ void ZygoteSpace::Dump(std::ostream& os) const { << ",name=\"" << GetName() << "\"]"; } -mirror::Object* ZygoteSpace::Alloc(Thread*, size_t, size_t*, size_t*) { +mirror::Object* ZygoteSpace::Alloc(Thread*, size_t, size_t*, size_t*, size_t*) { UNIMPLEMENTED(FATAL); UNREACHABLE(); } diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h index 0cf4bb1..934a234 100644 --- a/runtime/gc/space/zygote_space.h +++ b/runtime/gc/space/zygote_space.h @@ -46,7 +46,7 @@ class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace { } mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE; @@ -55,9 +55,11 @@ class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace { size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE; // ZygoteSpaces don't have thread local state. - void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } uint64_t GetBytesAllocated() { |