-rw-r--r-- | runtime/base/mutex.cc                 |  22 |
-rw-r--r-- | runtime/gc/accounting/space_bitmap.cc |  14 |
-rw-r--r-- | runtime/gc/accounting/space_bitmap.h  |   3 |
-rw-r--r-- | runtime/gc/allocator/rosalloc-inl.h   |   2 |
-rw-r--r-- | runtime/gc/allocator/rosalloc.cc      | 651 |
-rw-r--r-- | runtime/gc/allocator/rosalloc.h       |  68 |
-rw-r--r-- | runtime/gc/heap.cc                    |  41 |
-rw-r--r-- | runtime/gc/heap.h                     |  10 |
-rw-r--r-- | runtime/gc/space/rosalloc_space.cc    |   2 |
-rw-r--r-- | runtime/gc/space/space_test.h         |  17 |
-rw-r--r-- | runtime/thread.cc                     |   3 |
11 files changed, 451 insertions(+), 382 deletions(-)
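Most of the 451 insertions land in runtime/gc/allocator/rosalloc.cc, which drops the old two-mode AllocSlot() (a bump index plus a fallback bitmap scan) in favor of a single bitmap scan: SetAllocBitMapBitsForInvalidSlots() pre-sets the trailing bits of the last bitmap word that do not correspond to real slots (via GetBitmapLastVectorMask()), so the scan needs no per-slot range check, and first_search_vec_idx_ remembers the first word that may still contain a zero bit. The following is a minimal self-contained sketch of that scheme, not the ART sources; RunSketch and its members are invented names:

#include <cstdint>
#include <cstdio>

// Standalone sketch of the new run allocation scheme (invented names:
// RunSketch, kNumSlots, LastVectorMask) -- an illustration, not ART code.
struct RunSketch {
  static constexpr uint32_t kNumSlots = 50;                  // slots in this bracket
  static constexpr uint32_t kNumVec = (kNumSlots + 31) / 32; // bitmap words
  uint32_t first_search_vec_idx = 0;
  uint32_t alloc_bit_map[kNumVec] = {};

  // Counterpart of GetBitmapLastVectorMask(): ones in the bit positions of
  // the last word that lie past the final valid slot. (The remain == 0 guard
  // is added here to keep the sketch's shift well-defined.)
  static uint32_t LastVectorMask() {
    const uint32_t remain = kNumVec * 32 - kNumSlots;
    return remain == 0 ? 0u : ((1u << remain) - 1) << (32 - remain);
  }

  // Counterpart of SetAllocBitMapBitsForInvalidSlots().
  void MarkInvalidSlots() { alloc_bit_map[kNumVec - 1] |= LastVectorMask(); }

  // Find-first-zero via ffs on the complemented word; full words are skipped
  // once and never rescanned, as in the patched AllocSlot().
  int AllocSlot() {
    while (true) {
      uint32_t* vec = &alloc_bit_map[first_search_vec_idx];
      const int ffz1 = __builtin_ffs(static_cast<int>(~*vec)); // 1-based lowest 0 bit
      if (ffz1 != 0) {
        const uint32_t bit = static_cast<uint32_t>(ffz1 - 1);
        *vec |= 1u << bit;
        return static_cast<int>(first_search_vec_idx * 32 + bit);
      }
      if (first_search_vec_idx + 1 >= kNumVec) {
        return -1; // every word full: the run is exhausted
      }
      ++first_search_vec_idx;
    }
  }
};

int main() {
  RunSketch run;
  run.MarkInvalidSlots(); // the invalid tail bits are never handed out
  int count = 0;
  while (run.AllocSlot() != -1) {
    ++count;
  }
  std::printf("allocated %d slots, expected %u\n", count, RunSketch::kNumSlots);
  return 0;
}

Because FreeSlot() in the diff rewinds first_search_vec_idx_ with std::min() whenever a slot is returned, the index is only ever too small, never too large, so skipping full words is safe.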
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index fdd0249..2bc17bf 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -206,16 +206,16 @@ void BaseMutex::DumpContention(std::ostream& os) const { os << "never contended"; } else { os << "contended " << contention_count - << " times, average wait of contender " << PrettyDuration(wait_time / contention_count); + << " total wait of contender " << PrettyDuration(wait_time) + << " average " << PrettyDuration(wait_time / contention_count); SafeMap<uint64_t, size_t> most_common_blocker; SafeMap<uint64_t, size_t> most_common_blocked; - typedef SafeMap<uint64_t, size_t>::const_iterator It; for (size_t i = 0; i < kContentionLogSize; ++i) { uint64_t blocked_tid = log[i].blocked_tid; uint64_t owner_tid = log[i].owner_tid; uint32_t count = log[i].count; if (count > 0) { - It it = most_common_blocked.find(blocked_tid); + auto it = most_common_blocked.find(blocked_tid); if (it != most_common_blocked.end()) { most_common_blocked.Overwrite(blocked_tid, it->second + count); } else { @@ -231,10 +231,10 @@ void BaseMutex::DumpContention(std::ostream& os) const { } uint64_t max_tid = 0; size_t max_tid_count = 0; - for (It it = most_common_blocked.begin(); it != most_common_blocked.end(); ++it) { - if (it->second > max_tid_count) { - max_tid = it->first; - max_tid_count = it->second; + for (const auto& pair : most_common_blocked) { + if (pair.second > max_tid_count) { + max_tid = pair.first; + max_tid_count = pair.second; } } if (max_tid != 0) { @@ -242,10 +242,10 @@ void BaseMutex::DumpContention(std::ostream& os) const { } max_tid = 0; max_tid_count = 0; - for (It it = most_common_blocker.begin(); it != most_common_blocker.end(); ++it) { - if (it->second > max_tid_count) { - max_tid = it->first; - max_tid_count = it->second; + for (const auto& pair : most_common_blocker) { + if (pair.second > max_tid_count) { + max_tid = pair.first; + max_tid_count = pair.second; } } if (max_tid != 0) { diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index 31a1537..66f9a3a 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -21,13 +21,17 @@ namespace gc { namespace accounting { template<size_t kAlignment> +size_t SpaceBitmap<kAlignment>::ComputeBitmapSize(uint64_t capacity) { + const uint64_t kBytesCoveredPerWord = kAlignment * kBitsPerWord; + return (RoundUp(capacity, kBytesCoveredPerWord) / kBytesCoveredPerWord) * kWordSize; +} + +template<size_t kAlignment> SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap( const std::string& name, MemMap* mem_map, byte* heap_begin, size_t heap_capacity) { CHECK(mem_map != nullptr); uword* bitmap_begin = reinterpret_cast<uword*>(mem_map->Begin()); - const uint64_t kBytesCoveredPerWord = kAlignment * kBitsPerWord; - size_t bitmap_size = (RoundUp(static_cast<uint64_t>(heap_capacity), kBytesCoveredPerWord) / - kBytesCoveredPerWord) * kWordSize; + const size_t bitmap_size = ComputeBitmapSize(heap_capacity); return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin); } @@ -45,9 +49,7 @@ template<size_t kAlignment> SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::Create( const std::string& name, byte* heap_begin, size_t heap_capacity) { // Round up since heap_capacity is not necessarily a multiple of kAlignment * kBitsPerWord. 
- const uint64_t kBytesCoveredPerWord = kAlignment * kBitsPerWord; - size_t bitmap_size = (RoundUp(static_cast<uint64_t>(heap_capacity), kBytesCoveredPerWord) / - kBytesCoveredPerWord) * kWordSize; + const size_t bitmap_size = ComputeBitmapSize(heap_capacity); std::string error_msg; UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size, PROT_READ | PROT_WRITE, false, &error_msg)); diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index df3fd37..5c7cce2 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -200,6 +200,9 @@ class SpaceBitmap { SpaceBitmap(const std::string& name, MemMap* mem_map, uword* bitmap_begin, size_t bitmap_size, const void* heap_begin); + // Helper function for computing bitmap size based on a 64 bit capacity. + static size_t ComputeBitmapSize(uint64_t capacity); + template<bool kSetBit> bool Modify(const mirror::Object* obj); diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h index f395314..ac0f67b 100644 --- a/runtime/gc/allocator/rosalloc-inl.h +++ b/runtime/gc/allocator/rosalloc-inl.h @@ -29,7 +29,7 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by } void* m = AllocFromRun(self, size, bytes_allocated); // Check if the returned memory is really all zero. - if (kCheckZeroMemory && m != NULL) { + if (kCheckZeroMemory && m != nullptr) { byte* bytes = reinterpret_cast<byte*>(m); for (size_t i = 0; i < size; ++i) { DCHECK_EQ(bytes[i], 0); diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index 0f2d6a9..821aa2d 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -32,6 +32,10 @@ namespace allocator { extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment); +static constexpr bool kUsePrefetchDuringAllocRun = true; +static constexpr bool kPrefetchNewRunDataByZeroing = false; +static constexpr size_t kPrefetchStride = 64; + size_t RosAlloc::bracketSizes[kNumOfSizeBrackets]; size_t RosAlloc::numOfPages[kNumOfSizeBrackets]; size_t RosAlloc::numOfSlots[kNumOfSizeBrackets]; @@ -39,6 +43,9 @@ size_t RosAlloc::headerSizes[kNumOfSizeBrackets]; size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets]; size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets]; bool RosAlloc::initialized_ = false; +size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 }; +RosAlloc::Run* RosAlloc::dedicated_full_run_ = + reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_); RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity, PageReleaseMode page_release_mode, size_t page_release_size_threshold) @@ -62,8 +69,9 @@ RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity, << ", max_capacity=" << std::dec << max_capacity_; memset(current_runs_, 0, sizeof(current_runs_)); for (size_t i = 0; i < kNumOfSizeBrackets; i++) { - size_bracket_locks_[i] = new Mutex("an rosalloc size bracket lock", - kRosAllocBracketLock); + size_bracket_lock_names[i] = + StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i)); + size_bracket_locks_[i] = new Mutex(size_bracket_lock_names[i].c_str(), kRosAllocBracketLock); } DCHECK_EQ(footprint_, capacity_); size_t num_of_pages = footprint_ / kPageSize; @@ -71,7 +79,7 @@ RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity, std::string error_msg; page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", 
NULL, RoundUp(max_num_of_pages, kPageSize), PROT_READ | PROT_WRITE, false, &error_msg)); - CHECK(page_map_mem_map_.get() != NULL) << "Couldn't allocate the page map : " << error_msg; + CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg; page_map_ = page_map_mem_map_->Begin(); page_map_size_ = num_of_pages; max_page_map_size_ = max_num_of_pages; @@ -103,7 +111,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) { lock_.AssertHeld(self); DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject); FreePageRun* res = NULL; - size_t req_byte_size = num_pages * kPageSize; + const size_t req_byte_size = num_pages * kPageSize; // Find the lowest address free page run that's large enough. for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) { FreePageRun* fpr = *it; @@ -260,8 +268,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) { break; } if (kIsDebugBuild) { - // Clear the first page which isn't madvised away in the debug - // build for the magic number. + // Clear the first page since it is not madvised due to the magic number. memset(res, 0, kPageSize); } if (kTraceRosAlloc) { @@ -279,7 +286,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) { return nullptr; } -size_t RosAlloc::FreePages(Thread* self, void* ptr) { +size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) { lock_.AssertHeld(self); size_t pm_idx = ToPageMapIndex(ptr); DCHECK_LT(pm_idx, page_map_size_); @@ -310,10 +317,21 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) { num_pages++; idx++; } + const size_t byte_size = num_pages * kPageSize; + if (already_zero) { + if (kCheckZeroMemory) { + const uword* word_ptr = reinterpret_cast<uword*>(ptr); + for (size_t i = 0; i < byte_size / sizeof(uword); ++i) { + CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i; + } + } + } else if (!DoesReleaseAllPages()) { + memset(ptr, 0, byte_size); + } if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr) - << "-0x" << (reinterpret_cast<intptr_t>(ptr) + num_pages * kPageSize) + << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size) << "(" << std::dec << (num_pages * kPageSize) << ")"; } @@ -322,8 +340,8 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) { if (kIsDebugBuild) { fpr->magic_num_ = kMagicNumFree; } - fpr->SetByteSize(this, num_pages * kPageSize); - DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0)); + fpr->SetByteSize(this, byte_size); + DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this))); DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end()); if (!free_page_runs_.empty()) { @@ -349,6 +367,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) { if (kTraceRosAlloc) { LOG(INFO) << "Success"; } + // Clear magic num since this is no longer the start of a free page run. + if (kIsDebugBuild) { + h->magic_num_ = 0; + } free_page_runs_.erase(it++); if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex @@ -395,6 +417,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) { } l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this)); DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0)); + // Clear magic num since this is no longer the start of a free page run. + if (kIsDebugBuild) { + fpr->magic_num_ = 0; + } fpr = l; } else { // Not adjacent. Stop. 
@@ -422,7 +448,7 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) { LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr) << " into free_page_runs_"; } - return num_pages; + return byte_size; } void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) { @@ -439,23 +465,19 @@ void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_alloca } return nullptr; } - if (bytes_allocated != NULL) { - *bytes_allocated = num_pages * kPageSize; - } + const size_t total_bytes = num_pages * kPageSize; + *bytes_allocated = total_bytes; if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r) << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize) << "(" << std::dec << (num_pages * kPageSize) << ")"; } - if (!DoesReleaseAllPages()) { - // If it does not release all pages, pages may not be zeroed out. - memset(r, 0, size); - } // Check if the returned memory is really all zero. if (kCheckZeroMemory) { - byte* bytes = reinterpret_cast<byte*>(r); - for (size_t i = 0; i < size; ++i) { - DCHECK_EQ(bytes[i], 0); + CHECK_EQ(total_bytes % sizeof(uword), 0U); + const uword* words = reinterpret_cast<uword*>(r); + for (size_t i = 0; i < total_bytes / sizeof(uword); ++i) { + CHECK_EQ(words[i], 0U); } } return r; @@ -479,7 +501,7 @@ size_t RosAlloc::FreeInternal(Thread* self, void* ptr) { LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx]; return 0; case kPageMapLargeObject: - return FreePages(self, ptr) * kPageSize; + return FreePages(self, ptr, false); case kPageMapLargeObjectPart: LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx]; return 0; @@ -503,9 +525,7 @@ size_t RosAlloc::FreeInternal(Thread* self, void* ptr) { } } DCHECK(run != nullptr); - const size_t size = IndexToBracketSize(run->size_bracket_idx_); - FreeFromRun(self, ptr, run); - return size; + return FreeFromRun(self, ptr, run); } size_t RosAlloc::Free(Thread* self, void* ptr) { @@ -513,42 +533,57 @@ size_t RosAlloc::Free(Thread* self, void* ptr) { return FreeInternal(self, ptr); } -RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) { - Run* new_run; - size_t num_pages = numOfPages[idx]; - // Get the lowest address non-full run from the binary tree. - Run* temp = NULL; - std::set<Run*>* bt = &non_full_runs_[idx]; - std::set<Run*>::iterator found = bt->lower_bound(temp); - if (found != bt->end()) { - // If there's one, use it as the current run. - Run* non_full_run = *found; - DCHECK(non_full_run != NULL); - new_run = non_full_run; - DCHECK_EQ(new_run->is_thread_local_, 0); - bt->erase(found); - DCHECK_EQ(non_full_run->is_thread_local_, 0); - } else { - // If there's none, allocate a new run and use it as the - // current run. 
- { - MutexLock mu(self, lock_); - new_run = reinterpret_cast<Run*>(AllocPages(self, num_pages, kPageMapRun)); - } - if (new_run == NULL) { - return NULL; - } +RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) { + RosAlloc::Run* new_run = nullptr; + { + MutexLock mu(self, lock_); + new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun)); + } + if (LIKELY(new_run != nullptr)) { if (kIsDebugBuild) { new_run->magic_num_ = kMagicNum; } new_run->size_bracket_idx_ = idx; - new_run->top_slot_idx_ = 0; - new_run->ClearBitMaps(); - new_run->to_be_bulk_freed_ = false; + new_run->SetAllocBitMapBitsForInvalidSlots(); + DCHECK(!new_run->IsThreadLocal()); + DCHECK_EQ(new_run->first_search_vec_idx_, 0U); + DCHECK(!new_run->to_be_bulk_freed_); + if (kUsePrefetchDuringAllocRun && idx <= kMaxThreadLocalSizeBracketIdx) { + // Take ownership of the cache lines if we are likely to be thread local run. + if (kPrefetchNewRunDataByZeroing) { + // Zeroing the data is sometimes faster than prefetching but it increases memory usage + // since we end up dirtying zero pages which may have been madvised. + new_run->ZeroData(); + } else { + const size_t num_of_slots = numOfSlots[idx]; + const size_t bracket_size = bracketSizes[idx]; + const size_t num_of_bytes = num_of_slots * bracket_size; + byte* begin = reinterpret_cast<byte*>(new_run) + headerSizes[idx]; + for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) { + __builtin_prefetch(begin + i); + } + } + } } return new_run; } +RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) { + // Get the lowest address non-full run from the binary tree. + std::set<Run*>* const bt = &non_full_runs_[idx]; + if (!bt->empty()) { + // If there's one, use it as the current run. + auto it = bt->begin(); + Run* non_full_run = *it; + DCHECK(non_full_run != nullptr); + DCHECK(!non_full_run->IsThreadLocal()); + bt->erase(it); + return non_full_run; + } + // If there's none, allocate a new run and use it as the current run. + return AllocRun(self, idx); +} + void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) { DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; @@ -564,66 +599,62 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) { // Use a thread-local run. Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); - if (UNLIKELY(thread_local_run == NULL)) { - MutexLock mu(self, *size_bracket_locks_[idx]); - thread_local_run = RefillRun(self, idx); - if (UNLIKELY(thread_local_run == NULL)) { - return NULL; - } - DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); - DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); - thread_local_run->is_thread_local_ = 1; - self->SetRosAllocRun(idx, thread_local_run); - DCHECK(!thread_local_run->IsFull()); - } - - DCHECK(thread_local_run != NULL); - DCHECK_NE(thread_local_run->is_thread_local_, 0); + // Allow invalid since this will always fail the allocation. + DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); slot_addr = thread_local_run->AllocSlot(); - - if (UNLIKELY(slot_addr == NULL)) { + // The allocation must fail if the run is invalid. 
+ DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr) + << "allocated from an invalid run"; + if (UNLIKELY(slot_addr == nullptr)) { // The run got full. Try to free slots. DCHECK(thread_local_run->IsFull()); MutexLock mu(self, *size_bracket_locks_[idx]); bool is_all_free_after_merge; + // This is safe to do for the dedicated_full_run_ since the bitmaps are empty. if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) { + DCHECK_NE(thread_local_run, dedicated_full_run_); // Some slot got freed. Keep it. DCHECK(!thread_local_run->IsFull()); DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree()); if (is_all_free_after_merge) { - // Reinstate the bump index mode if it's all free. - DCHECK_EQ(thread_local_run->top_slot_idx_, numOfSlots[idx]); - thread_local_run->top_slot_idx_ = 0; + // Check that the bitmap idx is back at 0 if it's all free. + DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U); } } else { // No slots got freed. Try to refill the thread-local run. DCHECK(thread_local_run->IsFull()); - self->SetRosAllocRun(idx, nullptr); - thread_local_run->is_thread_local_ = 0; - if (kIsDebugBuild) { - full_runs_[idx].insert(thread_local_run); - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex - << reinterpret_cast<intptr_t>(thread_local_run) - << " into full_runs_[" << std::dec << idx << "]"; + if (thread_local_run != dedicated_full_run_) { + self->SetRosAllocRun(idx, dedicated_full_run_); + thread_local_run->SetIsThreadLocal(false); + if (kIsDebugBuild) { + full_runs_[idx].insert(thread_local_run); + if (kTraceRosAlloc) { + LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex + << reinterpret_cast<intptr_t>(thread_local_run) + << " into full_runs_[" << std::dec << idx << "]"; + } } + DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end()); } - DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); - DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end()); + thread_local_run = RefillRun(self, idx); if (UNLIKELY(thread_local_run == NULL)) { return NULL; } DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); - thread_local_run->is_thread_local_ = 1; + thread_local_run->SetIsThreadLocal(true); self->SetRosAllocRun(idx, thread_local_run); DCHECK(!thread_local_run->IsFull()); } DCHECK(thread_local_run != NULL); DCHECK(!thread_local_run->IsFull()); - DCHECK_NE(thread_local_run->is_thread_local_, 0); + DCHECK(thread_local_run->IsThreadLocal()); slot_addr = thread_local_run->AllocSlot(); // Must succeed now with a new run. 
DCHECK(slot_addr != NULL); @@ -644,7 +675,7 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) } DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end()); - current_run->is_thread_local_ = 0; + current_run->SetIsThreadLocal(false); current_runs_[idx] = current_run; DCHECK(!current_run->IsFull()); } @@ -671,7 +702,7 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) DCHECK(current_run != NULL); DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end()); - current_run->is_thread_local_ = 0; + current_run->SetIsThreadLocal(false); current_runs_[idx] = current_run; DCHECK(!current_run->IsFull()); slot_addr = current_run->AllocSlot(); @@ -684,27 +715,27 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) << "(" << std::dec << (bracket_size) << ")"; } } - if (LIKELY(bytes_allocated != NULL)) { - *bytes_allocated = bracket_size; - } - memset(slot_addr, 0, size); + DCHECK(bytes_allocated != nullptr); + *bytes_allocated = bracket_size; + // Caller verifies that it is all 0. return slot_addr; } -void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { +size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { DCHECK_EQ(run->magic_num_, kMagicNum); DCHECK_LT(run, ptr); DCHECK_LT(ptr, run->End()); - size_t idx = run->size_bracket_idx_; - MutexLock mu(self, *size_bracket_locks_[idx]); + const size_t idx = run->size_bracket_idx_; + const size_t bracket_size = bracketSizes[idx]; bool run_was_full = false; + MutexLock mu(self, *size_bracket_locks_[idx]); if (kIsDebugBuild) { run_was_full = run->IsFull(); } if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr); } - if (LIKELY(run->is_thread_local_ != 0)) { + if (LIKELY(run->IsThreadLocal())) { // It's a thread-local run. Just mark the thread-local free bit map and return. DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx); DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); @@ -715,7 +746,7 @@ void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { << reinterpret_cast<intptr_t>(run); } // A thread local run will be kept as a thread local even if it's become all free. - return; + return bracket_size; } // Free the slot in the run. run->FreeSlot(ptr); @@ -735,9 +766,10 @@ void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { } DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); + run->ZeroHeader(); { MutexLock mu(self, lock_); - FreePages(self, run); + FreePages(self, run, true); } } else { // It is not completely free. 
If it wasn't the current run or @@ -767,6 +799,7 @@ void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { } } } + return bracket_size; } std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) { @@ -792,7 +825,7 @@ std::string RosAlloc::Run::Dump() { << " size_bracket_idx=" << idx << " is_thread_local=" << static_cast<int>(is_thread_local_) << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_) - << " top_slot_idx=" << top_slot_idx_ + << " first_search_vec_idx=" << first_search_vec_idx_ << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec) << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec) << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec) @@ -800,64 +833,52 @@ std::string RosAlloc::Run::Dump() { return stream.str(); } -void* RosAlloc::Run::AllocSlot() { - size_t idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - DCHECK_LE(top_slot_idx_, num_slots); - if (LIKELY(top_slot_idx_ < num_slots)) { - // If it's in bump index mode, grab the top slot and increment the top index. - size_t slot_idx = top_slot_idx_; - byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx]; - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) - << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; - } - top_slot_idx_++; - size_t vec_idx = slot_idx / 32; - size_t vec_off = slot_idx % 32; - uint32_t* vec = &alloc_bit_map_[vec_idx]; - DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0)); - *vec |= 1 << vec_off; - DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0)); - return slot_addr; - } - // Not in bump index mode. Search the alloc bit map for an empty slot. - size_t num_vec = RoundUp(num_slots, 32) / 32; - size_t slot_idx = 0; - bool found_slot = false; - for (size_t v = 0; v < num_vec; v++) { - uint32_t *vecp = &alloc_bit_map_[v]; - uint32_t ffz1 = __builtin_ffs(~*vecp); - uint32_t ffz; - // TODO: Use LIKELY or UNLIKELY here? - if (LIKELY(ffz1 > 0 && (ffz = ffz1 - 1) + v * 32 < num_slots)) { +inline void* RosAlloc::Run::AllocSlot() { + const size_t idx = size_bracket_idx_; + while (true) { + if (kIsDebugBuild) { + // Make sure that no slots leaked, the bitmap should be full for all previous vectors. + for (size_t i = 0; i < first_search_vec_idx_; ++i) { + CHECK_EQ(~alloc_bit_map_[i], 0U); + } + } + uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; + uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); + if (LIKELY(ffz1 != 0)) { + const uint32_t ffz = ffz1 - 1; + const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; + const uint32_t mask = 1U << ffz; + DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; // Found an empty slot. Set the bit. 
- DCHECK_EQ((*vecp & (1 << ffz)), static_cast<uint32_t>(0)); - *vecp |= (1 << ffz); - DCHECK_NE((*vecp & (1 << ffz)), static_cast<uint32_t>(0)); - slot_idx = ffz + v * 32; - found_slot = true; - break; + DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); + *alloc_bitmap_ptr |= mask; + DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); + byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx]; + if (kTraceRosAlloc) { + LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) + << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; + } + return slot_addr; } - } - if (LIKELY(found_slot)) { - byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx]; - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) - << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; + const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; + if (first_search_vec_idx_ + 1 >= num_words) { + DCHECK(IsFull()); + // Already at the last word, return null. + return nullptr; } - return slot_addr; + // Increase the index to the next word and try again. + ++first_search_vec_idx_; } - return NULL; } -inline void RosAlloc::Run::FreeSlot(void* ptr) { - DCHECK_EQ(is_thread_local_, 0); - byte idx = size_bracket_idx_; - size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr) +void RosAlloc::Run::FreeSlot(void* ptr) { + DCHECK(!IsThreadLocal()); + const byte idx = size_bracket_idx_; + const size_t bracket_size = bracketSizes[idx]; + const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr) - (reinterpret_cast<byte*>(this) + headerSizes[idx]); - DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0)); - size_t slot_idx = offset_from_slot_base / bracketSizes[idx]; + DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0)); + size_t slot_idx = offset_from_slot_base / bracket_size; DCHECK_LT(slot_idx, numOfSlots[idx]); size_t vec_idx = slot_idx / 32; if (kIsDebugBuild) { @@ -866,9 +887,14 @@ inline void RosAlloc::Run::FreeSlot(void* ptr) { } size_t vec_off = slot_idx % 32; uint32_t* vec = &alloc_bit_map_[vec_idx]; - DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0)); - *vec &= ~(1 << vec_off); - DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0)); + first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx)); + const uint32_t mask = 1U << vec_off; + DCHECK_NE(*vec & mask, 0U); + *vec &= ~mask; + DCHECK_EQ(*vec & mask, 0U); + // Zero out the memory. + // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16. + memset(ptr, 0, bracket_size); if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr) << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; @@ -876,11 +902,11 @@ inline void RosAlloc::Run::FreeSlot(void* ptr) { } inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) { - DCHECK_NE(is_thread_local_, 0); + DCHECK(IsThreadLocal()); // Free slots in the alloc bit map based on the thread local free bit map. 
- byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; + const size_t idx = size_bracket_idx_; + const size_t num_of_slots = numOfSlots[idx]; + const size_t num_vec = RoundUp(num_of_slots, 32) / 32; bool changed = false; uint32_t* vecp = &alloc_bit_map_[0]; uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0]; @@ -890,6 +916,7 @@ inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_ uint32_t vec_before = *vecp; uint32_t vec_after; if (tl_free_vec != 0) { + first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v)); vec_after = vec_before & ~tl_free_vec; *vecp = vec_after; changed = true; @@ -898,7 +925,13 @@ inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_ vec_after = vec_before; } if (vec_after != 0) { - is_all_free_after = false; + if (v == num_vec - 1) { + // Only not all free if a bit other than the mask bits are set. + is_all_free_after = + is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after; + } else { + is_all_free_after = false; + } } DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0)); } @@ -909,16 +942,15 @@ inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_ } inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() { - DCHECK_EQ(is_thread_local_, 0); + DCHECK(!IsThreadLocal()); // Free slots in the alloc bit map based on the bulk free bit map. - byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; + const size_t num_vec = NumberOfBitmapVectors(); uint32_t* vecp = &alloc_bit_map_[0]; uint32_t* free_vecp = &BulkFreeBitMap()[0]; for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) { uint32_t free_vec = *free_vecp; if (free_vec != 0) { + first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v)); *vecp &= ~free_vec; *free_vecp = 0; // clear the bulk free bit map. } @@ -927,11 +959,9 @@ inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() { } inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() { - DCHECK_NE(is_thread_local_, 0); + DCHECK(IsThreadLocal()); // Union the thread local bit map with the bulk free bit map. 
- byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; + size_t num_vec = NumberOfBitmapVectors(); uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0]; uint32_t* from_vecp = &BulkFreeBitMap()[0]; for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) { @@ -945,66 +975,71 @@ inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() { } inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) { - DCHECK_NE(is_thread_local_, 0); + DCHECK(IsThreadLocal()); MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap"); } -inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) { - MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap"); +inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) { + return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap"); } -inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, - const char* caller_name) { - byte idx = size_bracket_idx_; - size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr) +inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, + const char* caller_name) { + const byte idx = size_bracket_idx_; + const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr) - (reinterpret_cast<byte*>(this) + headerSizes[idx]); - DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0)); - size_t slot_idx = offset_from_slot_base / bracketSizes[idx]; + const size_t bracket_size = bracketSizes[idx]; + memset(ptr, 0, bracket_size); + DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0)); + size_t slot_idx = offset_from_slot_base / bracket_size; DCHECK_LT(slot_idx, numOfSlots[idx]); size_t vec_idx = slot_idx / 32; if (kIsDebugBuild) { - size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32; + size_t num_vec = NumberOfBitmapVectors(); DCHECK_LT(vec_idx, num_vec); } size_t vec_off = slot_idx % 32; uint32_t* vec = &free_bit_map_base[vec_idx]; - DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0)); - *vec |= 1 << vec_off; - DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0)); + const uint32_t mask = 1U << vec_off; + DCHECK_EQ(*vec & mask, 0U); + *vec |= mask; + DCHECK_NE(*vec & mask, 0U); if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr) << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; } + return bracket_size; +} + +inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) { + const size_t kBitsPerVec = 32; + DCHECK_GE(num_slots * kBitsPerVec, num_vec); + size_t remain = num_vec * kBitsPerVec - num_slots; + DCHECK_NE(remain, kBitsPerVec); + return ((1U << remain) - 1) << (kBitsPerVec - remain); } inline bool RosAlloc::Run::IsAllFree() { - byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; - for (size_t v = 0; v < num_vec; v++) { + const byte idx = size_bracket_idx_; + const size_t num_slots = numOfSlots[idx]; + const size_t num_vec = NumberOfBitmapVectors(); + DCHECK_NE(num_vec, 0U); + // Check the last vector after the loop since it uses a special case for the masked bits. + for (size_t v = 0; v < num_vec - 1; v++) { uint32_t vec = alloc_bit_map_[v]; if (vec != 0) { return false; } } - return true; + // Make sure the last word is equal to the mask, all other bits must be 0. 
+ return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec); } inline bool RosAlloc::Run::IsFull() { - byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; - size_t slots = 0; - for (size_t v = 0; v < num_vec; v++, slots += 32) { - DCHECK_GE(num_slots, slots); - uint32_t vec = alloc_bit_map_[v]; - uint32_t mask = (num_slots - slots >= 32) ? static_cast<uint32_t>(-1) - : (1 << (num_slots - slots)) - 1; - if ((num_slots - slots) >= 32) { - DCHECK_EQ(mask, static_cast<uint32_t>(-1)); - } - if (vec != mask) { + const size_t num_vec = NumberOfBitmapVectors(); + for (size_t v = 0; v < num_vec; ++v) { + if (~alloc_bit_map_[v] != 0) { return false; } } @@ -1012,9 +1047,7 @@ inline bool RosAlloc::Run::IsFull() { } inline bool RosAlloc::Run::IsBulkFreeBitmapClean() { - byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; + const size_t num_vec = NumberOfBitmapVectors(); for (size_t v = 0; v < num_vec; v++) { uint32_t vec = BulkFreeBitMap()[v]; if (vec != 0) { @@ -1025,9 +1058,7 @@ inline bool RosAlloc::Run::IsBulkFreeBitmapClean() { } inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() { - byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; + const size_t num_vec = NumberOfBitmapVectors(); for (size_t v = 0; v < num_vec; v++) { uint32_t vec = ThreadLocalFreeBitMap()[v]; if (vec != 0) { @@ -1037,11 +1068,31 @@ inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() { return true; } -inline void RosAlloc::Run::ClearBitMaps() { - byte idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; - memset(alloc_bit_map_, 0, sizeof(uint32_t) * num_vec * 3); +inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() { + const size_t idx = size_bracket_idx_; + const size_t num_slots = numOfSlots[idx]; + const size_t num_vec = RoundUp(num_slots, 32) / 32; + DCHECK_NE(num_vec, 0U); + // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they + // don't represent valid slots. + alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec); +} + +inline void RosAlloc::Run::ZeroHeader() { + const byte idx = size_bracket_idx_; + memset(this, 0, headerSizes[idx]); +} + +inline void RosAlloc::Run::ZeroData() { + const byte idx = size_bracket_idx_; + byte* slot_begin = reinterpret_cast<byte*>(this) + headerSizes[idx]; + memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]); +} + +inline void RosAlloc::Run::FillAllocBitMap() { + size_t num_vec = NumberOfBitmapVectors(); + memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec); + first_search_vec_idx_ = num_vec - 1; // No free bits in any of the bitmap words. } void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), @@ -1073,7 +1124,7 @@ void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size // lock for better performance, assuming that the existence of an // allocated chunk/pointer being freed in BulkFree() guarantees that // the page map entry won't change. Disabled for now. 
-static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = false; +static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true; size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { size_t freed_bytes = 0; @@ -1096,11 +1147,10 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { #endif for (size_t i = 0; i < num_ptrs; i++) { void* ptr = ptrs[i]; - ptrs[i] = NULL; DCHECK_LE(base_, ptr); DCHECK_LT(ptr, base_ + footprint_); size_t pm_idx = RoundDownToPageMapIndex(ptr); - Run* run = NULL; + Run* run = nullptr; if (kReadPageMapEntryWithoutLockInBulkFree) { // Read the page map entries without locking the lock. byte page_map_entry = page_map_[pm_idx]; @@ -1111,106 +1161,74 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { } if (LIKELY(page_map_entry == kPageMapRun)) { run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize); - DCHECK_EQ(run->magic_num_, kMagicNum); } else if (LIKELY(page_map_entry == kPageMapRunPart)) { size_t pi = pm_idx; - DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart); // Find the beginning of the run. - while (page_map_[pi] != kPageMapRun) { - pi--; + do { + --pi; DCHECK_LT(pi, capacity_ / kPageSize); - } - DCHECK_EQ(page_map_[pi], kPageMapRun); + } while (page_map_[pi] != kPageMapRun); run = reinterpret_cast<Run*>(base_ + pi * kPageSize); - DCHECK_EQ(run->magic_num_, kMagicNum); } else if (page_map_entry == kPageMapLargeObject) { MutexLock mu(self, lock_); - freed_bytes += FreePages(self, ptr) * kPageSize; + freed_bytes += FreePages(self, ptr, false); continue; } else { LOG(FATAL) << "Unreachable - page map type: " << page_map_entry; } - DCHECK(run != nullptr); - // Set the bit in the bulk free bit map. - run->MarkBulkFreeBitMap(ptr); - freed_bytes += IndexToBracketSize(run->size_bracket_idx_); -#ifdef HAVE_ANDROID_OS - if (!run->to_be_bulk_freed_) { - run->to_be_bulk_freed_ = true; - runs.push_back(run); - } -#else - runs.insert(run); -#endif } else { // Read the page map entries with a lock. - bool free_from_run = false; - { - MutexLock mu(self, lock_); - DCHECK_LT(pm_idx, page_map_size_); - byte page_map_entry = page_map_[pm_idx]; - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx=" - << std::dec << pm_idx - << ", page_map_entry=" << static_cast<int>(page_map_entry); - } - if (LIKELY(page_map_entry == kPageMapRun)) { - free_from_run = true; - run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize); - DCHECK_EQ(run->magic_num_, kMagicNum); - } else if (LIKELY(page_map_entry == kPageMapRunPart)) { - free_from_run = true; - size_t pi = pm_idx; - DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart); - // Find the beginning of the run. 
- while (page_map_[pi] != kPageMapRun) { - pi--; - DCHECK_LT(pi, capacity_ / kPageSize); - } - DCHECK_EQ(page_map_[pi], kPageMapRun); - run = reinterpret_cast<Run*>(base_ + pi * kPageSize); - DCHECK_EQ(run->magic_num_, kMagicNum); - } else if (page_map_entry == kPageMapLargeObject) { - freed_bytes += FreePages(self, ptr) * kPageSize; - } else { - LOG(FATAL) << "Unreachable - page map type: " << page_map_entry; - } + MutexLock mu(self, lock_); + DCHECK_LT(pm_idx, page_map_size_); + byte page_map_entry = page_map_[pm_idx]; + if (kTraceRosAlloc) { + LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx=" + << std::dec << pm_idx + << ", page_map_entry=" << static_cast<int>(page_map_entry); + } + if (LIKELY(page_map_entry == kPageMapRun)) { + run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize); + } else if (LIKELY(page_map_entry == kPageMapRunPart)) { + size_t pi = pm_idx; + // Find the beginning of the run. + do { + --pi; + DCHECK_LT(pi, capacity_ / kPageSize); + } while (page_map_[pi] != kPageMapRun); + run = reinterpret_cast<Run*>(base_ + pi * kPageSize); + } else if (page_map_entry == kPageMapLargeObject) { + freed_bytes += FreePages(self, ptr, false); + continue; + } else { + LOG(FATAL) << "Unreachable - page map type: " << page_map_entry; } - if (LIKELY(free_from_run)) { - DCHECK(run != NULL); - // Set the bit in the bulk free bit map. - run->MarkBulkFreeBitMap(ptr); - freed_bytes += IndexToBracketSize(run->size_bracket_idx_); + } + DCHECK(run != nullptr); + DCHECK_EQ(run->magic_num_, kMagicNum); + // Set the bit in the bulk free bit map. + freed_bytes += run->MarkBulkFreeBitMap(ptr); #ifdef HAVE_ANDROID_OS - if (!run->to_be_bulk_freed_) { - run->to_be_bulk_freed_ = true; - runs.push_back(run); - } + if (!run->to_be_bulk_freed_) { + run->to_be_bulk_freed_ = true; + runs.push_back(run); + } #else - runs.insert(run); + runs.insert(run); #endif - } - } } // Now, iterate over the affected runs and update the alloc bit map // based on the bulk free bit map (for non-thread-local runs) and // union the bulk free bit map into the thread-local free bit map // (for thread-local runs.) -#ifdef HAVE_ANDROID_OS - typedef std::vector<Run*>::iterator It; -#else - typedef hash_set<Run*, hash_run, eq_run>::iterator It; -#endif - for (It it = runs.begin(); it != runs.end(); ++it) { - Run* run = *it; + for (Run* run : runs) { #ifdef HAVE_ANDROID_OS DCHECK(run->to_be_bulk_freed_); run->to_be_bulk_freed_ = false; #endif size_t idx = run->size_bracket_idx_; MutexLock mu(self, *size_bracket_locks_[idx]); - if (run->is_thread_local_ != 0) { + if (run->IsThreadLocal()) { DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx); DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); @@ -1219,7 +1237,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x" << std::hex << reinterpret_cast<intptr_t>(run); } - DCHECK_NE(run->is_thread_local_, 0); + DCHECK(run->IsThreadLocal()); // A thread local run will be kept as a thread local even if // it's become all free. } else { @@ -1269,8 +1287,9 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { DCHECK(non_full_runs->find(run) == non_full_runs->end()); } if (!run_was_current) { + run->ZeroHeader(); MutexLock mu(self, lock_); - FreePages(self, run); + FreePages(self, run, true); } } else { // It is not completely free. 
If it wasn't the current run or @@ -1381,7 +1400,7 @@ std::string RosAlloc::DumpPageMap() { stream << "[" << i << "]=Run (start)" << " idx=" << idx << " numOfPages=" << numOfPages[idx] - << " thread_local=" << static_cast<int>(run->is_thread_local_) + << " is_thread_local=" << run->is_thread_local_ << " is_all_free=" << (run->IsAllFree() ? 1 : 0) << std::endl; break; @@ -1556,6 +1575,8 @@ void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_by // The start of a run. Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize); DCHECK_EQ(run->magic_num_, kMagicNum); + // The dedicated full run doesn't contain any real allocations, don't visit the slots in + // there. run->InspectAllSlots(handler, arg); size_t num_pages = numOfPages[run->size_bracket_idx_]; if (kIsDebugBuild) { @@ -1605,14 +1626,16 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); - if (thread_local_run != NULL) { + CHECK(thread_local_run != nullptr); + // Invalid means already revoked. + DCHECK(thread_local_run->IsThreadLocal()); + if (thread_local_run != dedicated_full_run_) { + thread->SetRosAllocRun(idx, dedicated_full_run_); DCHECK_EQ(thread_local_run->magic_num_, kMagicNum); - DCHECK_NE(thread_local_run->is_thread_local_, 0); - thread->SetRosAllocRun(idx, nullptr); // Note the thread local run may not be full here. bool dont_care; thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care); - thread_local_run->is_thread_local_ = 0; + thread_local_run->SetIsThreadLocal(false); thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap(); DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); @@ -1628,7 +1651,8 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { } } else if (thread_local_run->IsAllFree()) { MutexLock mu(self, lock_); - FreePages(self, thread_local_run); + thread_local_run->ZeroHeader(); + FreePages(self, thread_local_run, true); } else { non_full_runs_[idx].insert(thread_local_run); DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end()); @@ -1648,9 +1672,8 @@ void RosAlloc::RevokeAllThreadLocalRuns() { MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_); MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_); std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); - for (auto it = thread_list.begin(); it != thread_list.end(); ++it) { - Thread* t = *it; - RevokeThreadLocalRuns(t); + for (Thread* thread : thread_list) { + RevokeThreadLocalRuns(thread); } } @@ -1662,7 +1685,7 @@ void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); - DCHECK(thread_local_run == nullptr); + DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_); } } } @@ -1770,6 +1793,15 @@ void RosAlloc::Initialize() { << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];; } } + // Fill the alloc bitmap so nobody can successfully allocate from it. 
+ if (kIsDebugBuild) { + dedicated_full_run_->magic_num_ = kMagicNum; + } + // It doesn't matter which size bracket we use since the main goal is to have the allocation + // fail 100% of the time you attempt to allocate into the dedicated full run. + dedicated_full_run_->size_bracket_idx_ = 0; + dedicated_full_run_->FillAllocBitMap(); + dedicated_full_run_->SetIsThreadLocal(true); } void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) { @@ -1867,6 +1899,7 @@ void RosAlloc::Verify() { << " and the run size : page index range " << i << " to " << (i + num_pages) << std::endl << DumpPageMap(); } + // Don't verify the dedicated_full_run_ since it doesn't have any real allocations. runs.push_back(run); i += num_pages; CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end @@ -1891,34 +1924,25 @@ void RosAlloc::Verify() { void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) { DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump(); - size_t idx = size_bracket_idx_; + const size_t idx = size_bracket_idx_; CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump(); byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx]; - size_t num_slots = numOfSlots[idx]; + const size_t num_slots = numOfSlots[idx]; + const size_t num_vec = RoundUp(num_slots, 32) / 32; + CHECK_GT(num_vec, 0U); size_t bracket_size = IndexToBracketSize(idx); CHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize) << "Mismatch in the end address of the run " << Dump(); // Check that the bulk free bitmap is clean. It's only used during BulkFree(). CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump(); - // Check the bump index mode, if it's on. - if (top_slot_idx_ < num_slots) { - // If the bump index mode is on (top_slot_idx_ < num_slots), then - // all of the slots after the top index must be free. - for (size_t i = top_slot_idx_; i < num_slots; ++i) { - size_t vec_idx = i / 32; - size_t vec_off = i % 32; - uint32_t vec = alloc_bit_map_[vec_idx]; - CHECK_EQ((vec & (1 << vec_off)), static_cast<uint32_t>(0)) - << "A slot >= top_slot_idx_ isn't free " << Dump(); - } - } else { - CHECK_EQ(top_slot_idx_, num_slots) - << "If the bump index mode is off, the top index == the number of slots " - << Dump(); - } + uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec); + // Make sure all the bits at the end of the run are set so that we don't allocate there. + CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask); + // Ensure that the first bitmap index is valid. + CHECK_LT(first_search_vec_idx_, num_vec); // Check the thread local runs, the current runs, and the run sets. - if (is_thread_local_) { + if (IsThreadLocal()) { // If it's a thread local run, then it must be pointed to by an owner thread. bool owner_found = false; std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); @@ -1980,7 +2004,6 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) { } } // Check each slot. - size_t num_vec = RoundUp(num_slots, 32) / 32; size_t slots = 0; for (size_t v = 0; v < num_vec; v++, slots += 32) { DCHECK_GE(num_slots, slots) << "Out of bounds"; @@ -1991,7 +2014,7 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) { bool is_allocated = ((vec >> i) & 0x1) != 0; // If a thread local run, slots may be marked freed in the // thread local free bitmap. 
- bool is_thread_local_freed = is_thread_local_ && ((thread_local_free_vec >> i) & 0x1) != 0; + bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0; if (is_allocated && !is_thread_local_freed) { byte* slot_addr = slot_base + (slots + i) * bracket_size; mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr); diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index 0c508b7..714230b 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -155,7 +155,7 @@ class RosAlloc { // +-------------------+ // | to_be_bulk_freed | // +-------------------+ - // | top_slot_idx | + // | top_bitmap_idx | // +-------------------+ // | | // | alloc bit map | @@ -186,12 +186,12 @@ class RosAlloc { // class Run { public: - byte magic_num_; // The magic number used for debugging. - byte size_bracket_idx_; // The index of the size bracket of this run. - byte is_thread_local_; // True if this run is used as a thread-local run. - byte to_be_bulk_freed_; // Used within BulkFree() to flag a run that's involved with a bulk free. - uint32_t top_slot_idx_; // The top slot index when this run is in bump index mode. - uint32_t alloc_bit_map_[0]; // The bit map that allocates if each slot is in use. + byte magic_num_; // The magic number used for debugging. + byte size_bracket_idx_; // The index of the size bracket of this run. + byte is_thread_local_; // True if this run is used as a thread-local run. + byte to_be_bulk_freed_; // Used within BulkFree() to flag a run that's involved with a bulk free. + uint32_t first_search_vec_idx_; // The index of the first bitmap vector which may contain an available slot. + uint32_t alloc_bit_map_[0]; // The bit map that allocates if each slot is in use. // bulk_free_bit_map_[] : The bit map that is used for GC to // temporarily mark the slots to free without using a lock. After @@ -225,6 +225,16 @@ class RosAlloc { void* End() { return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_]; } + // Returns the number of bitmap words per run. + size_t NumberOfBitmapVectors() const { + return RoundUp(numOfSlots[size_bracket_idx_], 32) / 32; + } + void SetIsThreadLocal(bool is_thread_local) { + is_thread_local_ = is_thread_local ? 1 : 0; + } + bool IsThreadLocal() const { + return is_thread_local_ != 0; + } // Frees slots in the allocation bit map with regard to the // thread-local free bit map. Used when a thread-local run becomes // full. @@ -243,10 +253,13 @@ class RosAlloc { void* AllocSlot(); // Frees a slot in a run. This is used in a non-bulk free. void FreeSlot(void* ptr); - // Marks the slots to free in the bulk free bit map. - void MarkBulkFreeBitMap(void* ptr); + // Marks the slots to free in the bulk free bit map. Returns the bracket size. + size_t MarkBulkFreeBitMap(void* ptr); // Marks the slots to free in the thread-local free bit map. void MarkThreadLocalFreeBitMap(void* ptr); + // Last word mask, all of the bits in the last word which aren't valid slots are set to + // optimize allocation path. + static size_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec); // Returns true if all the slots in the run are not in use. bool IsAllFree(); // Returns true if all the slots in the run are in use. @@ -255,8 +268,14 @@ class RosAlloc { bool IsBulkFreeBitmapClean(); // Returns true if the thread local free bit map is clean. bool IsThreadLocalFreeBitmapClean(); - // Clear all the bit maps. 
- void ClearBitMaps(); + // Set the alloc_bit_map_ bits for slots that are past the end of the run. + void SetAllocBitMapBitsForInvalidSlots(); + // Zero the run's data. + void ZeroData(); + // Zero the run's header. + void ZeroHeader(); + // Fill the alloc bitmap with 1s. + void FillAllocBitMap(); // Iterate over all the slots and apply the given function. void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg); // Dump the run metadata for debugging. @@ -267,8 +286,9 @@ EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_); private: - // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap(). - void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name); + // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap(). Returns the bracket + // size. + size_t MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name); // Turns the bit map into a string for debugging. static std::string BitMapToStr(uint32_t* bit_map_base, size_t num_vec); }; @@ -376,7 +396,7 @@ return byte_offset / kPageSize; } // Returns the page map index from an address with rounding. - size_t RoundDownToPageMapIndex(void* addr) { + size_t RoundDownToPageMapIndex(void* addr) const { DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_); return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize; } @@ -446,12 +466,19 @@ hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets]; // The set of free pages. std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_); + // The dedicated full run, it is always full and shared by all threads when revoking happens. + // This is an optimization since it enables us to avoid a null check for revoked runs. + static Run* dedicated_full_run_; + // Using size_t to ensure that it is at least word aligned. + static size_t dedicated_full_run_storage_[]; // The current runs where the allocations are first attempted for // the size brackets that do not use thread-local // runs. current_runs_[i] is guarded by size_bracket_locks_[i]. Run* current_runs_[kNumOfSizeBrackets]; // The mutexes, one per size bracket. Mutex* size_bracket_locks_[kNumOfSizeBrackets]; + // Bracket lock names (since locks only have char* names). + std::string size_bracket_lock_names[kNumOfSizeBrackets]; // The types of page map entries. enum { kPageMapEmpty = 0, // Not allocated. @@ -493,15 +520,19 @@ // Page-granularity alloc/free void* AllocPages(Thread* self, size_t num_pages, byte page_map_type) EXCLUSIVE_LOCKS_REQUIRED(lock_); - // Returns how many pages were freed. - size_t FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_); + // Returns how many bytes were freed. + size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_); // Allocate/free a run slot. void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); - void FreeFromRun(Thread* self, void* ptr, Run* run) + // Returns the bracket size. + size_t FreeFromRun(Thread* self, void* ptr, Run* run) LOCKS_EXCLUDED(lock_); + // Used to allocate a new thread local run for a size bracket. + Run* AllocRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_); + // Used to acquire a new/reused run for a size bracket. Used when a // thread-local or current run gets full.
@@ -493,15 +520,19 @@ class RosAlloc {
   // Page-granularity alloc/free
   void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  // Returns how many pages were freed.
-  size_t FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  // Returns how many bytes were freed.
+  size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_);
   // Allocate/free a run slot.
   void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
       LOCKS_EXCLUDED(lock_);
-  void FreeFromRun(Thread* self, void* ptr, Run* run)
+  // Returns the bracket size.
+  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
       LOCKS_EXCLUDED(lock_);
+  // Used to allocate a new thread local run for a size bracket.
+  Run* AllocRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
+  // Used to acquire a new/reused run for a size bracket. Used when a
   // thread-local or current run gets full.
   Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
@@ -558,6 +589,9 @@ class RosAlloc {
   void AssertAllThreadLocalRunsAreRevoked() LOCKS_EXCLUDED(Locks::thread_list_lock_);
   // Dumps the page map for debugging.
   std::string DumpPageMap() EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  static Run* GetDedicatedFullRun() {
+    return dedicated_full_run_;
+  }
 
   // Callbacks for InspectAll that will count the number of bytes
   // allocated and objects allocated, respectively.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 517c748..5d72bc1 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -78,9 +78,9 @@ static constexpr size_t kGcAlotInterval = KB;
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
 static constexpr size_t kMaxConcurrentRemainingBytes = 512 * KB;
 // Sticky GC throughput adjustment, divided by 4. Increasing this causes sticky GC to occur more
-// relative to partial/full GC. This is desirable since sticky GCs interfere less with mutator
+// relative to partial/full GC. This may be desirable since sticky GCs interfere less with mutator
 // threads (lower pauses, use less memory bandwidth).
-static constexpr double kStickyGcThroughputAdjustment = 1.25;
+static constexpr double kStickyGcThroughputAdjustment = 1.0;
 // Whether or not we use the free list large object space.
 static constexpr bool kUseFreeListSpaceForLOS = false;
 // Whether or not we compact the zygote in PreZygoteFork.
@@ -595,6 +595,11 @@ void Heap::AddSpace(space::Space* space, bool set_as_default) {
     if (continuous_space->IsDlMallocSpace()) {
       dlmalloc_space_ = continuous_space->AsDlMallocSpace();
     } else if (continuous_space->IsRosAllocSpace()) {
+      // Revoke the thread-local runs of the existing rosalloc_space_ first so that we don't
+      // end up with non-full runs from the previous space during the revoke that follows.
+      if (rosalloc_space_ != nullptr) {
+        rosalloc_space_->RevokeAllThreadLocalBuffers();
+      }
       rosalloc_space_ = continuous_space->AsRosAllocSpace();
     }
   }
@@ -615,7 +620,7 @@ void Heap::AddSpace(space::Space* space, bool set_as_default) {
   }
 }
 
-void Heap::RemoveSpace(space::Space* space) {
+void Heap::RemoveSpace(space::Space* space, bool unset_as_default) {
   DCHECK(space != nullptr);
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   if (space->IsContinuousSpace()) {
@@ -632,17 +637,19 @@ void Heap::RemoveSpace(space::Space* space) {
     auto it = std::find(continuous_spaces_.begin(), continuous_spaces_.end(), continuous_space);
     DCHECK(it != continuous_spaces_.end());
     continuous_spaces_.erase(it);
-    if (continuous_space == dlmalloc_space_) {
-      dlmalloc_space_ = nullptr;
-    } else if (continuous_space == rosalloc_space_) {
-      rosalloc_space_ = nullptr;
-    }
-    if (continuous_space == main_space_) {
-      main_space_ = nullptr;
-    } else if (continuous_space == bump_pointer_space_) {
-      bump_pointer_space_ = nullptr;
-    } else if (continuous_space == temp_space_) {
-      temp_space_ = nullptr;
+    if (unset_as_default) {
+      if (continuous_space == dlmalloc_space_) {
+        dlmalloc_space_ = nullptr;
+      } else if (continuous_space == rosalloc_space_) {
+        rosalloc_space_ = nullptr;
+      }
+      if (continuous_space == main_space_) {
+        main_space_ = nullptr;
+      } else if (continuous_space == bump_pointer_space_) {
+        bump_pointer_space_ = nullptr;
+      } else if (continuous_space == temp_space_) {
+        temp_space_ = nullptr;
+      }
     }
   } else {
     DCHECK(space->IsDiscontinuousSpace());
@@ -725,6 +732,7 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) {
   os << "Total mutator paused time: " << PrettyDuration(total_paused_time) << "\n";
   os << "Total time waiting for GC to complete: " << PrettyDuration(total_wait_time_) << "\n";
   os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_;
+  BaseMutex::DumpAll(os);
 }
 
 Heap::~Heap() {
@@ -1457,6 +1465,10 @@ void Heap::TransitionCollector(CollectorType collector_type) {
         // pointer space last transition it will be protected.
         bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
         Compact(bump_pointer_space_, main_space_);
+        // Remove the main space so that we don't try to trim it; this doesn't work for debug
+        // builds since RosAlloc attempts to read the magic number from a protected page.
+        // TODO: Clean this up by getting rid of the unset_as_default parameter.
+        RemoveSpace(main_space_, false);
       }
       break;
     }
@@ -1465,6 +1477,7 @@ void Heap::TransitionCollector(CollectorType collector_type) {
     case kCollectorTypeCMS: {
       if (IsMovingGc(collector_type_)) {
         // Compact to the main space from the bump pointer space, don't need to swap semispaces.
+        AddSpace(main_space_, false);
         main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
         Compact(main_space_, bump_pointer_space_);
       }
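The unset_as_default flag can be summarized with a small model. ToyHeap below is hypothetical and only stands in for the Heap methods changed above: the space always leaves the space list, but the cached default-space pointer survives when unset_as_default is false, which is what allows TransitionCollector to remove the main space after compacting away from it and later re-add the very same pointer with AddSpace(main_space_, false):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct ToySpace {};

    struct ToyHeap {
      std::vector<ToySpace*> spaces_;
      ToySpace* main_space_ = nullptr;

      void AddSpace(ToySpace* space, bool set_as_default = true) {
        spaces_.push_back(space);
        if (set_as_default) {
          main_space_ = space;
        }
      }

      void RemoveSpace(ToySpace* space, bool unset_as_default = true) {
        auto it = std::find(spaces_.begin(), spaces_.end(), space);
        assert(it != spaces_.end());
        spaces_.erase(it);
        if (unset_as_default && main_space_ == space) {
          main_space_ = nullptr;
        }
      }
    };

    int main() {
      ToyHeap heap;
      ToySpace main_space;
      heap.AddSpace(&main_space);               // Becomes the default space.
      heap.RemoveSpace(&main_space, false);     // Off the list, pointer retained.
      assert(heap.main_space_ == &main_space);  // No trim of the removed space.
      heap.AddSpace(&main_space, false);        // Re-added; defaults untouched.
      return 0;
    }
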
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index ceba8b6..c37bb05 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -289,6 +289,12 @@ class Heap {
   void RegisterGCAllocation(size_t bytes);
   void RegisterGCDeAllocation(size_t bytes);
 
+  // Public due to usage by tests.
+  void AddSpace(space::Space* space, bool set_as_default = true)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void RemoveSpace(space::Space* space, bool unset_as_default = true)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+
   // Set target ideal heap utilization ratio, implements
   // dalvik.system.VMRuntime.setTargetHeapUtilization.
   void SetTargetHeapUtilization(float target);
@@ -684,10 +690,6 @@ class Heap {
 
   size_t GetPercentFree();
 
-  void AddSpace(space::Space* space, bool set_as_default = true)
-      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  void RemoveSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-
   static void VerificationCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index a5a6da0..f5c0e94 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -32,7 +32,7 @@ namespace art {
 namespace gc {
 namespace space {
 
-static constexpr bool kPrefetchDuringRosAllocFreeList = true;
+static constexpr bool kPrefetchDuringRosAllocFreeList = false;
 static constexpr size_t kPrefetchLookAhead = 8;
 // Use this only for verification, it is not safe to use since the class of the object may have
 // been freed.
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 9896a48..28200df 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -39,10 +39,8 @@ class SpaceTest : public CommonRuntimeTest {
   }
 
   void AddSpace(ContinuousSpace* space) {
-    // For RosAlloc, revoke the thread local runs before moving onto a
-    // new alloc space.
-    Runtime::Current()->GetHeap()->RevokeAllThreadLocalBuffers();
-    Runtime::Current()->GetHeap()->AddSpace(space);
+    // By passing true, AddSpace() does the revoke.
+    Runtime::Current()->GetHeap()->AddSpace(space, true);
   }
 
   mirror::Class* GetByteArrayClass(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -349,11 +347,8 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
     EXPECT_EQ(usable_size, computed_usable_size);
   }
 
-  // Release memory and check pointers are nullptr.
+  // Release memory.
   space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    EXPECT_TRUE(lots_of_objects[i] == nullptr);
-  }
 
   // Succeeds, fits by adjusting the max allowed footprint.
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
@@ -367,12 +362,8 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
     EXPECT_EQ(usable_size, computed_usable_size);
   }
 
-  // Release memory and check pointers are nullptr
-  // TODO: This isn't compaction safe, fix.
+  // Release memory.
   space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    EXPECT_TRUE(lots_of_objects[i] == nullptr);
-  }
 }
 
 void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 095404f..771680b 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1018,7 +1018,8 @@ Thread::Thread(bool daemon) : tls32_(daemon), wait_monitor_(nullptr), interrupte
   tls32_.state_and_flags.as_struct.flags = 0;
   tls32_.state_and_flags.as_struct.state = kNative;
   memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
-  memset(tlsPtr_.rosalloc_runs, 0, sizeof(tlsPtr_.rosalloc_runs));
+  std::fill(tlsPtr_.rosalloc_runs, tlsPtr_.rosalloc_runs + kRosAllocNumOfSizeBrackets,
+            gc::allocator::RosAlloc::GetDedicatedFullRun());
   for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
     tlsPtr_.checkpoint_functions[i] = nullptr;
   }
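Finally, the kPrefetchDuringRosAllocFreeList flag that this patch flips to false in rosalloc_space.cc guards a pattern along these lines; FreeOne() is a hypothetical stand-in for the per-object free in RosAllocSpace::FreeList(), and the loop is a sketch of the technique rather than the method's actual body:

    #include <cstddef>

    static constexpr bool kPrefetchDuringRosAllocFreeList = false;  // As set above.
    static constexpr size_t kPrefetchLookAhead = 8;

    inline void FreeOne(void* /*ptr*/) { /* Return the slot to its run. */ }

    void FreeList(void** ptrs, size_t num_ptrs) {
      for (size_t i = 0; i < num_ptrs; ++i) {
        if (kPrefetchDuringRosAllocFreeList && i + kPrefetchLookAhead < num_ptrs) {
          // GCC/Clang builtin: pull the header of a future object into cache so
          // it is warm by the time the loop reaches it.
          __builtin_prefetch(ptrs[i + kPrefetchLookAhead]);
        }
        FreeOne(ptrs[i]);
      }
    }

Disabling the flag presumably means the prefetch did not pay for itself on the measured workloads; a look-ahead distance such as 8 trades prefetch timeliness against cache pollution.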