author      Mathieu Chartier <mathieuc@google.com>                      2014-04-25 22:51:58 +0000
committer   Gerrit Code Review <noreply-gerritcodereview@google.com>    2014-04-25 22:51:58 +0000
commit      cbec967415eb0641c71ee77e478a2402780f6982 (patch)
tree        97811dc79ec89dce0138eb20b1ad440a726d5e94
parent      7f40b111755e300ddddd6839425337fe3af8d4e7 (diff)
parent      73d1e17b3afc7d5e56184f90bf819dc64956448a (diff)
Merge "Enable reading page map without lock in RosAlloc::BulkFree"
-rw-r--r--   runtime/base/mutex.cc                     22
-rw-r--r--   runtime/gc/accounting/space_bitmap.cc     14
-rw-r--r--   runtime/gc/accounting/space_bitmap.h       3
-rw-r--r--   runtime/gc/allocator/rosalloc-inl.h        2
-rw-r--r--   runtime/gc/allocator/rosalloc.cc         651
-rw-r--r--   runtime/gc/allocator/rosalloc.h           68
-rw-r--r--   runtime/gc/heap.cc                        41
-rw-r--r--   runtime/gc/heap.h                         10
-rw-r--r--   runtime/gc/space/rosalloc_space.cc         2
-rw-r--r--   runtime/gc/space/space_test.h             17
-rw-r--r--   runtime/thread.cc                          3
11 files changed, 451 insertions(+), 382 deletions(-)
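The headline change flips kReadPageMapEntryWithoutLockInBulkFree to true: RosAlloc::BulkFree() now classifies each pointer by reading its page map entry without holding lock_, relying on the fact that the entry cannot change while the chunk being freed is still allocated. A minimal sketch of that lock-free fast path, assuming the usual RosAlloc members (base_, page_map_, kPageSize) and simplified control flow rather than the exact code in the diff below:

// Sketch only: resolve the Run that owns `ptr` from the page map without taking lock_.
size_t pm_idx = (reinterpret_cast<uintptr_t>(ptr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
byte page_map_entry = page_map_[pm_idx];  // unlocked read; stable while ptr is still allocated
Run* run = nullptr;
if (LIKELY(page_map_entry == kPageMapRun)) {
  run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
} else if (LIKELY(page_map_entry == kPageMapRunPart)) {
  size_t pi = pm_idx;
  do {
    --pi;  // walk back to the first page of the run
  } while (page_map_[pi] != kPageMapRun);
  run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
} else {
  // Large objects (kPageMapLargeObject) still take lock_ and go through FreePages().
}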
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index fdd0249..2bc17bf 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -206,16 +206,16 @@ void BaseMutex::DumpContention(std::ostream& os) const {
os << "never contended";
} else {
os << "contended " << contention_count
- << " times, average wait of contender " << PrettyDuration(wait_time / contention_count);
+ << " total wait of contender " << PrettyDuration(wait_time)
+ << " average " << PrettyDuration(wait_time / contention_count);
SafeMap<uint64_t, size_t> most_common_blocker;
SafeMap<uint64_t, size_t> most_common_blocked;
- typedef SafeMap<uint64_t, size_t>::const_iterator It;
for (size_t i = 0; i < kContentionLogSize; ++i) {
uint64_t blocked_tid = log[i].blocked_tid;
uint64_t owner_tid = log[i].owner_tid;
uint32_t count = log[i].count;
if (count > 0) {
- It it = most_common_blocked.find(blocked_tid);
+ auto it = most_common_blocked.find(blocked_tid);
if (it != most_common_blocked.end()) {
most_common_blocked.Overwrite(blocked_tid, it->second + count);
} else {
@@ -231,10 +231,10 @@ void BaseMutex::DumpContention(std::ostream& os) const {
}
uint64_t max_tid = 0;
size_t max_tid_count = 0;
- for (It it = most_common_blocked.begin(); it != most_common_blocked.end(); ++it) {
- if (it->second > max_tid_count) {
- max_tid = it->first;
- max_tid_count = it->second;
+ for (const auto& pair : most_common_blocked) {
+ if (pair.second > max_tid_count) {
+ max_tid = pair.first;
+ max_tid_count = pair.second;
}
}
if (max_tid != 0) {
@@ -242,10 +242,10 @@ void BaseMutex::DumpContention(std::ostream& os) const {
}
max_tid = 0;
max_tid_count = 0;
- for (It it = most_common_blocker.begin(); it != most_common_blocker.end(); ++it) {
- if (it->second > max_tid_count) {
- max_tid = it->first;
- max_tid_count = it->second;
+ for (const auto& pair : most_common_blocker) {
+ if (pair.second > max_tid_count) {
+ max_tid = pair.first;
+ max_tid_count = pair.second;
}
}
if (max_tid != 0) {
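For reference, the reworked contention dump now reports the total wait alongside the per-contention average; with hypothetical numbers it reads roughly: "contended 37 total wait of contender 12.384ms average 334.702us".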
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 31a1537..66f9a3a 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -21,13 +21,17 @@ namespace gc {
namespace accounting {
template<size_t kAlignment>
+size_t SpaceBitmap<kAlignment>::ComputeBitmapSize(uint64_t capacity) {
+ const uint64_t kBytesCoveredPerWord = kAlignment * kBitsPerWord;
+ return (RoundUp(capacity, kBytesCoveredPerWord) / kBytesCoveredPerWord) * kWordSize;
+}
+
+template<size_t kAlignment>
SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap(
const std::string& name, MemMap* mem_map, byte* heap_begin, size_t heap_capacity) {
CHECK(mem_map != nullptr);
uword* bitmap_begin = reinterpret_cast<uword*>(mem_map->Begin());
- const uint64_t kBytesCoveredPerWord = kAlignment * kBitsPerWord;
- size_t bitmap_size = (RoundUp(static_cast<uint64_t>(heap_capacity), kBytesCoveredPerWord) /
- kBytesCoveredPerWord) * kWordSize;
+ const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin);
}
@@ -45,9 +49,7 @@ template<size_t kAlignment>
SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::Create(
const std::string& name, byte* heap_begin, size_t heap_capacity) {
// Round up since heap_capacity is not necessarily a multiple of kAlignment * kBitsPerWord.
- const uint64_t kBytesCoveredPerWord = kAlignment * kBitsPerWord;
- size_t bitmap_size = (RoundUp(static_cast<uint64_t>(heap_capacity), kBytesCoveredPerWord) /
- kBytesCoveredPerWord) * kWordSize;
+ const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
std::string error_msg;
UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size,
PROT_READ | PROT_WRITE, false, &error_msg));
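A quick worked example of the new ComputeBitmapSize() helper, under assumed parameters (8-byte object alignment and 64-bit bitmap words; the values are illustrative, not taken from the diff):

#include <cstdint>

// One bitmap word of kBitsPerWord bits covers kAlignment * kBitsPerWord bytes of heap.
// With kAlignment = 8 and kBitsPerWord = 64 that is 512 bytes per word, so a 64 MiB
// capacity needs 64 MiB / 512 = 131072 words, i.e. 131072 * 8 = 1 MiB of bitmap.
constexpr uint64_t kAlignment = 8;
constexpr uint64_t kBitsPerWord = 64;
constexpr uint64_t kWordSize = 8;
constexpr uint64_t kBytesCoveredPerWord = kAlignment * kBitsPerWord;  // 512
constexpr uint64_t kCapacity = 64ull * 1024 * 1024;                   // 64 MiB
constexpr uint64_t kBitmapSize =
    ((kCapacity + kBytesCoveredPerWord - 1) / kBytesCoveredPerWord) * kWordSize;
static_assert(kBitmapSize == 1024 * 1024, "a 64 MiB heap needs a 1 MiB bitmap");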
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index df3fd37..5c7cce2 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -200,6 +200,9 @@ class SpaceBitmap {
SpaceBitmap(const std::string& name, MemMap* mem_map, uword* bitmap_begin, size_t bitmap_size,
const void* heap_begin);
+ // Helper function for computing bitmap size based on a 64 bit capacity.
+ static size_t ComputeBitmapSize(uint64_t capacity);
+
template<bool kSetBit>
bool Modify(const mirror::Object* obj);
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
index f395314..ac0f67b 100644
--- a/runtime/gc/allocator/rosalloc-inl.h
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -29,7 +29,7 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by
}
void* m = AllocFromRun(self, size, bytes_allocated);
// Check if the returned memory is really all zero.
- if (kCheckZeroMemory && m != NULL) {
+ if (kCheckZeroMemory && m != nullptr) {
byte* bytes = reinterpret_cast<byte*>(m);
for (size_t i = 0; i < size; ++i) {
DCHECK_EQ(bytes[i], 0);
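This zero check in the inlined Alloc() path is the other side of a broader shift in this change: slots are now zeroed when they are freed (Run::FreeSlot() and MarkFreeBitMapShared() memset the slot, and FreePages() only memsets pages that are not already zero), so the allocation paths stop memset-ing and debug builds merely verify. A small illustrative sketch of that invariant, with hypothetical helper names:

#include <cassert>
#include <cstddef>
#include <cstring>

// Illustrative only: every path that releases a slot zeroes it, so the allocation path can
// skip its own memset and a debug build just checks the contents, as Alloc() does above.
void ReleaseSlotSketch(void* slot, size_t bracket_size) {
  memset(slot, 0, bracket_size);  // zero at free time, mirroring Run::FreeSlot()
}

void VerifyZeroedSketch(const void* slot, size_t bracket_size) {
  const unsigned char* bytes = static_cast<const unsigned char*>(slot);
  for (size_t i = 0; i < bracket_size; ++i) {
    assert(bytes[i] == 0);  // stands in for the DCHECK_EQ done by Alloc()
  }
}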
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 0f2d6a9..821aa2d 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -32,6 +32,10 @@ namespace allocator {
extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
+static constexpr bool kUsePrefetchDuringAllocRun = true;
+static constexpr bool kPrefetchNewRunDataByZeroing = false;
+static constexpr size_t kPrefetchStride = 64;
+
size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
@@ -39,6 +43,9 @@ size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
bool RosAlloc::initialized_ = false;
+size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
+RosAlloc::Run* RosAlloc::dedicated_full_run_ =
+ reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
PageReleaseMode page_release_mode, size_t page_release_size_threshold)
@@ -62,8 +69,9 @@ RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
<< ", max_capacity=" << std::dec << max_capacity_;
memset(current_runs_, 0, sizeof(current_runs_));
for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
- size_bracket_locks_[i] = new Mutex("an rosalloc size bracket lock",
- kRosAllocBracketLock);
+ size_bracket_lock_names[i] =
+ StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i));
+ size_bracket_locks_[i] = new Mutex(size_bracket_lock_names[i].c_str(), kRosAllocBracketLock);
}
DCHECK_EQ(footprint_, capacity_);
size_t num_of_pages = footprint_ / kPageSize;
@@ -71,7 +79,7 @@ RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
std::string error_msg;
page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", NULL, RoundUp(max_num_of_pages, kPageSize),
PROT_READ | PROT_WRITE, false, &error_msg));
- CHECK(page_map_mem_map_.get() != NULL) << "Couldn't allocate the page map : " << error_msg;
+ CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg;
page_map_ = page_map_mem_map_->Begin();
page_map_size_ = num_of_pages;
max_page_map_size_ = max_num_of_pages;
@@ -103,7 +111,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
lock_.AssertHeld(self);
DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
FreePageRun* res = NULL;
- size_t req_byte_size = num_pages * kPageSize;
+ const size_t req_byte_size = num_pages * kPageSize;
// Find the lowest address free page run that's large enough.
for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
FreePageRun* fpr = *it;
@@ -260,8 +268,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
break;
}
if (kIsDebugBuild) {
- // Clear the first page which isn't madvised away in the debug
- // build for the magic number.
+ // Clear the first page since it is not madvised due to the magic number.
memset(res, 0, kPageSize);
}
if (kTraceRosAlloc) {
@@ -279,7 +286,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
return nullptr;
}
-size_t RosAlloc::FreePages(Thread* self, void* ptr) {
+size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
lock_.AssertHeld(self);
size_t pm_idx = ToPageMapIndex(ptr);
DCHECK_LT(pm_idx, page_map_size_);
@@ -310,10 +317,21 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) {
num_pages++;
idx++;
}
+ const size_t byte_size = num_pages * kPageSize;
+ if (already_zero) {
+ if (kCheckZeroMemory) {
+ const uword* word_ptr = reinterpret_cast<uword*>(ptr);
+ for (size_t i = 0; i < byte_size / sizeof(uword); ++i) {
+ CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
+ }
+ }
+ } else if (!DoesReleaseAllPages()) {
+ memset(ptr, 0, byte_size);
+ }
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
- << "-0x" << (reinterpret_cast<intptr_t>(ptr) + num_pages * kPageSize)
+ << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
<< "(" << std::dec << (num_pages * kPageSize) << ")";
}
@@ -322,8 +340,8 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) {
if (kIsDebugBuild) {
fpr->magic_num_ = kMagicNumFree;
}
- fpr->SetByteSize(this, num_pages * kPageSize);
- DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+ fpr->SetByteSize(this, byte_size);
+ DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));
DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
if (!free_page_runs_.empty()) {
@@ -349,6 +367,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) {
if (kTraceRosAlloc) {
LOG(INFO) << "Success";
}
+ // Clear magic num since this is no longer the start of a free page run.
+ if (kIsDebugBuild) {
+ h->magic_num_ = 0;
+ }
free_page_runs_.erase(it++);
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
@@ -395,6 +417,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) {
}
l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+ // Clear magic num since this is no longer the start of a free page run.
+ if (kIsDebugBuild) {
+ fpr->magic_num_ = 0;
+ }
fpr = l;
} else {
// Not adjacent. Stop.
@@ -422,7 +448,7 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr) {
LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
<< " into free_page_runs_";
}
- return num_pages;
+ return byte_size;
}
void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
@@ -439,23 +465,19 @@ void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_alloca
}
return nullptr;
}
- if (bytes_allocated != NULL) {
- *bytes_allocated = num_pages * kPageSize;
- }
+ const size_t total_bytes = num_pages * kPageSize;
+ *bytes_allocated = total_bytes;
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
<< "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
<< "(" << std::dec << (num_pages * kPageSize) << ")";
}
- if (!DoesReleaseAllPages()) {
- // If it does not release all pages, pages may not be zeroed out.
- memset(r, 0, size);
- }
// Check if the returned memory is really all zero.
if (kCheckZeroMemory) {
- byte* bytes = reinterpret_cast<byte*>(r);
- for (size_t i = 0; i < size; ++i) {
- DCHECK_EQ(bytes[i], 0);
+ CHECK_EQ(total_bytes % sizeof(uword), 0U);
+ const uword* words = reinterpret_cast<uword*>(r);
+ for (size_t i = 0; i < total_bytes / sizeof(uword); ++i) {
+ CHECK_EQ(words[i], 0U);
}
}
return r;
@@ -479,7 +501,7 @@ size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
return 0;
case kPageMapLargeObject:
- return FreePages(self, ptr) * kPageSize;
+ return FreePages(self, ptr, false);
case kPageMapLargeObjectPart:
LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
return 0;
@@ -503,9 +525,7 @@ size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
}
}
DCHECK(run != nullptr);
- const size_t size = IndexToBracketSize(run->size_bracket_idx_);
- FreeFromRun(self, ptr, run);
- return size;
+ return FreeFromRun(self, ptr, run);
}
size_t RosAlloc::Free(Thread* self, void* ptr) {
@@ -513,42 +533,57 @@ size_t RosAlloc::Free(Thread* self, void* ptr) {
return FreeInternal(self, ptr);
}
-RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
- Run* new_run;
- size_t num_pages = numOfPages[idx];
- // Get the lowest address non-full run from the binary tree.
- Run* temp = NULL;
- std::set<Run*>* bt = &non_full_runs_[idx];
- std::set<Run*>::iterator found = bt->lower_bound(temp);
- if (found != bt->end()) {
- // If there's one, use it as the current run.
- Run* non_full_run = *found;
- DCHECK(non_full_run != NULL);
- new_run = non_full_run;
- DCHECK_EQ(new_run->is_thread_local_, 0);
- bt->erase(found);
- DCHECK_EQ(non_full_run->is_thread_local_, 0);
- } else {
- // If there's none, allocate a new run and use it as the
- // current run.
- {
- MutexLock mu(self, lock_);
- new_run = reinterpret_cast<Run*>(AllocPages(self, num_pages, kPageMapRun));
- }
- if (new_run == NULL) {
- return NULL;
- }
+RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) {
+ RosAlloc::Run* new_run = nullptr;
+ {
+ MutexLock mu(self, lock_);
+ new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun));
+ }
+ if (LIKELY(new_run != nullptr)) {
if (kIsDebugBuild) {
new_run->magic_num_ = kMagicNum;
}
new_run->size_bracket_idx_ = idx;
- new_run->top_slot_idx_ = 0;
- new_run->ClearBitMaps();
- new_run->to_be_bulk_freed_ = false;
+ new_run->SetAllocBitMapBitsForInvalidSlots();
+ DCHECK(!new_run->IsThreadLocal());
+ DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
+ DCHECK(!new_run->to_be_bulk_freed_);
+ if (kUsePrefetchDuringAllocRun && idx <= kMaxThreadLocalSizeBracketIdx) {
+ // Take ownership of the cache lines if we are likely to be thread local run.
+ if (kPrefetchNewRunDataByZeroing) {
+ // Zeroing the data is sometimes faster than prefetching but it increases memory usage
+ // since we end up dirtying zero pages which may have been madvised.
+ new_run->ZeroData();
+ } else {
+ const size_t num_of_slots = numOfSlots[idx];
+ const size_t bracket_size = bracketSizes[idx];
+ const size_t num_of_bytes = num_of_slots * bracket_size;
+ byte* begin = reinterpret_cast<byte*>(new_run) + headerSizes[idx];
+ for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) {
+ __builtin_prefetch(begin + i);
+ }
+ }
+ }
}
return new_run;
}
+RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
+ // Get the lowest address non-full run from the binary tree.
+ std::set<Run*>* const bt = &non_full_runs_[idx];
+ if (!bt->empty()) {
+ // If there's one, use it as the current run.
+ auto it = bt->begin();
+ Run* non_full_run = *it;
+ DCHECK(non_full_run != nullptr);
+ DCHECK(!non_full_run->IsThreadLocal());
+ bt->erase(it);
+ return non_full_run;
+ }
+ // If there's none, allocate a new run and use it as the current run.
+ return AllocRun(self, idx);
+}
+
void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
DCHECK_LE(size, kLargeSizeThreshold);
size_t bracket_size;
@@ -564,66 +599,62 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) {
// Use a thread-local run.
Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
- if (UNLIKELY(thread_local_run == NULL)) {
- MutexLock mu(self, *size_bracket_locks_[idx]);
- thread_local_run = RefillRun(self, idx);
- if (UNLIKELY(thread_local_run == NULL)) {
- return NULL;
- }
- DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
- DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
- thread_local_run->is_thread_local_ = 1;
- self->SetRosAllocRun(idx, thread_local_run);
- DCHECK(!thread_local_run->IsFull());
- }
-
- DCHECK(thread_local_run != NULL);
- DCHECK_NE(thread_local_run->is_thread_local_, 0);
+ // Allow invalid since this will always fail the allocation.
+ DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+ DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+ DCHECK(thread_local_run != nullptr);
+ DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
slot_addr = thread_local_run->AllocSlot();
-
- if (UNLIKELY(slot_addr == NULL)) {
+ // The allocation must fail if the run is invalid.
+ DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr)
+ << "allocated from an invalid run";
+ if (UNLIKELY(slot_addr == nullptr)) {
// The run got full. Try to free slots.
DCHECK(thread_local_run->IsFull());
MutexLock mu(self, *size_bracket_locks_[idx]);
bool is_all_free_after_merge;
+ // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
+ DCHECK_NE(thread_local_run, dedicated_full_run_);
// Some slot got freed. Keep it.
DCHECK(!thread_local_run->IsFull());
DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
if (is_all_free_after_merge) {
- // Reinstate the bump index mode if it's all free.
- DCHECK_EQ(thread_local_run->top_slot_idx_, numOfSlots[idx]);
- thread_local_run->top_slot_idx_ = 0;
+ // Check that the bitmap idx is back at 0 if it's all free.
+ DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
}
} else {
// No slots got freed. Try to refill the thread-local run.
DCHECK(thread_local_run->IsFull());
- self->SetRosAllocRun(idx, nullptr);
- thread_local_run->is_thread_local_ = 0;
- if (kIsDebugBuild) {
- full_runs_[idx].insert(thread_local_run);
- if (kTraceRosAlloc) {
- LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
- << reinterpret_cast<intptr_t>(thread_local_run)
- << " into full_runs_[" << std::dec << idx << "]";
+ if (thread_local_run != dedicated_full_run_) {
+ self->SetRosAllocRun(idx, dedicated_full_run_);
+ thread_local_run->SetIsThreadLocal(false);
+ if (kIsDebugBuild) {
+ full_runs_[idx].insert(thread_local_run);
+ if (kTraceRosAlloc) {
+ LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
+ << reinterpret_cast<intptr_t>(thread_local_run)
+ << " into full_runs_[" << std::dec << idx << "]";
+ }
}
+ DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+ DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
}
- DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
- DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+
thread_local_run = RefillRun(self, idx);
if (UNLIKELY(thread_local_run == NULL)) {
return NULL;
}
DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
- thread_local_run->is_thread_local_ = 1;
+ thread_local_run->SetIsThreadLocal(true);
self->SetRosAllocRun(idx, thread_local_run);
DCHECK(!thread_local_run->IsFull());
}
DCHECK(thread_local_run != NULL);
DCHECK(!thread_local_run->IsFull());
- DCHECK_NE(thread_local_run->is_thread_local_, 0);
+ DCHECK(thread_local_run->IsThreadLocal());
slot_addr = thread_local_run->AllocSlot();
// Must succeed now with a new run.
DCHECK(slot_addr != NULL);
@@ -644,7 +675,7 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
}
DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
- current_run->is_thread_local_ = 0;
+ current_run->SetIsThreadLocal(false);
current_runs_[idx] = current_run;
DCHECK(!current_run->IsFull());
}
@@ -671,7 +702,7 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
DCHECK(current_run != NULL);
DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
- current_run->is_thread_local_ = 0;
+ current_run->SetIsThreadLocal(false);
current_runs_[idx] = current_run;
DCHECK(!current_run->IsFull());
slot_addr = current_run->AllocSlot();
@@ -684,27 +715,27 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
<< "(" << std::dec << (bracket_size) << ")";
}
}
- if (LIKELY(bytes_allocated != NULL)) {
- *bytes_allocated = bracket_size;
- }
- memset(slot_addr, 0, size);
+ DCHECK(bytes_allocated != nullptr);
+ *bytes_allocated = bracket_size;
+ // Caller verifies that it is all 0.
return slot_addr;
}
-void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
+size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
DCHECK_EQ(run->magic_num_, kMagicNum);
DCHECK_LT(run, ptr);
DCHECK_LT(ptr, run->End());
- size_t idx = run->size_bracket_idx_;
- MutexLock mu(self, *size_bracket_locks_[idx]);
+ const size_t idx = run->size_bracket_idx_;
+ const size_t bracket_size = bracketSizes[idx];
bool run_was_full = false;
+ MutexLock mu(self, *size_bracket_locks_[idx]);
if (kIsDebugBuild) {
run_was_full = run->IsFull();
}
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
}
- if (LIKELY(run->is_thread_local_ != 0)) {
+ if (LIKELY(run->IsThreadLocal())) {
// It's a thread-local run. Just mark the thread-local free bit map and return.
DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
@@ -715,7 +746,7 @@ void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
<< reinterpret_cast<intptr_t>(run);
}
// A thread local run will be kept as a thread local even if it's become all free.
- return;
+ return bracket_size;
}
// Free the slot in the run.
run->FreeSlot(ptr);
@@ -735,9 +766,10 @@ void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
}
DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+ run->ZeroHeader();
{
MutexLock mu(self, lock_);
- FreePages(self, run);
+ FreePages(self, run, true);
}
} else {
// It is not completely free. If it wasn't the current run or
@@ -767,6 +799,7 @@ void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
}
}
}
+ return bracket_size;
}
std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
@@ -792,7 +825,7 @@ std::string RosAlloc::Run::Dump() {
<< " size_bracket_idx=" << idx
<< " is_thread_local=" << static_cast<int>(is_thread_local_)
<< " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
- << " top_slot_idx=" << top_slot_idx_
+ << " first_search_vec_idx=" << first_search_vec_idx_
<< " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
<< " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
<< " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
@@ -800,64 +833,52 @@ std::string RosAlloc::Run::Dump() {
return stream.str();
}
-void* RosAlloc::Run::AllocSlot() {
- size_t idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- DCHECK_LE(top_slot_idx_, num_slots);
- if (LIKELY(top_slot_idx_ < num_slots)) {
- // If it's in bump index mode, grab the top slot and increment the top index.
- size_t slot_idx = top_slot_idx_;
- byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
- if (kTraceRosAlloc) {
- LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
- << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
- }
- top_slot_idx_++;
- size_t vec_idx = slot_idx / 32;
- size_t vec_off = slot_idx % 32;
- uint32_t* vec = &alloc_bit_map_[vec_idx];
- DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
- *vec |= 1 << vec_off;
- DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
- return slot_addr;
- }
- // Not in bump index mode. Search the alloc bit map for an empty slot.
- size_t num_vec = RoundUp(num_slots, 32) / 32;
- size_t slot_idx = 0;
- bool found_slot = false;
- for (size_t v = 0; v < num_vec; v++) {
- uint32_t *vecp = &alloc_bit_map_[v];
- uint32_t ffz1 = __builtin_ffs(~*vecp);
- uint32_t ffz;
- // TODO: Use LIKELY or UNLIKELY here?
- if (LIKELY(ffz1 > 0 && (ffz = ffz1 - 1) + v * 32 < num_slots)) {
+inline void* RosAlloc::Run::AllocSlot() {
+ const size_t idx = size_bracket_idx_;
+ while (true) {
+ if (kIsDebugBuild) {
+ // Make sure that no slots leaked, the bitmap should be full for all previous vectors.
+ for (size_t i = 0; i < first_search_vec_idx_; ++i) {
+ CHECK_EQ(~alloc_bit_map_[i], 0U);
+ }
+ }
+ uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
+ uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
+ if (LIKELY(ffz1 != 0)) {
+ const uint32_t ffz = ffz1 - 1;
+ const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
+ const uint32_t mask = 1U << ffz;
+ DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
// Found an empty slot. Set the bit.
- DCHECK_EQ((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
- *vecp |= (1 << ffz);
- DCHECK_NE((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
- slot_idx = ffz + v * 32;
- found_slot = true;
- break;
+ DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
+ *alloc_bitmap_ptr |= mask;
+ DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
+ byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+ if (kTraceRosAlloc) {
+ LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+ << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+ }
+ return slot_addr;
}
- }
- if (LIKELY(found_slot)) {
- byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
- if (kTraceRosAlloc) {
- LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
- << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+ const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
+ if (first_search_vec_idx_ + 1 >= num_words) {
+ DCHECK(IsFull());
+ // Already at the last word, return null.
+ return nullptr;
}
- return slot_addr;
+ // Increase the index to the next word and try again.
+ ++first_search_vec_idx_;
}
- return NULL;
}
-inline void RosAlloc::Run::FreeSlot(void* ptr) {
- DCHECK_EQ(is_thread_local_, 0);
- byte idx = size_bracket_idx_;
- size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+void RosAlloc::Run::FreeSlot(void* ptr) {
+ DCHECK(!IsThreadLocal());
+ const byte idx = size_bracket_idx_;
+ const size_t bracket_size = bracketSizes[idx];
+ const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
- (reinterpret_cast<byte*>(this) + headerSizes[idx]);
- DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
- size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+ DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
+ size_t slot_idx = offset_from_slot_base / bracket_size;
DCHECK_LT(slot_idx, numOfSlots[idx]);
size_t vec_idx = slot_idx / 32;
if (kIsDebugBuild) {
@@ -866,9 +887,14 @@ inline void RosAlloc::Run::FreeSlot(void* ptr) {
}
size_t vec_off = slot_idx % 32;
uint32_t* vec = &alloc_bit_map_[vec_idx];
- DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
- *vec &= ~(1 << vec_off);
- DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+ first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
+ const uint32_t mask = 1U << vec_off;
+ DCHECK_NE(*vec & mask, 0U);
+ *vec &= ~mask;
+ DCHECK_EQ(*vec & mask, 0U);
+ // Zero out the memory.
+ // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
+ memset(ptr, 0, bracket_size);
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
<< ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
@@ -876,11 +902,11 @@ inline void RosAlloc::Run::FreeSlot(void* ptr) {
}
inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
- DCHECK_NE(is_thread_local_, 0);
+ DCHECK(IsThreadLocal());
// Free slots in the alloc bit map based on the thread local free bit map.
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
+ const size_t idx = size_bracket_idx_;
+ const size_t num_of_slots = numOfSlots[idx];
+ const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
bool changed = false;
uint32_t* vecp = &alloc_bit_map_[0];
uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
@@ -890,6 +916,7 @@ inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_
uint32_t vec_before = *vecp;
uint32_t vec_after;
if (tl_free_vec != 0) {
+ first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
vec_after = vec_before & ~tl_free_vec;
*vecp = vec_after;
changed = true;
@@ -898,7 +925,13 @@ inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_
vec_after = vec_before;
}
if (vec_after != 0) {
- is_all_free_after = false;
+ if (v == num_vec - 1) {
+ // Only not all free if a bit other than the mask bits are set.
+ is_all_free_after =
+ is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
+ } else {
+ is_all_free_after = false;
+ }
}
DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
}
@@ -909,16 +942,15 @@ inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_
}
inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
- DCHECK_EQ(is_thread_local_, 0);
+ DCHECK(!IsThreadLocal());
// Free slots in the alloc bit map based on the bulk free bit map.
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
+ const size_t num_vec = NumberOfBitmapVectors();
uint32_t* vecp = &alloc_bit_map_[0];
uint32_t* free_vecp = &BulkFreeBitMap()[0];
for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
uint32_t free_vec = *free_vecp;
if (free_vec != 0) {
+ first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
*vecp &= ~free_vec;
*free_vecp = 0; // clear the bulk free bit map.
}
@@ -927,11 +959,9 @@ inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
}
inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
- DCHECK_NE(is_thread_local_, 0);
+ DCHECK(IsThreadLocal());
// Union the thread local bit map with the bulk free bit map.
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
+ size_t num_vec = NumberOfBitmapVectors();
uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
uint32_t* from_vecp = &BulkFreeBitMap()[0];
for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
@@ -945,66 +975,71 @@ inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
}
inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
- DCHECK_NE(is_thread_local_, 0);
+ DCHECK(IsThreadLocal());
MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
}
-inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
- MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
+inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
+ return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
}
-inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
- const char* caller_name) {
- byte idx = size_bracket_idx_;
- size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
+ const char* caller_name) {
+ const byte idx = size_bracket_idx_;
+ const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
- (reinterpret_cast<byte*>(this) + headerSizes[idx]);
- DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
- size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+ const size_t bracket_size = bracketSizes[idx];
+ memset(ptr, 0, bracket_size);
+ DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
+ size_t slot_idx = offset_from_slot_base / bracket_size;
DCHECK_LT(slot_idx, numOfSlots[idx]);
size_t vec_idx = slot_idx / 32;
if (kIsDebugBuild) {
- size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+ size_t num_vec = NumberOfBitmapVectors();
DCHECK_LT(vec_idx, num_vec);
}
size_t vec_off = slot_idx % 32;
uint32_t* vec = &free_bit_map_base[vec_idx];
- DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
- *vec |= 1 << vec_off;
- DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+ const uint32_t mask = 1U << vec_off;
+ DCHECK_EQ(*vec & mask, 0U);
+ *vec |= mask;
+ DCHECK_NE(*vec & mask, 0U);
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
<< reinterpret_cast<intptr_t>(ptr)
<< ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
}
+ return bracket_size;
+}
+
+inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
+ const size_t kBitsPerVec = 32;
+ DCHECK_GE(num_slots * kBitsPerVec, num_vec);
+ size_t remain = num_vec * kBitsPerVec - num_slots;
+ DCHECK_NE(remain, kBitsPerVec);
+ return ((1U << remain) - 1) << (kBitsPerVec - remain);
}
inline bool RosAlloc::Run::IsAllFree() {
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
- for (size_t v = 0; v < num_vec; v++) {
+ const byte idx = size_bracket_idx_;
+ const size_t num_slots = numOfSlots[idx];
+ const size_t num_vec = NumberOfBitmapVectors();
+ DCHECK_NE(num_vec, 0U);
+ // Check the last vector after the loop since it uses a special case for the masked bits.
+ for (size_t v = 0; v < num_vec - 1; v++) {
uint32_t vec = alloc_bit_map_[v];
if (vec != 0) {
return false;
}
}
- return true;
+ // Make sure the last word is equal to the mask, all other bits must be 0.
+ return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
}
inline bool RosAlloc::Run::IsFull() {
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
- size_t slots = 0;
- for (size_t v = 0; v < num_vec; v++, slots += 32) {
- DCHECK_GE(num_slots, slots);
- uint32_t vec = alloc_bit_map_[v];
- uint32_t mask = (num_slots - slots >= 32) ? static_cast<uint32_t>(-1)
- : (1 << (num_slots - slots)) - 1;
- if ((num_slots - slots) >= 32) {
- DCHECK_EQ(mask, static_cast<uint32_t>(-1));
- }
- if (vec != mask) {
+ const size_t num_vec = NumberOfBitmapVectors();
+ for (size_t v = 0; v < num_vec; ++v) {
+ if (~alloc_bit_map_[v] != 0) {
return false;
}
}
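The IsAllFree() comparison above leans on GetBitmapLastVectorMask(): the bits of the last 32-bit vector that do not correspond to real slots are kept permanently set (see SetAllocBitMapBitsForInvalidSlots() further down), so an all-free run's last vector equals exactly that mask. A worked example with assumed numbers (42 slots, hence 2 vectors):

#include <cstddef>
#include <cstdint>

// With 42 slots the run needs num_vec = 2 vectors; the last vector has 64 - 42 = 22 padding
// bits, so the mask sets the top 22 bits of that word: ((1U << 22) - 1) << 10 == 0xFFFFFC00.
// A completely free run therefore has alloc_bit_map_ = {0x00000000, 0xFFFFFC00}.
constexpr uint32_t LastVectorMask(size_t num_slots, size_t num_vec) {
  return ((1U << (num_vec * 32 - num_slots)) - 1) << (32 - (num_vec * 32 - num_slots));
}
static_assert(LastVectorMask(42, 2) == 0xFFFFFC00u, "top 22 bits of the last vector are padding");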
@@ -1012,9 +1047,7 @@ inline bool RosAlloc::Run::IsFull() {
}
inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
+ const size_t num_vec = NumberOfBitmapVectors();
for (size_t v = 0; v < num_vec; v++) {
uint32_t vec = BulkFreeBitMap()[v];
if (vec != 0) {
@@ -1025,9 +1058,7 @@ inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
}
inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
+ const size_t num_vec = NumberOfBitmapVectors();
for (size_t v = 0; v < num_vec; v++) {
uint32_t vec = ThreadLocalFreeBitMap()[v];
if (vec != 0) {
@@ -1037,11 +1068,31 @@ inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
return true;
}
-inline void RosAlloc::Run::ClearBitMaps() {
- byte idx = size_bracket_idx_;
- size_t num_slots = numOfSlots[idx];
- size_t num_vec = RoundUp(num_slots, 32) / 32;
- memset(alloc_bit_map_, 0, sizeof(uint32_t) * num_vec * 3);
+inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
+ const size_t idx = size_bracket_idx_;
+ const size_t num_slots = numOfSlots[idx];
+ const size_t num_vec = RoundUp(num_slots, 32) / 32;
+ DCHECK_NE(num_vec, 0U);
+ // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
+ // don't represent valid slots.
+ alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
+}
+
+inline void RosAlloc::Run::ZeroHeader() {
+ const byte idx = size_bracket_idx_;
+ memset(this, 0, headerSizes[idx]);
+}
+
+inline void RosAlloc::Run::ZeroData() {
+ const byte idx = size_bracket_idx_;
+ byte* slot_begin = reinterpret_cast<byte*>(this) + headerSizes[idx];
+ memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
+}
+
+inline void RosAlloc::Run::FillAllocBitMap() {
+ size_t num_vec = NumberOfBitmapVectors();
+ memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec);
+ first_search_vec_idx_ = num_vec - 1; // No free bits in any of the bitmap words.
}
void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
@@ -1073,7 +1124,7 @@ void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size
// lock for better performance, assuming that the existence of an
// allocated chunk/pointer being freed in BulkFree() guarantees that
// the page map entry won't change. Disabled for now.
-static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = false;
+static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
size_t freed_bytes = 0;
@@ -1096,11 +1147,10 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
#endif
for (size_t i = 0; i < num_ptrs; i++) {
void* ptr = ptrs[i];
- ptrs[i] = NULL;
DCHECK_LE(base_, ptr);
DCHECK_LT(ptr, base_ + footprint_);
size_t pm_idx = RoundDownToPageMapIndex(ptr);
- Run* run = NULL;
+ Run* run = nullptr;
if (kReadPageMapEntryWithoutLockInBulkFree) {
// Read the page map entries without locking the lock.
byte page_map_entry = page_map_[pm_idx];
@@ -1111,106 +1161,74 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
}
if (LIKELY(page_map_entry == kPageMapRun)) {
run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
- DCHECK_EQ(run->magic_num_, kMagicNum);
} else if (LIKELY(page_map_entry == kPageMapRunPart)) {
size_t pi = pm_idx;
- DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
// Find the beginning of the run.
- while (page_map_[pi] != kPageMapRun) {
- pi--;
+ do {
+ --pi;
DCHECK_LT(pi, capacity_ / kPageSize);
- }
- DCHECK_EQ(page_map_[pi], kPageMapRun);
+ } while (page_map_[pi] != kPageMapRun);
run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
- DCHECK_EQ(run->magic_num_, kMagicNum);
} else if (page_map_entry == kPageMapLargeObject) {
MutexLock mu(self, lock_);
- freed_bytes += FreePages(self, ptr) * kPageSize;
+ freed_bytes += FreePages(self, ptr, false);
continue;
} else {
LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
}
- DCHECK(run != nullptr);
- // Set the bit in the bulk free bit map.
- run->MarkBulkFreeBitMap(ptr);
- freed_bytes += IndexToBracketSize(run->size_bracket_idx_);
-#ifdef HAVE_ANDROID_OS
- if (!run->to_be_bulk_freed_) {
- run->to_be_bulk_freed_ = true;
- runs.push_back(run);
- }
-#else
- runs.insert(run);
-#endif
} else {
// Read the page map entries with a lock.
- bool free_from_run = false;
- {
- MutexLock mu(self, lock_);
- DCHECK_LT(pm_idx, page_map_size_);
- byte page_map_entry = page_map_[pm_idx];
- if (kTraceRosAlloc) {
- LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
- << std::dec << pm_idx
- << ", page_map_entry=" << static_cast<int>(page_map_entry);
- }
- if (LIKELY(page_map_entry == kPageMapRun)) {
- free_from_run = true;
- run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
- DCHECK_EQ(run->magic_num_, kMagicNum);
- } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
- free_from_run = true;
- size_t pi = pm_idx;
- DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
- // Find the beginning of the run.
- while (page_map_[pi] != kPageMapRun) {
- pi--;
- DCHECK_LT(pi, capacity_ / kPageSize);
- }
- DCHECK_EQ(page_map_[pi], kPageMapRun);
- run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
- DCHECK_EQ(run->magic_num_, kMagicNum);
- } else if (page_map_entry == kPageMapLargeObject) {
- freed_bytes += FreePages(self, ptr) * kPageSize;
- } else {
- LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
- }
+ MutexLock mu(self, lock_);
+ DCHECK_LT(pm_idx, page_map_size_);
+ byte page_map_entry = page_map_[pm_idx];
+ if (kTraceRosAlloc) {
+ LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
+ << std::dec << pm_idx
+ << ", page_map_entry=" << static_cast<int>(page_map_entry);
+ }
+ if (LIKELY(page_map_entry == kPageMapRun)) {
+ run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+ } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
+ size_t pi = pm_idx;
+ // Find the beginning of the run.
+ do {
+ --pi;
+ DCHECK_LT(pi, capacity_ / kPageSize);
+ } while (page_map_[pi] != kPageMapRun);
+ run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+ } else if (page_map_entry == kPageMapLargeObject) {
+ freed_bytes += FreePages(self, ptr, false);
+ continue;
+ } else {
+ LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
}
- if (LIKELY(free_from_run)) {
- DCHECK(run != NULL);
- // Set the bit in the bulk free bit map.
- run->MarkBulkFreeBitMap(ptr);
- freed_bytes += IndexToBracketSize(run->size_bracket_idx_);
+ }
+ DCHECK(run != nullptr);
+ DCHECK_EQ(run->magic_num_, kMagicNum);
+ // Set the bit in the bulk free bit map.
+ freed_bytes += run->MarkBulkFreeBitMap(ptr);
#ifdef HAVE_ANDROID_OS
- if (!run->to_be_bulk_freed_) {
- run->to_be_bulk_freed_ = true;
- runs.push_back(run);
- }
+ if (!run->to_be_bulk_freed_) {
+ run->to_be_bulk_freed_ = true;
+ runs.push_back(run);
+ }
#else
- runs.insert(run);
+ runs.insert(run);
#endif
- }
- }
}
// Now, iterate over the affected runs and update the alloc bit map
// based on the bulk free bit map (for non-thread-local runs) and
// union the bulk free bit map into the thread-local free bit map
// (for thread-local runs.)
-#ifdef HAVE_ANDROID_OS
- typedef std::vector<Run*>::iterator It;
-#else
- typedef hash_set<Run*, hash_run, eq_run>::iterator It;
-#endif
- for (It it = runs.begin(); it != runs.end(); ++it) {
- Run* run = *it;
+ for (Run* run : runs) {
#ifdef HAVE_ANDROID_OS
DCHECK(run->to_be_bulk_freed_);
run->to_be_bulk_freed_ = false;
#endif
size_t idx = run->size_bracket_idx_;
MutexLock mu(self, *size_bracket_locks_[idx]);
- if (run->is_thread_local_ != 0) {
+ if (run->IsThreadLocal()) {
DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
@@ -1219,7 +1237,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
<< std::hex << reinterpret_cast<intptr_t>(run);
}
- DCHECK_NE(run->is_thread_local_, 0);
+ DCHECK(run->IsThreadLocal());
// A thread local run will be kept as a thread local even if
// it's become all free.
} else {
@@ -1269,8 +1287,9 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
DCHECK(non_full_runs->find(run) == non_full_runs->end());
}
if (!run_was_current) {
+ run->ZeroHeader();
MutexLock mu(self, lock_);
- FreePages(self, run);
+ FreePages(self, run, true);
}
} else {
// It is not completely free. If it wasn't the current run or
@@ -1381,7 +1400,7 @@ std::string RosAlloc::DumpPageMap() {
stream << "[" << i << "]=Run (start)"
<< " idx=" << idx
<< " numOfPages=" << numOfPages[idx]
- << " thread_local=" << static_cast<int>(run->is_thread_local_)
+ << " is_thread_local=" << run->is_thread_local_
<< " is_all_free=" << (run->IsAllFree() ? 1 : 0)
<< std::endl;
break;
@@ -1556,6 +1575,8 @@ void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_by
// The start of a run.
Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
DCHECK_EQ(run->magic_num_, kMagicNum);
+ // The dedicated full run doesn't contain any real allocations, don't visit the slots in
+ // there.
run->InspectAllSlots(handler, arg);
size_t num_pages = numOfPages[run->size_bracket_idx_];
if (kIsDebugBuild) {
@@ -1605,14 +1626,16 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
MutexLock mu(self, *size_bracket_locks_[idx]);
Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
- if (thread_local_run != NULL) {
+ CHECK(thread_local_run != nullptr);
+ // Invalid means already revoked.
+ DCHECK(thread_local_run->IsThreadLocal());
+ if (thread_local_run != dedicated_full_run_) {
+ thread->SetRosAllocRun(idx, dedicated_full_run_);
DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
- DCHECK_NE(thread_local_run->is_thread_local_, 0);
- thread->SetRosAllocRun(idx, nullptr);
// Note the thread local run may not be full here.
bool dont_care;
thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
- thread_local_run->is_thread_local_ = 0;
+ thread_local_run->SetIsThreadLocal(false);
thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
@@ -1628,7 +1651,8 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
}
} else if (thread_local_run->IsAllFree()) {
MutexLock mu(self, lock_);
- FreePages(self, thread_local_run);
+ thread_local_run->ZeroHeader();
+ FreePages(self, thread_local_run, true);
} else {
non_full_runs_[idx].insert(thread_local_run);
DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end());
@@ -1648,9 +1672,8 @@ void RosAlloc::RevokeAllThreadLocalRuns() {
MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
- for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
- Thread* t = *it;
- RevokeThreadLocalRuns(t);
+ for (Thread* thread : thread_list) {
+ RevokeThreadLocalRuns(thread);
}
}
@@ -1662,7 +1685,7 @@ void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
MutexLock mu(self, *size_bracket_locks_[idx]);
Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
- DCHECK(thread_local_run == nullptr);
+ DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_);
}
}
}
@@ -1770,6 +1793,15 @@ void RosAlloc::Initialize() {
<< ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
}
}
+ // Fill the alloc bitmap so nobody can successfully allocate from it.
+ if (kIsDebugBuild) {
+ dedicated_full_run_->magic_num_ = kMagicNum;
+ }
+ // It doesn't matter which size bracket we use since the main goal is to have the allocation
+ // fail 100% of the time you attempt to allocate into the dedicated full run.
+ dedicated_full_run_->size_bracket_idx_ = 0;
+ dedicated_full_run_->FillAllocBitMap();
+ dedicated_full_run_->SetIsThreadLocal(true);
}
void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
@@ -1867,6 +1899,7 @@ void RosAlloc::Verify() {
<< " and the run size : page index range " << i << " to " << (i + num_pages)
<< std::endl << DumpPageMap();
}
+ // Don't verify the dedicated_full_run_ since it doesn't have any real allocations.
runs.push_back(run);
i += num_pages;
CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
@@ -1891,34 +1924,25 @@ void RosAlloc::Verify() {
void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump();
- size_t idx = size_bracket_idx_;
+ const size_t idx = size_bracket_idx_;
CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
- size_t num_slots = numOfSlots[idx];
+ const size_t num_slots = numOfSlots[idx];
+ const size_t num_vec = RoundUp(num_slots, 32) / 32;
+ CHECK_GT(num_vec, 0U);
size_t bracket_size = IndexToBracketSize(idx);
CHECK_EQ(slot_base + num_slots * bracket_size,
reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize)
<< "Mismatch in the end address of the run " << Dump();
// Check that the bulk free bitmap is clean. It's only used during BulkFree().
CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
- // Check the bump index mode, if it's on.
- if (top_slot_idx_ < num_slots) {
- // If the bump index mode is on (top_slot_idx_ < num_slots), then
- // all of the slots after the top index must be free.
- for (size_t i = top_slot_idx_; i < num_slots; ++i) {
- size_t vec_idx = i / 32;
- size_t vec_off = i % 32;
- uint32_t vec = alloc_bit_map_[vec_idx];
- CHECK_EQ((vec & (1 << vec_off)), static_cast<uint32_t>(0))
- << "A slot >= top_slot_idx_ isn't free " << Dump();
- }
- } else {
- CHECK_EQ(top_slot_idx_, num_slots)
- << "If the bump index mode is off, the top index == the number of slots "
- << Dump();
- }
+ uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec);
+ // Make sure all the bits at the end of the run are set so that we don't allocate there.
+ CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask);
+ // Ensure that the first bitmap index is valid.
+ CHECK_LT(first_search_vec_idx_, num_vec);
// Check the thread local runs, the current runs, and the run sets.
- if (is_thread_local_) {
+ if (IsThreadLocal()) {
// If it's a thread local run, then it must be pointed to by an owner thread.
bool owner_found = false;
std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
@@ -1980,7 +2004,6 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
}
}
// Check each slot.
- size_t num_vec = RoundUp(num_slots, 32) / 32;
size_t slots = 0;
for (size_t v = 0; v < num_vec; v++, slots += 32) {
DCHECK_GE(num_slots, slots) << "Out of bounds";
@@ -1991,7 +2014,7 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
bool is_allocated = ((vec >> i) & 0x1) != 0;
// If a thread local run, slots may be marked freed in the
// thread local free bitmap.
- bool is_thread_local_freed = is_thread_local_ && ((thread_local_free_vec >> i) & 0x1) != 0;
+ bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
if (is_allocated && !is_thread_local_freed) {
byte* slot_addr = slot_base + (slots + i) * bracket_size;
mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
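Two things are worth pulling out of the rosalloc.cc changes above: the old bump-index top_slot_idx_ mode is gone, so Run::AllocSlot() is now a single bitmap scan that resumes at first_search_vec_idx_ and uses __builtin_ffs on the inverted vector to find a free bit (the padding bits at the end of the bitmap are pre-set, so the scan can never hand out an invalid slot); and fresh thread-local runs are warmed up either by prefetching their data or by zeroing it (kPrefetchNewRunDataByZeroing). A compact sketch of the search step, with simplified types and hypothetical names rather than the exact ART code:

#include <cstddef>
#include <cstdint>

// Find and claim the first clear bit at or after word `*first_search_vec_idx` in a 32-bit
// bitmap, returning the slot index, or -1 when every word is full (the run is full).
int FindFreeSlot(uint32_t* bitmap, size_t num_words, uint32_t* first_search_vec_idx) {
  for (uint32_t v = *first_search_vec_idx; v < num_words; ++v) {
    const uint32_t inverted = ~bitmap[v];
    const int ffz1 = __builtin_ffs(static_cast<int>(inverted));  // 1-based index of first zero bit
    if (ffz1 != 0) {
      const uint32_t bit = static_cast<uint32_t>(ffz1 - 1);
      bitmap[v] |= 1U << bit;                  // claim the slot
      *first_search_vec_idx = v;               // frees move this index backwards via std::min
      return static_cast<int>(v * 32 + bit);
    }
  }
  *first_search_vec_idx = static_cast<uint32_t>(num_words - 1);  // park on the last word: full
  return -1;
}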
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 0c508b7..714230b 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -155,7 +155,7 @@ class RosAlloc {
// +-------------------+
// | to_be_bulk_freed |
// +-------------------+
- // | top_slot_idx |
+ // | top_bitmap_idx |
// +-------------------+
// | |
// | alloc bit map |
@@ -186,12 +186,12 @@ class RosAlloc {
//
class Run {
public:
- byte magic_num_; // The magic number used for debugging.
- byte size_bracket_idx_; // The index of the size bracket of this run.
- byte is_thread_local_; // True if this run is used as a thread-local run.
- byte to_be_bulk_freed_; // Used within BulkFree() to flag a run that's involved with a bulk free.
- uint32_t top_slot_idx_; // The top slot index when this run is in bump index mode.
- uint32_t alloc_bit_map_[0]; // The bit map that allocates if each slot is in use.
+ byte magic_num_; // The magic number used for debugging.
+ byte size_bracket_idx_; // The index of the size bracket of this run.
+ byte is_thread_local_; // True if this run is used as a thread-local run.
+ byte to_be_bulk_freed_; // Used within BulkFree() to flag a run that's involved with a bulk free.
+ uint32_t first_search_vec_idx_; // The index of the first bitmap vector which may contain an available slot.
+ uint32_t alloc_bit_map_[0]; // The bit map that allocates if each slot is in use.
// bulk_free_bit_map_[] : The bit map that is used for GC to
// temporarily mark the slots to free without using a lock. After
@@ -225,6 +225,16 @@ class RosAlloc {
void* End() {
return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
}
+ // Returns the number of bitmap words per run.
+ size_t NumberOfBitmapVectors() const {
+ return RoundUp(numOfSlots[size_bracket_idx_], 32) / 32;
+ }
+ void SetIsThreadLocal(bool is_thread_local) {
+ is_thread_local_ = is_thread_local ? 1 : 0;
+ }
+ bool IsThreadLocal() const {
+ return is_thread_local_ != 0;
+ }
// Frees slots in the allocation bit map with regard to the
// thread-local free bit map. Used when a thread-local run becomes
// full.
@@ -243,10 +253,13 @@ class RosAlloc {
void* AllocSlot();
// Frees a slot in a run. This is used in a non-bulk free.
void FreeSlot(void* ptr);
- // Marks the slots to free in the bulk free bit map.
- void MarkBulkFreeBitMap(void* ptr);
+ // Marks the slots to free in the bulk free bit map. Returns the bracket size.
+ size_t MarkBulkFreeBitMap(void* ptr);
// Marks the slots to free in the thread-local free bit map.
void MarkThreadLocalFreeBitMap(void* ptr);
+ // Last word mask, all of the bits in the last word which aren't valid slots are set to
+ // optimize allocation path.
+ static size_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec);
// Returns true if all the slots in the run are not in use.
bool IsAllFree();
// Returns true if all the slots in the run are in use.
@@ -255,8 +268,14 @@ class RosAlloc {
bool IsBulkFreeBitmapClean();
// Returns true if the thread local free bit map is clean.
bool IsThreadLocalFreeBitmapClean();
- // Clear all the bit maps.
- void ClearBitMaps();
+ // Set the alloc_bit_map_ bits for slots that are past the end of the run.
+ void SetAllocBitMapBitsForInvalidSlots();
+ // Zero the run's data.
+ void ZeroData();
+ // Zero the run's header.
+ void ZeroHeader();
+ // Fill the alloc bitmap with 1s.
+ void FillAllocBitMap();
// Iterate over all the slots and apply the given function.
void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
// Dump the run metadata for debugging.
@@ -267,8 +286,9 @@ class RosAlloc {
EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
private:
- // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap().
- void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+ // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap(). Returns the bracket
+ // size.
+ size_t MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
// Turns the bit map into a string for debugging.
static std::string BitMapToStr(uint32_t* bit_map_base, size_t num_vec);
};
@@ -376,7 +396,7 @@ class RosAlloc {
return byte_offset / kPageSize;
}
// Returns the page map index from an address with rounding.
- size_t RoundDownToPageMapIndex(void* addr) {
+ size_t RoundDownToPageMapIndex(void* addr) const {
DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_);
return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
}
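
RoundDownToPageMapIndex() only gains a const qualifier here; its arithmetic is unchanged. A small worked sketch of that computation under an assumed 4 KiB page size (names illustrative):

#include <cstddef>
#include <cstdint>

constexpr size_t kPageSize = 4096;  // Assumed page size for the example.

// The index is the byte offset from the base of the space divided by the page
// size; integer division truncates, which is the rounding-down in the name.
size_t PageMapIndex(uintptr_t base, uintptr_t addr) {
  return (addr - base) / kPageSize;
}
// e.g. PageMapIndex(0x10000, 0x10000 + 3 * kPageSize + 123) == 3
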
@@ -446,12 +466,19 @@ class RosAlloc {
hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
// The set of free pages.
std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
+ // The dedicated full run; it is always full and is shared by all threads when revoking happens.
+ // This is an optimization since it enables us to avoid a null check for revoked runs.
+ static Run* dedicated_full_run_;
+ // Using size_t to ensure that it is at least word aligned.
+ static size_t dedicated_full_run_storage_[];
// The current runs where the allocations are first attempted for
// the size brackes that do not use thread-local
// runs. current_runs_[i] is guarded by size_bracket_locks_[i].
Run* current_runs_[kNumOfSizeBrackets];
// The mutexes, one per size bracket.
Mutex* size_bracket_locks_[kNumOfSizeBrackets];
+ // Bracket lock names (since locks only have char* names).
+ std::string size_bracket_lock_names[kNumOfSizeBrackets];
// The types of page map entries.
enum {
kPageMapEmpty = 0, // Not allocated.
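
The dedicated_full_run_ added above lets revoked thread-local run pointers stay non-null: they point at a shared run that is permanently full, so the allocation fast path only has to handle the ordinary "run is full" case. A self-contained sketch of the idea, with purely illustrative types and sizes rather than ART's:

#include <algorithm>
#include <array>
#include <cstddef>

// Toy run: a fixed pool of slots plus a cursor. The dedicated full run is
// simply constructed with its cursor already at the end.
struct Run {
  std::array<int, 4> slots{};
  size_t next;
  explicit Run(size_t start = 0) : next(start) {}
  void* AllocSlot() {
    return next < slots.size() ? &slots[next++] : nullptr;  // nullptr == full.
  }
};

Run dedicated_full_run(4);          // Shared sentinel, permanently full.
constexpr size_t kNumBrackets = 3;  // Illustrative bracket count.
Run* thread_local_runs[kNumBrackets];

void RevokeThreadLocalRuns() {
  // Instead of resetting to nullptr, every bracket points at the shared full
  // run, so the fast path below needs no null check.
  std::fill(std::begin(thread_local_runs), std::end(thread_local_runs),
            &dedicated_full_run);
}

void* AllocFast(size_t bracket) {
  // A nullptr result uniformly covers "genuinely full" and "revoked" and sends
  // the caller to the slow path that installs a real run.
  return thread_local_runs[bracket]->AllocSlot();
}
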
@@ -493,15 +520,19 @@ class RosAlloc {
// Page-granularity alloc/free
void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
- // Returns how many pages were freed.
- size_t FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ // Returns how many bytes were freed.
+ size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Allocate/free a run slot.
void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
LOCKS_EXCLUDED(lock_);
- void FreeFromRun(Thread* self, void* ptr, Run* run)
+ // Returns the bracket size.
+ size_t FreeFromRun(Thread* self, void* ptr, Run* run)
LOCKS_EXCLUDED(lock_);
+ // Used to allocate a new thread local run for a size bracket.
+ Run* AllocRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
+
// Used to acquire a new/reused run for a size bracket. Used when a
// thread-local or current run gets full.
Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
@@ -558,6 +589,9 @@ class RosAlloc {
void AssertAllThreadLocalRunsAreRevoked() LOCKS_EXCLUDED(Locks::thread_list_lock_);
// Dumps the page map for debugging.
std::string DumpPageMap() EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ static Run* GetDedicatedFullRun() {
+ return dedicated_full_run_;
+ }
// Callbacks for InspectAll that will count the number of bytes
// allocated and objects allocated, respectively.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 517c748..5d72bc1 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -78,9 +78,9 @@ static constexpr size_t kGcAlotInterval = KB;
static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
static constexpr size_t kMaxConcurrentRemainingBytes = 512 * KB;
// Sticky GC throughput adjustment, divided by 4. Increasing this causes sticky GC to occur more
-// relative to partial/full GC. This is desirable since sticky GCs interfere less with mutator
+// relative to partial/full GC. This may be desirable since sticky GCs interfere less with mutator
// threads (lower pauses, use less memory bandwidth).
-static constexpr double kStickyGcThroughputAdjustment = 1.25;
+static constexpr double kStickyGcThroughputAdjustment = 1.0;
// Whether or not we use the free list large object space.
static constexpr bool kUseFreeListSpaceForLOS = false;
// Whtehr or not we compact the zygote in PreZygoteFork.
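
kStickyGcThroughputAdjustment drops from 1.25 to 1.0, i.e. sticky GC no longer gets a 25% throughput bonus when it is weighed against partial/full GC. A hedged sketch of how such a multiplier could enter that decision (an assumed comparison for illustration, not the actual heuristic in heap.cc):

enum class GcType { kSticky, kPartial };

// Keep choosing the cheaper sticky collection only while its adjusted
// throughput keeps up with the heavier one; with an adjustment of 1.0 the two
// throughputs are compared on equal footing.
GcType ChooseNextGc(double sticky_bytes_per_s, double partial_bytes_per_s,
                    double sticky_adjustment) {
  return sticky_bytes_per_s * sticky_adjustment >= partial_bytes_per_s
             ? GcType::kSticky
             : GcType::kPartial;
}
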
@@ -595,6 +595,11 @@ void Heap::AddSpace(space::Space* space, bool set_as_default) {
if (continuous_space->IsDlMallocSpace()) {
dlmalloc_space_ = continuous_space->AsDlMallocSpace();
} else if (continuous_space->IsRosAllocSpace()) {
+ // If we already have a rosalloc_space_, revoke its thread-local buffers first so that we
+ // don't end up with non-full runs from the previous space during the revoke that follows.
+ if (rosalloc_space_ != nullptr) {
+ rosalloc_space_->RevokeAllThreadLocalBuffers();
+ }
rosalloc_space_ = continuous_space->AsRosAllocSpace();
}
}
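
The ordering in the hunk above matters: thread-local runs still pointing into the old rosalloc space must be flushed before rosalloc_space_ is repointed, because a revoke performed after the swap only reaches the new space. A minimal sketch of that pattern with stand-in types (not the Heap API):

#include <vector>

// Stand-in for a space that hands out per-thread buffers.
struct Space {
  std::vector<int> thread_local_buffers;
  void RevokeAllThreadLocalBuffers() { thread_local_buffers.clear(); }
};

Space* current_space = nullptr;

void InstallSpace(Space* incoming) {
  // Flush buffers owned by the space we are about to stop tracking; after the
  // assignment a blanket revoke would miss them.
  if (current_space != nullptr) {
    current_space->RevokeAllThreadLocalBuffers();
  }
  current_space = incoming;
}
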
@@ -615,7 +620,7 @@ void Heap::AddSpace(space::Space* space, bool set_as_default) {
}
}
-void Heap::RemoveSpace(space::Space* space) {
+void Heap::RemoveSpace(space::Space* space, bool unset_as_default) {
DCHECK(space != nullptr);
WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
if (space->IsContinuousSpace()) {
@@ -632,17 +637,19 @@ void Heap::RemoveSpace(space::Space* space) {
auto it = std::find(continuous_spaces_.begin(), continuous_spaces_.end(), continuous_space);
DCHECK(it != continuous_spaces_.end());
continuous_spaces_.erase(it);
- if (continuous_space == dlmalloc_space_) {
- dlmalloc_space_ = nullptr;
- } else if (continuous_space == rosalloc_space_) {
- rosalloc_space_ = nullptr;
- }
- if (continuous_space == main_space_) {
- main_space_ = nullptr;
- } else if (continuous_space == bump_pointer_space_) {
- bump_pointer_space_ = nullptr;
- } else if (continuous_space == temp_space_) {
- temp_space_ = nullptr;
+ if (unset_as_default) {
+ if (continuous_space == dlmalloc_space_) {
+ dlmalloc_space_ = nullptr;
+ } else if (continuous_space == rosalloc_space_) {
+ rosalloc_space_ = nullptr;
+ }
+ if (continuous_space == main_space_) {
+ main_space_ = nullptr;
+ } else if (continuous_space == bump_pointer_space_) {
+ bump_pointer_space_ = nullptr;
+ } else if (continuous_space == temp_space_) {
+ temp_space_ = nullptr;
+ }
}
} else {
DCHECK(space->IsDiscontinuousSpace());
@@ -725,6 +732,7 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) {
os << "Total mutator paused time: " << PrettyDuration(total_paused_time) << "\n";
os << "Total time waiting for GC to complete: " << PrettyDuration(total_wait_time_) << "\n";
os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_;
+ BaseMutex::DumpAll(os);
}
Heap::~Heap() {
@@ -1457,6 +1465,10 @@ void Heap::TransitionCollector(CollectorType collector_type) {
// pointer space last transition it will be protected.
bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
Compact(bump_pointer_space_, main_space_);
+ // Remove the main space so that we don't try to trim it; trimming doesn't work for debug
+ // builds since RosAlloc attempts to read the magic number from a protected page.
+ // TODO: Clean this up by getting rid of the remove_as_default parameter.
+ RemoveSpace(main_space_, false);
}
break;
}
@@ -1465,6 +1477,7 @@ void Heap::TransitionCollector(CollectorType collector_type) {
case kCollectorTypeCMS: {
if (IsMovingGc(collector_type_)) {
// Compact to the main space from the bump pointer space, don't need to swap semispaces.
+ AddSpace(main_space_, false);
main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
Compact(main_space_, bump_pointer_space_);
}
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index ceba8b6..c37bb05 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -289,6 +289,12 @@ class Heap {
void RegisterGCAllocation(size_t bytes);
void RegisterGCDeAllocation(size_t bytes);
+ // Public due to usage by tests.
+ void AddSpace(space::Space* space, bool set_as_default = true)
+ LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+ void RemoveSpace(space::Space* space, bool unset_as_default = true)
+ LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+
// Set target ideal heap utilization ratio, implements
// dalvik.system.VMRuntime.setTargetHeapUtilization.
void SetTargetHeapUtilization(float target);
@@ -684,10 +690,6 @@ class Heap {
size_t GetPercentFree();
- void AddSpace(space::Space* space, bool set_as_default = true)
- LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
- void RemoveSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-
static void VerificationCallback(mirror::Object* obj, void* arg)
SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index a5a6da0..f5c0e94 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -32,7 +32,7 @@ namespace art {
namespace gc {
namespace space {
-static constexpr bool kPrefetchDuringRosAllocFreeList = true;
+static constexpr bool kPrefetchDuringRosAllocFreeList = false;
static constexpr size_t kPrefetchLookAhead = 8;
// Use this only for verification, it is not safe to use since the class of the object may have
// been freed.
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 9896a48..28200df 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -39,10 +39,8 @@ class SpaceTest : public CommonRuntimeTest {
}
void AddSpace(ContinuousSpace* space) {
- // For RosAlloc, revoke the thread local runs before moving onto a
- // new alloc space.
- Runtime::Current()->GetHeap()->RevokeAllThreadLocalBuffers();
- Runtime::Current()->GetHeap()->AddSpace(space);
+ // By passing true, AddSpace() does the revoke.
+ Runtime::Current()->GetHeap()->AddSpace(space, true);
}
mirror::Class* GetByteArrayClass(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -349,11 +347,8 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
EXPECT_EQ(usable_size, computed_usable_size);
}
- // Release memory and check pointers are nullptr.
+ // Release memory.
space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
- for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
- EXPECT_TRUE(lots_of_objects[i] == nullptr);
- }
// Succeeds, fits by adjusting the max allowed footprint.
for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
@@ -367,12 +362,8 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
EXPECT_EQ(usable_size, computed_usable_size);
}
- // Release memory and check pointers are nullptr
- // TODO: This isn't compaction safe, fix.
+ // Release memory.
space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
- for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
- EXPECT_TRUE(lots_of_objects[i] == nullptr);
- }
}
void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 095404f..771680b 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1018,7 +1018,8 @@ Thread::Thread(bool daemon) : tls32_(daemon), wait_monitor_(nullptr), interrupte
tls32_.state_and_flags.as_struct.flags = 0;
tls32_.state_and_flags.as_struct.state = kNative;
memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
- memset(tlsPtr_.rosalloc_runs, 0, sizeof(tlsPtr_.rosalloc_runs));
+ std::fill(tlsPtr_.rosalloc_runs, tlsPtr_.rosalloc_runs + kRosAllocNumOfSizeBrackets,
+ gc::allocator::RosAlloc::GetDedicatedFullRun());
for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
tlsPtr_.checkpoint_functions[i] = nullptr;
}
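
The memset of rosalloc_runs becomes a std::fill because each slot now has to start at the non-null dedicated full run rather than at zero. A tiny standalone illustration of that initialization (the types and bracket count are assumptions; ART uses kRosAllocNumOfSizeBrackets):

#include <algorithm>
#include <cstddef>

struct Run {};
Run dedicated_full_run;              // Stand-in for RosAlloc::GetDedicatedFullRun().
constexpr size_t kNumBrackets = 34;  // Assumed count for the example.
Run* rosalloc_runs[kNumBrackets];

void InitRosAllocRuns() {
  // memset(..., 0, ...) can only produce nullptr entries; std::fill seeds every
  // bracket with the sentinel pointer instead.
  std::fill(rosalloc_runs, rosalloc_runs + kNumBrackets, &dedicated_full_run);
}
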