diff options
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 490 | ||||
-rw-r--r-- | runtime/base/mutex.h | 1 | ||||
-rw-r--r-- | runtime/class_linker.cc | 66 | ||||
-rw-r--r-- | runtime/class_linker.h | 4 | ||||
-rw-r--r-- | runtime/common_throws.cc | 29 | ||||
-rw-r--r-- | runtime/common_throws.h | 3 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 107 | ||||
-rw-r--r-- | runtime/gc/allocator/rosalloc.cc | 55 | ||||
-rw-r--r-- | runtime/gc/allocator/rosalloc.h | 2 | ||||
-rw-r--r-- | runtime/gc/collector/garbage_collector.cc | 10 | ||||
-rw-r--r-- | runtime/gc/collector/garbage_collector.h | 3 | ||||
-rw-r--r-- | runtime/gc/heap.cc | 41 | ||||
-rw-r--r-- | runtime/gc/space/rosalloc_space.cc | 6 | ||||
-rw-r--r-- | runtime/monitor.cc | 42 | ||||
-rw-r--r-- | runtime/monitor.h | 8 | ||||
-rw-r--r-- | runtime/native/dalvik_system_DexFile.cc | 114 | ||||
-rw-r--r-- | runtime/parsed_options.cc | 3 | ||||
-rw-r--r-- | runtime/parsed_options.h | 1 | ||||
-rw-r--r-- | runtime/profiler.cc | 99 | ||||
-rw-r--r-- | runtime/profiler.h | 51 | ||||
-rw-r--r-- | runtime/runtime.cc | 9 | ||||
-rw-r--r-- | runtime/runtime.h | 4 |
22 files changed, 713 insertions, 435 deletions
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 3082273..dd34583 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -60,26 +60,31 @@ // Callee saved. stp xSELF, x19, [sp, #264] + .cfi_rel_offset x18, 264 + .cfi_rel_offset x19, 272 + stp x20, x21, [sp, #280] + .cfi_rel_offset x20, 280 + .cfi_rel_offset x21, 288 + stp x22, x23, [sp, #296] + .cfi_rel_offset x22, 296 + .cfi_rel_offset x23, 304 + stp x24, x25, [sp, #312] + .cfi_rel_offset x24, 312 + .cfi_rel_offset x25, 320 + stp x26, x27, [sp, #328] + .cfi_rel_offset x26, 328 + .cfi_rel_offset x27, 336 + stp x28, xFP, [sp, #344] // Save FP. - str xLR, [sp, #360] + .cfi_rel_offset x28, 344 + .cfi_rel_offset x29, 352 - .cfi_offset x18,72 - .cfi_offset x19,80 - .cfi_offset x20,88 - .cfi_offset x21,96 - .cfi_offset x22,104 - .cfi_offset x23,112 - .cfi_offset x24,120 - .cfi_offset x25,128 - .cfi_offset x26,136 - .cfi_offset x27,144 - .cfi_offset x28,152 - .cfi_offset x29,160 - .cfi_offset x30,168 + str xLR, [sp, #360] + .cfi_rel_offset x30, 360 // Loads appropriate callee-save-method str x9, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs] @@ -117,36 +122,44 @@ stp d14, d15, [sp, #128] stp x1, x2, [sp, #144] + .cfi_rel_offset x1, 144 + .cfi_rel_offset x2, 152 + stp x3, x4, [sp, #160] + .cfi_rel_offset x3, 160 + .cfi_rel_offset x4, 168 + stp x5, x6, [sp, #176] + .cfi_rel_offset x5, 176 + .cfi_rel_offset x6, 184 + stp x7, xSELF, [sp, #192] + .cfi_rel_offset x7, 192 + .cfi_rel_offset x18, 200 + stp x19, x20, [sp, #208] + .cfi_rel_offset x19, 208 + .cfi_rel_offset x20, 216 + stp x21, x22, [sp, #224] + .cfi_rel_offset x21, 224 + .cfi_rel_offset x22, 232 + stp x23, x24, [sp, #240] + .cfi_rel_offset x23, 240 + .cfi_rel_offset x24, 248 + stp x25, x26, [sp, #256] + .cfi_rel_offset x25, 256 + .cfi_rel_offset x26, 264 + stp x27, x28, [sp, #272] - stp xFP, xLR, [sp, #288] + .cfi_rel_offset x27, 
272 + .cfi_rel_offset x28, 280 - .cfi_offset x1,144 - .cfi_offset x2,152 - .cfi_offset x3,160 - .cfi_offset x4,168 - .cfi_offset x5,176 - .cfi_offset x6,184 - .cfi_offset x7,192 - .cfi_offset x18,200 - .cfi_offset x19,208 - .cfi_offset x20,216 - .cfi_offset x21,224 - .cfi_offset x22,232 - .cfi_offset x23,240 - .cfi_offset x24,248 - .cfi_offset x25,256 - .cfi_offset x26,264 - .cfi_offset x27,272 - .cfi_offset x28,280 - .cfi_offset x29,288 - .cfi_offset x30,296 + stp xFP, xLR, [sp, #288] + .cfi_rel_offset x29, 288 + .cfi_rel_offset x30, 296 .endm /* @@ -183,15 +196,44 @@ // args. ldp x1, x2, [sp, #144] + .cfi_restore x1 + .cfi_restore x2 + ldp x3, x4, [sp, #160] + .cfi_restore x3 + .cfi_restore x4 + ldp x5, x6, [sp, #176] + .cfi_restore x5 + .cfi_restore x6 + ldp x7, xSELF, [sp, #192] + .cfi_restore x7 + .cfi_restore x18 + ldp x19, x20, [sp, #208] + .cfi_restore x19 + .cfi_restore x20 + ldp x21, x22, [sp, #224] + .cfi_restore x21 + .cfi_restore x22 + ldp x23, x24, [sp, #240] + .cfi_restore x23 + .cfi_restore x24 + ldp x25, x26, [sp, #256] + .cfi_restore x25 + .cfi_restore x26 + ldp x27, x28, [sp, #272] + .cfi_restore x27 + .cfi_restore x28 + ldp xFP, xLR, [sp, #288] + .cfi_restore x29 + .cfi_restore x30 add sp, sp, #304 .cfi_adjust_cfa_offset -304 @@ -210,15 +252,44 @@ // args. 
ldp x1, x2, [sp, #144] + .cfi_restore x1 + .cfi_restore x2 + ldp x3, x4, [sp, #160] + .cfi_restore x3 + .cfi_restore x4 + ldp x5, x6, [sp, #176] + .cfi_restore x5 + .cfi_restore x6 + ldp x7, xSELF, [sp, #192] + .cfi_restore x7 + .cfi_restore x18 + ldp x19, x20, [sp, #208] + .cfi_restore x19 + .cfi_restore x20 + ldp x21, x22, [sp, #224] + .cfi_restore x21 + .cfi_restore x22 + ldp x23, x24, [sp, #240] + .cfi_restore x23 + .cfi_restore x24 + ldp x25, x26, [sp, #256] + .cfi_restore x25 + .cfi_restore x26 + ldp x27, x28, [sp, #272] + .cfi_restore x27 + .cfi_restore x28 + ldp xFP, xLR, [sp, #288] + .cfi_restore x29 + .cfi_restore x30 add sp, sp, #304 .cfi_adjust_cfa_offset -304 @@ -340,6 +411,113 @@ INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvok INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck + +.macro INVOKE_STUB_CREATE_FRAME + +SAVE_SIZE=5*8 // x4, x5, SP, LR & FP saved. +SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 + + mov x9, sp // Save stack pointer. + .cfi_register sp,x9 + + add x10, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame. + sub x10, sp, x10 // Calculate SP position - saves + ArtMethod* + args + and x10, x10, # ~0xf // Enforce 16 byte stack alignment. + mov sp, x10 // Set new SP. + + sub x10, x9, #SAVE_SIZE // Calculate new FP (later). Done here as we must move SP + .cfi_def_cfa_register x10 // before this. + .cfi_adjust_cfa_offset SAVE_SIZE + + str x9, [x10, #32] // Save old stack pointer. + .cfi_rel_offset sp, 32 + + stp x4, x5, [x10, #16] // Save result and shorty addresses. + .cfi_rel_offset x4, 16 + .cfi_rel_offset x5, 24 + + stp xFP, xLR, [x10] // Store LR & FP. + .cfi_rel_offset x29, 0 + .cfi_rel_offset x30, 8 + + mov xFP, x10 // Use xFP now, as it's callee-saved. 
+ .cfi_def_cfa_register x29 + mov xSELF, x3 // Move thread pointer into SELF register. + + // Copy arguments into stack frame. + // Use simple copy routine for now. + // 4 bytes per slot. + // X1 - source address + // W2 - args length + // X9 - destination address. + // W10 - temporary + add x9, sp, #8 // Destination address is bottom of stack + NULL. + + // Use \@ to differentiate between macro invocations. +.LcopyParams\@: + cmp w2, #0 + beq .LendCopyParams\@ + sub w2, w2, #4 // Need 65536 bytes of range. + ldr w10, [x1, x2] + str w10, [x9, x2] + + b .LcopyParams\@ + +.LendCopyParams\@: + + // Store NULL into Method* at bottom of frame. + str xzr, [sp] + +.endm + +.macro INVOKE_STUB_CALL_AND_RETURN + + // load method-> METHOD_QUICK_CODE_OFFSET + ldr x9, [x0 , #METHOD_QUICK_CODE_OFFSET] + // Branch to method. + blr x9 + + // Restore return value address and shorty address. + ldp x4,x5, [xFP, #16] + .cfi_restore x4 + .cfi_restore x5 + + // Store result (w0/x0/s0/d0) appropriately, depending on resultType. + ldrb w10, [x5] + + // Don't set anything for a void type. + cmp w10, #'V' + beq .Lexit_art_quick_invoke_stub\@ + + cmp w10, #'D' + bne .Lreturn_is_float\@ + str d0, [x4] + b .Lexit_art_quick_invoke_stub\@ + +.Lreturn_is_float\@: + cmp w10, #'F' + bne .Lreturn_is_int\@ + str s0, [x4] + b .Lexit_art_quick_invoke_stub\@ + + // Just store x0. Doesn't matter if it is 64 or 32 bits. +.Lreturn_is_int\@: + str x0, [x4] + +.Lexit_art_quick_invoke_stub\@: + ldr x2, [x29, #32] // Restore stack pointer. + mov sp, x2 + .cfi_restore sp + + ldp x29, x30, [x29] // Restore old frame pointer and link register. + .cfi_restore x29 + .cfi_restore x30 + + ret + +.endm + + /* * extern"C" void art_quick_invoke_stub(ArtMethod *method, x0 * uint32_t *args, x1 @@ -377,63 +555,7 @@ INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvo */ ENTRY art_quick_invoke_stub // Spill registers as per AACPS64 calling convention. - -SAVE_SIZE=5*8 // x4, x5, LR & FP saved. 
-SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 - - mov x9, sp // Save stack pointer. - - mov x10, xFP // Save frame pointer - .cfi_register x29,x10 - add x11, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame. - - sub x11, sp, x11 // Calculate SP position - saves + ArtMethod* + args - - and x11, x11, # ~0xf // Enforce 16 byte stack alignment. - - sub xFP, x9, #SAVE_SIZE // Calculate new FP. Don't store here until SP moved. - .cfi_def_cfa_register x29 - - mov sp, x11 // set new SP. - - str x9, [xFP, #32] // Save old stack pointer. - - .cfi_offset x9, 32 - - stp x4, x5, [xFP, #16] // Save result and shorty addresses. - - .cfi_offset x4, 16 - .cfi_offset x5, 24 - - stp x10, xLR, [xFP] // Store lr & old fp @ fp - - .cfi_offset x30, 0 - .cfi_offset x10, 8 - - mov xSELF, x3 // Move thread pointer into SELF register. - - // Copy arguments into stack frame. - // Use simple copy routine for now. - // 4 bytes per slot. - // X1 - source address - // W2 - args length - // X10 - destination address. - add x9, sp, #8 // Destination address is bottom of stack + NULL. - - // w2 = argsize parameter. -.LcopyParams: - cmp w2, #0 - beq .LendCopyParams - sub w2, w2, #4 // Need 65536 bytes of range. - ldr w10, [x1, x2] - str w10, [x9, x2] - - b .LcopyParams - -.LendCopyParams: - - // Store NULL into Method* at bottom of frame. - str xzr, [sp] + INVOKE_STUB_CREATE_FRAME // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters. // Parse the passed shorty to determine which register to load. @@ -460,7 +582,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 bne .LisDouble cmp x15, # 8*12 // Skip this load if all registers full. - beq .LfillRegisters + beq .Ladvance4 add x17, x13, x15 // Calculate subroutine to jump to. br x17 @@ -470,8 +592,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 bne .LisLong cmp x15, # 8*12 // Skip this load if all registers full. - beq .LfillRegisters - + beq .Ladvance8 add x17, x14, x15 // Calculate subroutine to jump to. 
br x17 @@ -481,18 +602,26 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 bne .LisOther cmp x8, # 6*12 // Skip this load if all registers full. - beq .LfillRegisters + beq .Ladvance8 add x17, x12, x8 // Calculate subroutine to jump to. br x17 - .LisOther: // Everything else takes one vReg. cmp x8, # 6*12 // Skip this load if all registers full. - beq .LfillRegisters + beq .Ladvance4 + add x17, x11, x8 // Calculate subroutine to jump to. br x17 +.Ladvance4: + add x9, x9, #4 + b .LfillRegisters + +.Ladvance8: + add x9, x9, #8 + b .LfillRegisters + // Macro for loading a parameter into a register. // counter - the register with offset into these tables // size - the size of the register - 4 or 8 bytes. @@ -546,48 +675,8 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 .LcallFunction: - // load method-> METHOD_QUICK_CODE_OFFSET - ldr x9, [x0 , #METHOD_QUICK_CODE_OFFSET] - // Branch to method. - blr x9 + INVOKE_STUB_CALL_AND_RETURN - // Restore return value address and shorty address. - ldp x4,x5, [xFP, #16] - .cfi_restore x4 - .cfi_restore x5 - - // Store result (w0/x0/s0/d0) appropriately, depending on resultType. - ldrb w10, [x5] - - // Don't set anything for a void type. - cmp w10, #'V' - beq .Lexit_art_quick_invoke_stub - - cmp w10, #'D' - bne .Lreturn_is_float - str d0, [x4] - b .Lexit_art_quick_invoke_stub - -.Lreturn_is_float: - cmp w10, #'F' - bne .Lreturn_is_int - str s0, [x4] - b .Lexit_art_quick_invoke_stub - - // Just store x0. Doesn't matter if it is 64 or 32 bits. -.Lreturn_is_int: - str x0, [x4] - -.Lexit_art_quick_invoke_stub: - ldr x2, [x29, #32] // Restore stack pointer. - mov sp, x2 - .cfi_restore sp - - ldp x29, x30, [x29] // Restore old frame pointer and link register. - .cfi_restore x29 - .cfi_restore x30 - - ret END art_quick_invoke_stub /* extern"C" @@ -600,64 +689,7 @@ END art_quick_invoke_stub */ ENTRY art_quick_invoke_static_stub // Spill registers as per AACPS64 calling convention. 
- -SAVE_SIZE=5*8 // x4, x5, SP, LR & FP saved -SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 - - mov x9, sp // Save stack pointer. - - mov x10, xFP // Save frame pointer - .cfi_register x29,x10 - add x11, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame. - - sub x11, sp, x11 // Calculate SP position - saves + ArtMethod* + args - - and x11, x11, # ~0xf // Enforce 16 byte stack alignment. - - sub xFP, x9, #SAVE_SIZE // Calculate new FP. Don't store here until SP moved. - - mov sp, x11 // set new SP. - - .cfi_def_cfa_register 29 - - str x9, [xFP, #32] // Save old stack pointer. - - .cfi_offset x9, 32 - - stp x4, x5, [xFP, #16] // Save result and shorty addresses. - - .cfi_offset x4, 16 - .cfi_offset x5, 24 - - stp x10, xLR, [x29] // Store lr & old fp @ fp - - .cfi_offset x30, 0 - .cfi_offset x10, 8 - - mov xSELF, x3 // Move thread pointer into SELF register. - - // Copy arguments into stack frame. - // Use simple copy routine for now. - // 4 bytes per slot. - // X1 - source address - // W2 - args length - // X10 - destination address. - add x9, sp, #8 // Destination address is bottom of stack + NULL. - - // w2 = argsize parameter. -.LcopyParams2: - cmp w2, #0 - beq .LendCopyParams2 - sub w2, w2, #4 // Need 65536 bytes of range. - ldr w10, [x1, x2] - str w10, [x9, x2] - - b .LcopyParams2 - -.LendCopyParams2: - - // Store NULL into Method* at bottom of frame. - str xzr, [sp] + INVOKE_STUB_CREATE_FRAME // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters. // Parse the passed shorty to determine which register to load. @@ -683,7 +715,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 bne .LisDouble2 cmp x15, # 8*12 // Skip this load if all registers full. - beq .LfillRegisters2 + beq .Ladvance4_2 add x17, x13, x15 // Calculate subroutine to jump to. br x17 @@ -693,8 +725,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 bne .LisLong2 cmp x15, # 8*12 // Skip this load if all registers full. - beq .LfillRegisters2 - + beq .Ladvance8_2 add x17, x14, x15 // Calculate subroutine to jump to. 
br x17 @@ -704,18 +735,26 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 bne .LisOther2 cmp x8, # 7*12 // Skip this load if all registers full. - beq .LfillRegisters2 + beq .Ladvance8_2 add x17, x12, x8 // Calculate subroutine to jump to. br x17 - .LisOther2: // Everything else takes one vReg. cmp x8, # 7*12 // Skip this load if all registers full. - beq .LfillRegisters2 + beq .Ladvance4_2 + add x17, x11, x8 // Calculate subroutine to jump to. br x17 +.Ladvance4_2: + add x9, x9, #4 + b .LfillRegisters2 + +.Ladvance8_2: + add x9, x9, #8 + b .LfillRegisters2 + // Store ints. .LstoreW1_2: LOADREG x8 4 w1 .LfillRegisters2 @@ -761,52 +800,11 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 .LcallFunction2: - // load method-> METHOD_QUICK_CODE_OFFSET. - ldr x9, [x0 , #METHOD_QUICK_CODE_OFFSET] - // Branch to method. - blr x9 - - // Restore return value address and shorty address. - ldp x4, x5, [xFP, #16] - .cfi_restore x4 - .cfi_restore x5 - - // Store result (w0/x0/s0/d0) appropriately, depending on resultType. - ldrb w10, [x5] - - // Don't set anything for a void type. - cmp w10, #'V' - beq .Lexit_art_quick_invoke_stub2 - - cmp w10, #'D' - bne .Lreturn_is_float2 - str d0, [x4] - b .Lexit_art_quick_invoke_stub2 + INVOKE_STUB_CALL_AND_RETURN -.Lreturn_is_float2: - cmp w10, #'F' - bne .Lreturn_is_int2 - str s0, [x4] - b .Lexit_art_quick_invoke_stub2 - - // Just store x0. Doesn't matter if it is 64 or 32 bits. -.Lreturn_is_int2: - str x0, [x4] - -.Lexit_art_quick_invoke_stub2: - - ldr x2, [xFP, #32] // Restore stack pointer. - mov sp, x2 - .cfi_restore sp - - ldp xFP, xLR, [xFP] // Restore old frame pointer and link register. 
- .cfi_restore x29 - .cfi_restore x30 - - ret END art_quick_invoke_static_stub -// UNIMPLEMENTED art_quick_do_long_jump + /* * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_ diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 4b881f6..b50c098 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -76,6 +76,7 @@ enum LockLevel { kClassLinkerClassesLock, kBreakpointLock, kMonitorLock, + kMonitorListLock, kThreadListLock, kBreakpointInvokeLock, kDeoptimizationLock, diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 78b7cc0..e690b30 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -692,7 +692,7 @@ class ScopedFlock { while (true) { file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR)); if (file_.get() == NULL) { - *error_msg = StringPrintf("Failed to open file '%s'", filename); + *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno)); return false; } int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX)); @@ -741,51 +741,57 @@ class ScopedFlock { const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(const char* dex_location, uint32_t dex_location_checksum, const char* oat_location, - std::string* error_msg) { + std::vector<std::string>* error_msgs) { // We play a locking game here so that if two different processes // race to generate (or worse, one tries to open a partial generated // file) we will be okay. This is actually common with apps that use // DexClassLoader to work around the dex method reference limit and // that have a background service running in a separate process. 
ScopedFlock scoped_flock; - if (!scoped_flock.Init(oat_location, error_msg)) { + std::string error_msg; + if (!scoped_flock.Init(oat_location, &error_msg)) { + error_msgs->push_back(error_msg); return nullptr; } // Check if we already have an up-to-date output file const DexFile* dex_file = FindDexFileInOatLocation(dex_location, dex_location_checksum, - oat_location, error_msg); + oat_location, &error_msg); if (dex_file != nullptr) { return dex_file; } - VLOG(class_linker) << "Failed to find dex file '" << dex_location << "' in oat location '" - << oat_location << "': " << *error_msg; - error_msg->clear(); + std::string compound_msg = StringPrintf("Failed to find dex file '%s' in oat location '%s': %s", + dex_location, oat_location, error_msg.c_str()); + VLOG(class_linker) << compound_msg; + error_msgs->push_back(compound_msg); // Generate the output oat file for the dex file VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location; - if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, error_msg)) { - CHECK(!error_msg->empty()); + if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, &error_msg)) { + CHECK(!error_msg.empty()); + error_msgs->push_back(error_msg); return nullptr; } const OatFile* oat_file = OatFile::Open(oat_location, oat_location, NULL, !Runtime::Current()->IsCompiler(), - error_msg); + &error_msg); if (oat_file == nullptr) { - *error_msg = StringPrintf("Failed to open generated oat file '%s': %s", - oat_location, error_msg->c_str()); + compound_msg = StringPrintf("\nFailed to open generated oat file '%s': %s", + oat_location, error_msg.c_str()); + error_msgs->push_back(compound_msg); return nullptr; } oat_file = RegisterOatFile(oat_file); const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location, &dex_location_checksum); if (oat_dex_file == nullptr) { - *error_msg = StringPrintf("Failed to find dex file '%s' (checksum 0x%x) in generated out file " 
- "'%s'", dex_location, dex_location_checksum, oat_location); + error_msg = StringPrintf("\nFailed to find dex file '%s' (checksum 0x%x) in generated out file " + "'%s'", dex_location, dex_location_checksum, oat_location); + error_msgs->push_back(error_msg); return nullptr; } - const DexFile* result = oat_dex_file->OpenDexFile(error_msg); - CHECK(result != nullptr) << *error_msg; + const DexFile* result = oat_dex_file->OpenDexFile(&error_msg); + CHECK(result != nullptr) << error_msgs << ", " << error_msg; CHECK_EQ(dex_location_checksum, result->GetLocationChecksum()) << "dex_location=" << dex_location << " oat_location=" << oat_location << std::hex << " dex_location_checksum=" << dex_location_checksum @@ -880,27 +886,34 @@ const DexFile* ClassLinker::VerifyAndOpenDexFileFromOatFile(const std::string& o const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(const char* dex_location, const uint32_t* const dex_location_checksum, - std::string* error_msg) { + std::vector<std::string>* error_msgs) { const OatFile* open_oat_file = FindOpenedOatFileFromDexLocation(dex_location, dex_location_checksum); if (open_oat_file != nullptr) { const OatFile::OatDexFile* oat_dex_file = open_oat_file->GetOatDexFile(dex_location, dex_location_checksum); - return oat_dex_file->OpenDexFile(error_msg); + std::string error_msg; + const DexFile* ret = oat_dex_file->OpenDexFile(&error_msg); + if (ret == nullptr) { + error_msgs->push_back(error_msg); + } + return ret; } // Look for an existing file next to dex. for example, for // /foo/bar/baz.jar, look for /foo/bar/baz.odex. 
std::string odex_filename(OatFile::DexFilenameToOdexFilename(dex_location)); bool open_failed; + std::string error_msg; const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(odex_filename, dex_location, - error_msg, &open_failed); + &error_msg, &open_failed); if (dex_file != nullptr) { return dex_file; } if (dex_location_checksum == nullptr) { - *error_msg = StringPrintf("Failed to open oat file from %s and no classes.dex found in %s: %s", - odex_filename.c_str(), dex_location, error_msg->c_str()); + error_msgs->push_back(StringPrintf("Failed to open oat file from %s and no classes.dex found in" + "%s: %s", odex_filename.c_str(), dex_location, + error_msg.c_str())); return nullptr; } @@ -914,14 +927,15 @@ const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(const char* dex_ if (!open_failed && TEMP_FAILURE_RETRY(unlink(cache_location.c_str())) != 0) { PLOG(FATAL) << "Failed to remove obsolete oat file from " << cache_location; } - VLOG(class_linker) << "Failed to open oat file from " << odex_filename - << " (error '" << *error_msg << "') or " << cache_location - << " (error '" << cache_error_msg << "')."; + std::string compound_msg = StringPrintf("Failed to open oat file from %s (error '%s') or %s " + "(error '%s').", odex_filename.c_str(), error_msg.c_str(), + cache_location.c_str(), cache_error_msg.c_str()); + VLOG(class_linker) << compound_msg; + error_msgs->push_back(compound_msg); // Try to generate oat file if it wasn't found or was obsolete. 
- error_msg->clear(); return FindOrCreateOatFileForDexLocation(dex_location, *dex_location_checksum, - cache_location.c_str(), error_msg); + cache_location.c_str(), error_msgs); } const OatFile* ClassLinker::FindOpenedOatFileFromOatLocation(const std::string& oat_location) { diff --git a/runtime/class_linker.h b/runtime/class_linker.h index 701e62e..d684ad5 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -273,14 +273,14 @@ class ClassLinker { const DexFile* FindOrCreateOatFileForDexLocation(const char* dex_location, uint32_t dex_location_checksum, const char* oat_location, - std::string* error_msg) + std::vector<std::string>* error_msgs) LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_); // Find a DexFile within an OatFile given a DexFile location. Note // that this returns null if the location checksum of the DexFile // does not match the OatFile. const DexFile* FindDexFileInOatFileFromDexLocation(const char* location, const uint32_t* const location_checksum, - std::string* error_msg) + std::vector<std::string>* error_msgs) LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_); diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc index 4b6d82b..315f274 100644 --- a/runtime/common_throws.cc +++ b/runtime/common_throws.cc @@ -66,6 +66,28 @@ static void ThrowException(const ThrowLocation* throw_location, const char* exce } } +static void ThrowWrappedException(const ThrowLocation* throw_location, + const char* exception_descriptor, + mirror::Class* referrer, const char* fmt, va_list* args = NULL) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + std::ostringstream msg; + if (args != NULL) { + std::string vmsg; + StringAppendV(&vmsg, fmt, *args); + msg << vmsg; + } else { + msg << fmt; + } + AddReferrerLocation(msg, referrer); + Thread* self = Thread::Current(); + if (throw_location == NULL) { + ThrowLocation computed_throw_location = self->GetCurrentLocationForThrow(); + self->ThrowNewWrappedException(computed_throw_location, 
exception_descriptor, msg.str().c_str()); + } else { + self->ThrowNewWrappedException(*throw_location, exception_descriptor, msg.str().c_str()); + } +} + // AbstractMethodError void ThrowAbstractMethodError(mirror::ArtMethod* method) { @@ -243,6 +265,13 @@ void ThrowIOException(const char* fmt, ...) { va_end(args); } +void ThrowWrappedIOException(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + ThrowWrappedException(NULL, "Ljava/io/IOException;", NULL, fmt, &args); + va_end(args); +} + // LinkageError void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...) { diff --git a/runtime/common_throws.h b/runtime/common_throws.h index c06763e..ebedae0 100644 --- a/runtime/common_throws.h +++ b/runtime/common_throws.h @@ -126,6 +126,9 @@ void ThrowIncompatibleClassChangeError(mirror::Class* referrer, const char* fmt, void ThrowIOException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2))) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR; +void ThrowWrappedIOException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2))) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR; + // LinkageError void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...) 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index fcbcac2..2b29591 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -250,6 +250,7 @@ class QuickArgumentVisitor { if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) { return fpr_args_ + (fpr_index_ * kBytesPerFprSpillLocation); } + return stack_args_ + (stack_index_ * kBytesStackArgLocation); } } if (gpr_index_ < kNumQuickGprArgs) { @@ -283,6 +284,12 @@ class QuickArgumentVisitor { } void VisitArguments() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + // This implementation doesn't support reg-spill area for hard float + // ABI targets such as x86_64 and aarch64. So, for those targets whose + // 'kQuickSoftFloatAbi' is 'false': + // (a) 'stack_args_' should point to the first method's argument + // (b) whatever the argument type it is, the 'stack_index_' should + // be moved forward along with every visiting. 
gpr_index_ = 0; fpr_index_ = 0; stack_index_ = 0; @@ -290,10 +297,11 @@ class QuickArgumentVisitor { cur_type_ = Primitive::kPrimNot; is_split_long_or_double_ = false; Visit(); + if (!kQuickSoftFloatAbi || kNumQuickGprArgs == 0) { + stack_index_++; + } if (kNumQuickGprArgs > 0) { gpr_index_++; - } else { - stack_index_++; } } for (uint32_t shorty_index = 1; shorty_index < shorty_len_; ++shorty_index) { @@ -307,10 +315,11 @@ class QuickArgumentVisitor { case Primitive::kPrimInt: is_split_long_or_double_ = false; Visit(); + if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) { + stack_index_++; + } if (gpr_index_ < kNumQuickGprArgs) { gpr_index_++; - } else { - stack_index_++; } break; case Primitive::kPrimFloat: @@ -325,9 +334,8 @@ class QuickArgumentVisitor { } else { if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) { fpr_index_++; - } else { - stack_index_++; } + stack_index_++; } break; case Primitive::kPrimDouble: @@ -336,22 +344,23 @@ class QuickArgumentVisitor { is_split_long_or_double_ = (kBytesPerGprSpillLocation == 4) && ((gpr_index_ + 1) == kNumQuickGprArgs); Visit(); + if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) { + if (kBytesStackArgLocation == 4) { + stack_index_+= 2; + } else { + CHECK_EQ(kBytesStackArgLocation, 8U); + stack_index_++; + } + } if (gpr_index_ < kNumQuickGprArgs) { gpr_index_++; if (kBytesPerGprSpillLocation == 4) { if (gpr_index_ < kNumQuickGprArgs) { gpr_index_++; - } else { + } else if (kQuickSoftFloatAbi) { stack_index_++; } } - } else { - if (kBytesStackArgLocation == 4) { - stack_index_+= 2; - } else { - CHECK_EQ(kBytesStackArgLocation, 8U); - stack_index_++; - } } } else { is_split_long_or_double_ = (kBytesPerFprSpillLocation == 4) && @@ -362,17 +371,14 @@ class QuickArgumentVisitor { if (kBytesPerFprSpillLocation == 4) { if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) { fpr_index_++; - } else { - stack_index_++; } } + } + if (kBytesStackArgLocation == 4) { + 
stack_index_+= 2; } else { - if (kBytesStackArgLocation == 4) { - stack_index_+= 2; - } else { - CHECK_EQ(kBytesStackArgLocation, 8U); - stack_index_++; - } + CHECK_EQ(kBytesStackArgLocation, 8U); + stack_index_++; } } break; @@ -389,59 +395,10 @@ class QuickArgumentVisitor { CHECK_EQ(kNumQuickFprArgs, 0U); return (kNumQuickGprArgs * kBytesPerGprSpillLocation) + kBytesPerGprSpillLocation /* ArtMethod* */; } else { - size_t offset = kBytesPerGprSpillLocation; // Skip Method*. - size_t gprs_seen = 0; - size_t fprs_seen = 0; - if (!is_static && (gprs_seen < kNumQuickGprArgs)) { - gprs_seen++; - offset += kBytesStackArgLocation; - } - for (uint32_t i = 1; i < shorty_len; ++i) { - switch (shorty[i]) { - case 'Z': - case 'B': - case 'C': - case 'S': - case 'I': - case 'L': - if (gprs_seen < kNumQuickGprArgs) { - gprs_seen++; - offset += kBytesStackArgLocation; - } - break; - case 'J': - if (gprs_seen < kNumQuickGprArgs) { - gprs_seen++; - offset += 2 * kBytesStackArgLocation; - if (kBytesPerGprSpillLocation == 4) { - if (gprs_seen < kNumQuickGprArgs) { - gprs_seen++; - } - } - } - break; - case 'F': - if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) { - fprs_seen++; - offset += kBytesStackArgLocation; - } - break; - case 'D': - if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) { - fprs_seen++; - offset += 2 * kBytesStackArgLocation; - if (kBytesPerFprSpillLocation == 4) { - if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) { - fprs_seen++; - } - } - } - break; - default: - LOG(FATAL) << "Unexpected shorty character: " << shorty[i] << " in " << shorty; - } - } - return offset; + // For now, there is no reg-spill area for the targets with + // hard float ABI. So, the offset pointing to the first method's + // parameter ('this' for non-static methods) should be returned. + return kBytesPerGprSpillLocation; // Skip Method*. 
} } diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index 920741f..cbefa6a 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -2005,6 +2005,61 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) { } } +size_t RosAlloc::ReleasePages() { + VLOG(heap) << "RosAlloc::ReleasePages()"; + DCHECK(!DoesReleaseAllPages()); + Thread* self = Thread::Current(); + size_t reclaimed_bytes = 0; + size_t i = 0; + while (true) { + MutexLock mu(self, lock_); + // Check the page map size which might have changed due to grow/shrink. + size_t pm_end = page_map_size_; + if (i >= pm_end) { + // Reached the end. + break; + } + byte pm = page_map_[i]; + switch (pm) { + case kPageMapEmpty: { + // The start of a free page run. Release pages. + FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize); + DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end()); + size_t fpr_size = fpr->ByteSize(this); + DCHECK(IsAligned<kPageSize>(fpr_size)); + byte* start = reinterpret_cast<byte*>(fpr); + if (kIsDebugBuild) { + // In the debug build, the first page of a free page run + // contains a magic number for debugging. Exclude it. + start = reinterpret_cast<byte*>(fpr) + kPageSize; + } + byte* end = reinterpret_cast<byte*>(fpr) + fpr_size; + CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0); + reclaimed_bytes += fpr_size; + size_t num_pages = fpr_size / kPageSize; + if (kIsDebugBuild) { + for (size_t j = i + 1; j < i + num_pages; ++j) { + DCHECK_EQ(page_map_[j], kPageMapEmpty); + } + } + i += num_pages; + DCHECK_LE(i, pm_end); + break; + } + case kPageMapLargeObject: // Fall through. + case kPageMapLargeObjectPart: // Fall through. + case kPageMapRun: // Fall through. + case kPageMapRunPart: // Fall through. + ++i; + break; // Skip. 
+ default: + LOG(FATAL) << "Unreachable - page map type: " << pm; + break; + } + } + return reclaimed_bytes; +} + } // namespace allocator } // namespace gc } // namespace art diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index 0b4b189..5d9d75c 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -539,6 +539,8 @@ class RosAlloc { void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg) LOCKS_EXCLUDED(lock_); + // Release empty pages. + size_t ReleasePages() LOCKS_EXCLUDED(lock_); // Returns the current footprint. size_t Footprint() LOCKS_EXCLUDED(lock_); // Returns the current capacity, maximum footprint. diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index 07951e0..a700c73 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -201,7 +201,15 @@ uint64_t GarbageCollector::GetEstimatedMeanThroughput() const { uint64_t GarbageCollector::GetEstimatedLastIterationThroughput() const { // Add 1ms to prevent possible division by 0. - return (freed_bytes_ * 1000) / (NsToMs(GetDurationNs()) + 1); + return (static_cast<uint64_t>(freed_bytes_) * 1000) / (NsToMs(GetDurationNs()) + 1); +} + +void GarbageCollector::ResetMeasurements() { + cumulative_timings_.Reset(); + pause_histogram_.Reset(); + total_time_ns_ = 0; + total_freed_objects_ = 0; + total_freed_bytes_ = 0; } } // namespace collector diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index 5b7b8a2..b19ac3f 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -110,6 +110,9 @@ class GarbageCollector { return pause_histogram_; } + // Reset the cumulative timings and pause histogram. + void ResetMeasurements(); + // Returns the estimated throughput in bytes / second. 
uint64_t GetEstimatedMeanThroughput() const; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index daf0fb3..eb8c7b1 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -661,7 +661,7 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { // Dump cumulative loggers for each GC type. uint64_t total_paused_time = 0; - for (const auto& collector : garbage_collectors_) { + for (auto& collector : garbage_collectors_) { const CumulativeLogger& logger = collector->GetCumulativeTimings(); if (logger.GetTotalNs() != 0) { os << ConstDumpable<CumulativeLogger>(logger); @@ -681,6 +681,7 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { total_duration += total_ns; total_paused_time += total_pause_ns; } + collector->ResetMeasurements(); } uint64_t allocation_time = static_cast<uint64_t>(total_allocation_time_) * kTimeAdjust; if (total_duration != 0) { @@ -915,8 +916,16 @@ void Heap::DoPendingTransitionOrTrim() { // Transition the collector if the desired collector type is not the same as the current // collector type. TransitionCollector(desired_collector_type); - // Do a heap trim if it is needed. - Trim(); + if (!CareAboutPauseTimes()) { + // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care + // about pauses. + Runtime* runtime = Runtime::Current(); + runtime->GetThreadList()->SuspendAll(); + runtime->GetMonitorList()->DeflateMonitors(); + runtime->GetThreadList()->ResumeAll(); + // Do a heap trim if it is needed. + Trim(); + } } void Heap::Trim() { @@ -2661,6 +2670,10 @@ void Heap::RequestCollectorTransition(CollectorType desired_collector_type, uint } void Heap::RequestHeapTrim() { + // Request a heap trim only if we do not currently care about pause times. + if (CareAboutPauseTimes()) { + return; + } // GC completed and now we must decide whether to request a heap trim (advising pages back to the // kernel) or not. Issuing a request will also cause trimming of the libc heap. 
As a trim scans // a space it will hold its lock and can become a cause of jank. @@ -2682,21 +2695,17 @@ void Heap::RequestHeapTrim() { // as we don't hold the lock while requesting the trim). return; } - - // Request a heap trim only if we do not currently care about pause times. - if (!CareAboutPauseTimes()) { - { - MutexLock mu(self, *heap_trim_request_lock_); - if (last_trim_time_ + kHeapTrimWait >= NanoTime()) { - // We have done a heap trim in the last kHeapTrimWait nanosecs, don't request another one - // just yet. - return; - } - heap_trim_request_pending_ = true; + { + MutexLock mu(self, *heap_trim_request_lock_); + if (last_trim_time_ + kHeapTrimWait >= NanoTime()) { + // We have done a heap trim in the last kHeapTrimWait nanosecs, don't request another one + // just yet. + return; } - // Notify the daemon thread which will actually do the heap trim. - SignalHeapTrimDaemon(self); + heap_trim_request_pending_ = true; } + // Notify the daemon thread which will actually do the heap trim. + SignalHeapTrimDaemon(self); } void Heap::SignalHeapTrimDaemon(Thread* self) { diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc index 012267b..5c5e7f8 100644 --- a/runtime/gc/space/rosalloc_space.cc +++ b/runtime/gc/space/rosalloc_space.cc @@ -222,6 +222,7 @@ extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intpt } size_t RosAllocSpace::Trim() { + VLOG(heap) << "RosAllocSpace::Trim() "; { MutexLock mu(Thread::Current(), lock_); // Trim to release memory at the end of the space. @@ -229,10 +230,7 @@ size_t RosAllocSpace::Trim() { } // Attempt to release pages if it does not release all empty pages. 
if (!rosalloc_->DoesReleaseAllPages()) { - VLOG(heap) << "RosAllocSpace::Trim() "; - size_t reclaimed = 0; - InspectAllRosAlloc(DlmallocMadviseCallback, &reclaimed, false); - return reclaimed; + return rosalloc_->ReleasePages(); } return 0; } diff --git a/runtime/monitor.cc b/runtime/monitor.cc index bcaf8ec..bbc7dd0 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -205,7 +205,7 @@ void Monitor::SetObject(mirror::Object* object) { void Monitor::Lock(Thread* self) { MutexLock mu(self, monitor_lock_); while (true) { - if (owner_ == NULL) { // Unowned. + if (owner_ == nullptr) { // Unowned. owner_ = self; CHECK_EQ(lock_count_, 0); // When debugging, save the current monitor holder for future @@ -223,15 +223,15 @@ void Monitor::Lock(Thread* self) { uint64_t wait_start_ms = log_contention ? 0 : MilliTime(); mirror::ArtMethod* owners_method = locking_method_; uint32_t owners_dex_pc = locking_dex_pc_; + // Do this before releasing the lock so that we don't get deflated. + ++num_waiters_; monitor_lock_.Unlock(self); // Let go of locks in order. { ScopedThreadStateChange tsc(self, kBlocked); // Change to blocked and give up mutator_lock_. self->SetMonitorEnterObject(obj_); MutexLock mu2(self, monitor_lock_); // Reacquire monitor_lock_ without mutator_lock_ for Wait. if (owner_ != NULL) { // Did the owner_ give the lock up? - ++num_waiters_; monitor_contenders_.Wait(self); // Still contended so wait. - --num_waiters_; // Woken from contention. if (log_contention) { uint64_t wait_ms = MilliTime() - wait_start_ms; @@ -252,6 +252,7 @@ void Monitor::Lock(Thread* self) { self->SetMonitorEnterObject(nullptr); } monitor_lock_.Lock(self); // Reacquire locks in order. + --num_waiters_; } } @@ -431,6 +432,7 @@ void Monitor::Wait(Thread* self, int64_t ms, int32_t ns, * not order sensitive as we hold the pthread mutex. 
*/ AppendToWaitSet(self); + ++num_waiters_; int prev_lock_count = lock_count_; lock_count_ = 0; owner_ = NULL; @@ -507,6 +509,7 @@ void Monitor::Wait(Thread* self, int64_t ms, int32_t ns, lock_count_ = prev_lock_count; locking_method_ = saved_method; locking_dex_pc_ = saved_dex_pc; + --num_waiters_; RemoveFromWaitSet(self); if (was_interrupted) { @@ -575,8 +578,12 @@ bool Monitor::Deflate(Thread* self, mirror::Object* obj) { // If the lock isn't an inflated monitor, then we don't need to deflate anything. if (lw.GetState() == LockWord::kFatLocked) { Monitor* monitor = lw.FatLockMonitor(); - CHECK(monitor != nullptr); + DCHECK(monitor != nullptr); MutexLock mu(self, monitor->monitor_lock_); + // Can't deflate if we have anybody waiting on the CV. + if (monitor->num_waiters_ > 0) { + return false; + } Thread* owner = monitor->owner_; if (owner != nullptr) { // Can't deflate if we are locked and have a hash code. @@ -587,17 +594,16 @@ bool Monitor::Deflate(Thread* self, mirror::Object* obj) { if (monitor->lock_count_ > LockWord::kThinLockMaxCount) { return false; } - // Can't deflate if we have anybody waiting on the CV. - if (monitor->num_waiters_ > 0) { - return false; - } // Deflate to a thin lock. - obj->SetLockWord(LockWord::FromThinLockId(owner->GetTid(), monitor->lock_count_)); + obj->SetLockWord(LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_)); + VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / " << monitor->lock_count_; } else if (monitor->HasHashCode()) { obj->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode())); + VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode(); } else { // No lock and no hash, just put an empty lock word inside the object. obj->SetLockWord(LockWord()); + VLOG(monitor) << "Deflated" << obj << " to empty lock word"; } // The monitor is deflated, mark the object as nullptr so that we know to delete it during the // next GC. 
@@ -1054,7 +1060,7 @@ uint32_t Monitor::GetOwnerThreadId() { } MonitorList::MonitorList() - : allow_new_monitors_(true), monitor_list_lock_("MonitorList lock"), + : allow_new_monitors_(true), monitor_list_lock_("MonitorList lock", kMonitorListLock), monitor_add_condition_("MonitorList disallow condition", monitor_list_lock_) { } @@ -1103,6 +1109,22 @@ void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) { } } +static mirror::Object* MonitorDeflateCallback(mirror::Object* object, void* arg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (Monitor::Deflate(reinterpret_cast<Thread*>(arg), object)) { + DCHECK_NE(object->GetLockWord().GetState(), LockWord::kFatLocked); + // If we deflated, return nullptr so that the monitor gets removed from the array. + return nullptr; + } + return object; // Monitor was not deflated. +} + +void MonitorList::DeflateMonitors() { + Thread* self = Thread::Current(); + Locks::mutator_lock_->AssertExclusiveHeld(self); + SweepMonitorList(MonitorDeflateCallback, reinterpret_cast<Thread*>(self)); +} + MonitorInfo::MonitorInfo(mirror::Object* obj) : owner_(NULL), entry_count_(0) { DCHECK(obj != NULL); diff --git a/runtime/monitor.h b/runtime/monitor.h index 55504b5..c459278 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -224,9 +224,11 @@ class MonitorList { void Add(Monitor* m); void SweepMonitorList(IsMarkedCallback* callback, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void DisallowNewMonitors(); - void AllowNewMonitors(); + LOCKS_EXCLUDED(monitor_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void DisallowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_); + void AllowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_); + void DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); private: bool allow_new_monitors_ GUARDED_BY(monitor_list_lock_); diff --git a/runtime/native/dalvik_system_DexFile.cc 
b/runtime/native/dalvik_system_DexFile.cc index bab0604..6af16f4 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -14,8 +14,10 @@ * limitations under the License. */ -#include <unistd.h> +#include <algorithm> #include <fcntl.h> +#include <set> +#include <unistd.h> #include "base/logging.h" #include "class_linker.h" @@ -30,6 +32,7 @@ #include "mirror/string.h" #include "oat.h" #include "os.h" +#include "profiler.h" #include "runtime.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" @@ -101,6 +104,7 @@ static jlong DexFile_openDexFileNative(JNIEnv* env, jclass, jstring javaSourceNa uint32_t dex_location_checksum; uint32_t* dex_location_checksum_pointer = &dex_location_checksum; + std::vector<std::string> error_msgs; std::string error_msg; if (!DexFile::GetChecksum(sourceName.c_str(), dex_location_checksum_pointer, &error_msg)) { dex_location_checksum_pointer = NULL; @@ -110,9 +114,8 @@ static jlong DexFile_openDexFileNative(JNIEnv* env, jclass, jstring javaSourceNa const DexFile* dex_file; if (outputName.c_str() == nullptr) { // FindOrCreateOatFileForDexLocation can tolerate a missing dex_location_checksum - error_msg.clear(); dex_file = linker->FindDexFileInOatFileFromDexLocation(sourceName.c_str(), - dex_location_checksum_pointer, &error_msg); + dex_location_checksum_pointer, &error_msgs); } else { // FindOrCreateOatFileForDexLocation requires the dex_location_checksum if (dex_location_checksum_pointer == NULL) { @@ -122,12 +125,19 @@ static jlong DexFile_openDexFileNative(JNIEnv* env, jclass, jstring javaSourceNa return 0; } dex_file = linker->FindOrCreateOatFileForDexLocation(sourceName.c_str(), dex_location_checksum, - outputName.c_str(), &error_msg); + outputName.c_str(), &error_msgs); } if (dex_file == nullptr) { ScopedObjectAccess soa(env); - CHECK(!error_msg.empty()); - ThrowIOException("%s", error_msg.c_str()); + CHECK(!error_msgs.empty()); + // The most important message is at the 
end. So set up nesting by going forward, which will + // wrap the existing exception as a cause for the following one. + auto it = error_msgs.begin(); + auto itEnd = error_msgs.end(); + for ( ; it != itEnd; ++it) { + ThrowWrappedIOException("%s", it->c_str()); + } + return 0; } return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_file)); @@ -230,13 +240,31 @@ static void CopyProfileFile(const char* oldfile, const char* newfile) { close(fd2); } +static double GetDoubleProperty(const char* property, double minValue, double maxValue, double defaultValue) { +#ifndef HAVE_ANDROID_OS + return defaultValue; +#else + char buf[PROP_VALUE_MAX]; + char* endptr; + + property_get(property, buf, ""); + double value = strtod(buf, &endptr); + + if (value == 0 && endptr == buf) { + value = defaultValue; + } else if (value < minValue || value > maxValue) { + value = defaultValue; + } + return value; +#endif +} + static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename, jstring javaPkgname, jboolean defer) { const bool kVerboseLogging = false; // Spammy logging. const bool kDebugLogging = true; // Logging useful for debugging. ScopedUtfChars filename(env, javaFilename); - if ((filename.c_str() == nullptr) || !OS::FileExists(filename.c_str())) { LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename.c_str() << "' does not exist"; ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException")); @@ -282,7 +310,6 @@ static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring java struct stat profstat, prevstat; int e1 = stat(profile_file.c_str(), &profstat); int e2 = stat(prev_profile_file.c_str(), &prevstat); - if (e1 < 0) { // No profile file, need to run dex2oat if (kDebugLogging) { @@ -290,48 +317,47 @@ static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring java } return JNI_TRUE; } + if (e2 == 0) { // There is a previous profile file. Check if the profile has changed significantly. 
- // Let's use the file size as a proxy for significance. If the new profile is 10% - // different in size than the the old profile then we run dex2oat. - double newsize = profstat.st_size; - double oldsize = prevstat.st_size; - bool need_profile = false; - - double ratio = 0; // If the old file was empty and the new one not - if (oldsize > 0 && newsize > 0) { - ratio = newsize / oldsize; - } else if (oldsize == 0 && newsize > 0) { - need_profile = true; - } else if (oldsize > 0 && newsize == 0) { - // Unlikely to happen, but cover all the bases. - need_profile = true; - } - - double significant_difference = 10.0; -#ifdef HAVE_ANDROID_OS - // Switch off profiler if the dalvik.vm.profiler property has value 0. - char buf[PROP_VALUE_MAX]; - property_get("dalvik.vm.profiler.dex2oat.threshold", buf, "10.0"); - significant_difference = strtod(buf, nullptr); - - // Something reasonable? - if (significant_difference < 1.0 || significant_difference > 90.0) { - significant_difference = 10.0; - } -#endif // The percentage difference that we consider as being significant. - double diff_hwm = 1.0 + significant_difference/10.0; - double diff_lwm = 1.0 - significant_difference/10.0; - - if (ratio > diff_hwm || ratio < diff_lwm) { - need_profile = true; + // A change in profile is considered significant if X% (change_thr property) of the top K% + // (compile_thr property) samples has changed. + + double topKThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.compile_thr", 10.0, 90.0, 90.0); + double changeThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.change_thr", 1.0, 90.0, 10.0); + double changePercent = 0.0; + std::set<std::string> newTopK, oldTopK; + bool newOk = ProfileHelper::LoadTopKSamples(newTopK, profile_file, topKThreshold); + bool oldOk = ProfileHelper::LoadTopKSamples(oldTopK, prev_profile_file, topKThreshold); + if (!newOk || !oldOk) { + if (kDebugLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded Ignoring invalid profiles: " + << (newOk ? 
"" : profile_file) << " " << (oldOk ? "" : prev_profile_file); + } + } else if (newTopK.empty()) { + if (kDebugLogging && kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded empty profile: " << profile_file; + } + // If the new topK is empty we shouldn't optimize so we leave the changePercent at 0.0. + } else { + std::set<std::string> diff; + std::set_difference(newTopK.begin(), newTopK.end(), oldTopK.begin(), oldTopK.end(), + std::inserter(diff, diff.end())); + // TODO: consider using the usedPercentage instead of the plain diff count. + changePercent = 100.0 * static_cast<double>(diff.size()) / static_cast<double>(newTopK.size()); + if (kDebugLogging && kVerboseLogging) { + std::set<std::string>::iterator end = diff.end(); + for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) { + LOG(INFO) << "DexFile_isDexOptNeeded new in topK: " << *it; + } + } } - if (need_profile) { + if (changePercent > changeThreshold) { if (kDebugLogging) { LOG(INFO) << "DexFile_isDexOptNeeded size of new profile file " << profile_file << - " is significantly different from old profile file " << prev_profile_file << " (new: " << - newsize << ", old: " << oldsize << ", ratio: " << ratio << ")"; + " is significantly different from old profile file " << prev_profile_file << " (top " + << topKThreshold << "% samples changed in proportion of " << changePercent << "%)"; } if (!defer) { CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str()); diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index 08a674f..bc8f51f 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -194,6 +194,7 @@ bool ParsedOptions::Parse(const Runtime::Options& options, bool ignore_unrecogni profile_duration_s_ = 20; // Seconds. profile_interval_us_ = 500; // Microseconds. 
profile_backoff_coefficient_ = 2.0; + profile_start_immediately_ = true; profile_clock_source_ = kDefaultProfilerClockSource; verify_ = true; @@ -509,6 +510,8 @@ bool ParsedOptions::Parse(const Runtime::Options& options, bool ignore_unrecogni if (!ParseDouble(option, ':', 1.0, 10.0, &profile_backoff_coefficient_)) { return false; } + } else if (option == "-Xprofile-start-lazy") { + profile_start_immediately_ = false; } else if (StartsWith(option, "-implicit-checks:")) { std::string checks; if (!ParseStringAfterChar(option, ':', &checks)) { diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h index 416bc78..126096a 100644 --- a/runtime/parsed_options.h +++ b/runtime/parsed_options.h @@ -79,6 +79,7 @@ class ParsedOptions { uint32_t profile_duration_s_; uint32_t profile_interval_us_; double profile_backoff_coefficient_; + bool profile_start_immediately_; ProfilerClockSource profile_clock_source_; bool verify_; diff --git a/runtime/profiler.cc b/runtime/profiler.cc index 4770a54..223fe87 100644 --- a/runtime/profiler.cc +++ b/runtime/profiler.cc @@ -16,6 +16,7 @@ #include "profiler.h" +#include <fstream> #include <sys/uio.h> #include <sys/file.h> @@ -579,5 +580,101 @@ void ProfileSampleResults::ReadPrevious(int fd) { previous_[methodname] = PreviousValue(count, size); } } -} // namespace art +bool ProfileHelper::LoadProfileMap(ProfileMap& profileMap, const std::string& fileName) { + LOG(VERBOSE) << "reading profile file " << fileName; + struct stat st; + int err = stat(fileName.c_str(), &st); + if (err == -1) { + LOG(VERBOSE) << "not found"; + return false; + } + if (st.st_size == 0) { + return true; // empty profiles are ok. 
+ } + std::ifstream in(fileName.c_str()); + if (!in) { + LOG(VERBOSE) << "profile file " << fileName << " exists but can't be opened"; + LOG(VERBOSE) << "file owner: " << st.st_uid << ":" << st.st_gid; + LOG(VERBOSE) << "me: " << getuid() << ":" << getgid(); + LOG(VERBOSE) << "file permissions: " << std::oct << st.st_mode; + LOG(VERBOSE) << "errno: " << errno; + return false; + } + // The first line contains summary information. + std::string line; + std::getline(in, line); + if (in.eof()) { + return false; + } + std::vector<std::string> summary_info; + Split(line, '/', summary_info); + if (summary_info.size() != 3) { + // Bad summary info. It should be count/total/bootpath. + return false; + } + // This is the number of hits in all methods. + uint32_t total_count = 0; + for (int i = 0 ; i < 3; ++i) { + total_count += atoi(summary_info[i].c_str()); + } + + // Now read each line until the end of file. Each line consists of 3 fields separated by '/'. + // Store the info in descending order given by the most used methods. + typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet; + ProfileSet countSet; + while (!in.eof()) { + std::getline(in, line); + if (in.eof()) { + break; + } + std::vector<std::string> info; + Split(line, '/', info); + if (info.size() != 3) { + // Malformed. + break; + } + int count = atoi(info[1].c_str()); + countSet.insert(std::make_pair(-count, info)); + } + + uint32_t curTotalCount = 0; + ProfileSet::iterator end = countSet.end(); + const ProfileData* prevData = nullptr; + for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) { + const std::string& methodname = it->second[0]; + uint32_t count = -it->first; + uint32_t size = atoi(it->second[2].c_str()); + double usedPercent = (count * 100.0) / total_count; + + curTotalCount += count; + // Methods with the same count should be part of the same top K percentage bucket. + double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count) + ? 
prevData->GetTopKUsedPercentage() + : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count); + + // Add it to the profile map. + ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage); + profileMap[methodname] = curData; + prevData = &curData; + } + return true; +} + +bool ProfileHelper::LoadTopKSamples(std::set<std::string>& topKSamples, const std::string& fileName, + double topKPercentage) { + ProfileMap profileMap; + bool loadOk = LoadProfileMap(profileMap, fileName); + if (!loadOk) { + return false; + } + ProfileMap::iterator end = profileMap.end(); + for (ProfileMap::iterator it = profileMap.begin(); it != end; it++) { + if (it->second.GetTopKUsedPercentage() < topKPercentage) { + topKSamples.insert(it->first); + } + } + return true; +} + +} // namespace art diff --git a/runtime/profiler.h b/runtime/profiler.h index b03b170..31fdc79 100644 --- a/runtime/profiler.h +++ b/runtime/profiler.h @@ -39,7 +39,6 @@ namespace mirror { } // namespace mirror class Thread; - // // This class holds all the results for all runs of the profiler. It also // counts the number of null methods (where we can't determine the method) and @@ -63,7 +62,7 @@ class ProfileSampleResults { private: uint32_t Hash(mirror::ArtMethod* method); static constexpr int kHashSize = 17; - Mutex& lock_; // Reference to the main profiler lock - we don't need two of them. + Mutex& lock_; // Reference to the main profiler lock - we don't need two of them. uint32_t num_samples_; // Total number of samples taken. uint32_t num_null_methods_; // Number of samples where can don't know the method. uint32_t num_boot_methods_; // Number of samples in the boot path. @@ -189,6 +188,54 @@ class BackgroundMethodSamplingProfiler { DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler); }; +// TODO: incorporate in ProfileSampleResults + +// Profile data. This is generated from previous runs of the program and stored +// in a file. 
It is used to determine whether to compile a particular method or not. +class ProfileData { + public: + ProfileData() : count_(0), method_size_(0), usedPercent_(0) {} + ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size, + double usedPercent, double topKUsedPercentage) : + method_name_(method_name), count_(count), method_size_(method_size), + usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) { + // TODO: currently method_size_ and count_ are unused. + UNUSED(method_size_); + UNUSED(count_); + } + + bool IsAbove(double v) const { return usedPercent_ >= v; } + double GetUsedPercent() const { return usedPercent_; } + uint32_t GetCount() const { return count_; } + double GetTopKUsedPercentage() const { return topKUsedPercentage_; } + + private: + std::string method_name_; // Method name. + uint32_t count_; // Number of times it has been called. + uint32_t method_size_; // Size of the method in dex instructions. + double usedPercent_; // Percentage of how many times this method was called. + double topKUsedPercentage_; // The percentage of the group that comprises K% of the total used + // methods this method belongs to. +}; + +// Profile data is stored in a map, indexed by the full method name. +typedef std::map<std::string, ProfileData> ProfileMap; + +class ProfileHelper { + private: + ProfileHelper(); + + public: + // Read the profile data from the given file. Calculates the percentage for each method. + // Returns false if there was no profile file or it was malformed. + static bool LoadProfileMap(ProfileMap& profileMap, const std::string& fileName); + + // Read the profile data from the given file and compute the group that comprises + // topKPercentage of the total used methods.
+ static bool LoadTopKSamples(std::set<std::string>& topKMethods, const std::string& fileName, + double topKPercentage); +}; + } // namespace art #endif // ART_RUNTIME_PROFILER_H_ diff --git a/runtime/runtime.cc b/runtime/runtime.cc index edc3b33..a19fa53 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -121,6 +121,7 @@ Runtime::Runtime() profile_duration_s_(0), profile_interval_us_(0), profile_backoff_coefficient_(0), + profile_start_immediately_(true), method_trace_(false), method_trace_file_size_(0), instrumentation_(), @@ -391,7 +392,7 @@ bool Runtime::Start() { if (fd >= 0) { close(fd); } - StartProfiler(profile_output_filename_.c_str(), "", true); + StartProfiler(profile_output_filename_.c_str(), ""); } return true; @@ -616,6 +617,7 @@ bool Runtime::Init(const Options& raw_options, bool ignore_unrecognized) { profile_duration_s_ = options->profile_duration_s_; profile_interval_us_ = options->profile_interval_us_; profile_backoff_coefficient_ = options->profile_backoff_coefficient_; + profile_start_immediately_ = options->profile_start_immediately_; profile_ = options->profile_; profile_output_filename_ = options->profile_output_filename_; // TODO: move this to just be an Trace::Start argument @@ -1143,10 +1145,9 @@ void Runtime::RemoveMethodVerifier(verifier::MethodVerifier* verifier) { method_verifiers_.erase(it); } -void Runtime::StartProfiler(const char* appDir, const char* procName, bool startImmediately) { +void Runtime::StartProfiler(const char* appDir, const char* procName) { BackgroundMethodSamplingProfiler::Start(profile_period_s_, profile_duration_s_, appDir, - procName, profile_interval_us_, - profile_backoff_coefficient_, startImmediately); + procName, profile_interval_us_, profile_backoff_coefficient_, profile_start_immediately_); } // Transaction support. 
diff --git a/runtime/runtime.h b/runtime/runtime.h index e94072c..462711e 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -374,7 +374,7 @@ class Runtime { const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader); void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path); - void StartProfiler(const char* appDir, const char* procName, bool startImmediately = false); + void StartProfiler(const char* appDir, const char* procName); void UpdateProfilerState(int state); // Transaction support. @@ -542,6 +542,8 @@ class Runtime { uint32_t profile_duration_s_; // Run profile for n seconds. uint32_t profile_interval_us_; // Microseconds between samples. double profile_backoff_coefficient_; // Coefficient to exponential backoff. + bool profile_start_immediately_; // Whether the profiler should start upon app + // startup or be delayed by some random offset. bool method_trace_; std::string method_trace_file_; |