-rw-r--r--  compiler/dex/quick/arm/call_arm.cc       |  28
-rw-r--r--  compiler/dex/quick/arm/utility_arm.cc    |  14
-rw-r--r--  compiler/dex/quick/codegen_util.cc       |   3
-rw-r--r--  compiler/dex/quick/gen_common.cc         |  14
-rw-r--r--  compiler/dex/quick/gen_invoke.cc         |  28
-rw-r--r--  compiler/dex/quick/mir_to_lir.cc         |   1
-rw-r--r--  compiler/dex/quick/mir_to_lir.h          |   1
-rw-r--r--  runtime/arch/arm/fault_handler_arm.cc    | 149
-rw-r--r--  runtime/entrypoints/entrypoint_utils.cc  |   4
-rw-r--r--  runtime/fault_handler.cc                 |  25
-rw-r--r--  runtime/stack.cc                         |  61
-rw-r--r--  runtime/stack.h                          |   8
-rw-r--r--  runtime/thread.cc                        |  65
-rw-r--r--  runtime/thread.h                         |  21
14 files changed, 380 insertions(+), 42 deletions(-)
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 175fc06..d6724f1 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -183,15 +183,18 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   LockCallTemps();  // Prepare for explicit register usage
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
-    LIR* null_check_branch;
+    LIR* null_check_branch = nullptr;
     if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
     }
     LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
     LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
     NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
@@ -216,8 +219,8 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
     LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
-    MarkPossibleNullPointerException(opt_flags);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
     OpRegImm(kOpCmp, rs_r1, 0);
     OpIT(kCondEq, "");
     NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
@@ -241,7 +244,7 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
   LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
-  LIR* null_check_branch;
+  LIR* null_check_branch = nullptr;
   LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
@@ -249,9 +252,12 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
     }
     LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_r3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
     StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
@@ -404,11 +410,17 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
     }
   } else {
     // Implicit stack overflow check.
-    // Generate a load from [sp, #-framesize].  If this is in the stack
+    // Generate a load from [sp, #-overflowsize].  If this is in the stack
     // redzone we will get a segmentation fault.
-    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
-    LoadWordDisp(rs_rARM_SP, 0, rs_rARM_LR);
+    //
+    // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+    // we need to make sure that it's loadable in an immediate field of
+    // a sub instruction.  Otherwise we will get a temp allocation and the
+    // code size will increase.
+    OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+    LoadWordDisp(rs_r12, 0, rs_r12);
     MarkPossibleStackOverflowException();
+    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
   }
 } else {
   OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
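The entry-sequence change above replaces a destructive SP update followed by a load with a probing load through r12, taken before SP moves. A minimal sketch of what the emitted two-instruction probe does at runtime; the constant and helper names below are illustrative stand-ins, not ART's emitter API:

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative constant standing in for Thread::kStackOverflowReservedBytes.
constexpr std::size_t kStackOverflowReservedBytes = 16 * 1024;

// C-level equivalent of the emitted probe:
//   sub r12, sp, #kStackOverflowReservedBytes
//   ldr r12, [r12, #0]   ; faults iff the address is in the protected region
inline void ProbeStackRedzone(const uint8_t* sp) {
  const volatile uint32_t* probe =
      reinterpret_cast<const volatile uint32_t*>(sp - kStackOverflowReservedBytes);
  (void)*probe;  // Value is discarded; only the potential SIGSEGV matters.
}
```

Because the probe uses r12 rather than SP, a fault is taken while SP still points at the caller's frame, which is what lets the fault handler later compare the fault address against `sp - kStackOverflowReservedBytes`.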
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 1ec0a2c..8df5b25 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -828,6 +828,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
   int encoded_disp = displacement;
   bool already_generated = false;
   int dest_low_reg = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
+  bool null_pointer_safepoint = false;
   switch (size) {
     case kDouble:
     case kLong:
@@ -848,6 +849,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
           displacement >> 2);
       } else {
         load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), kWord, s_reg);
+        null_pointer_safepoint = true;
         LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), kWord, INVALID_SREG);
       }
       already_generated = true;
@@ -939,6 +941,11 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+  } else {
+    // We might need to generate a safepoint if we have two store instructions (wide or double).
+    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+      MarkSafepointPC(load);
+    }
   }
   return load;
 }
@@ -965,6 +972,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
   int encoded_disp = displacement;
   bool already_generated = false;
   int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
+  bool null_pointer_safepoint = false;
   switch (size) {
     case kLong:
     case kDouble:
@@ -974,6 +982,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
           displacement >> 2);
       } else {
         store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), kWord);
+        null_pointer_safepoint = true;
         StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), kWord);
       }
       already_generated = true;
@@ -1061,6 +1070,11 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
   // TODO: In future, may need to differentiate Dalvik & spill accesses
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+  } else {
+    // We might need to generate a safepoint if we have two store instructions (wide or double).
+    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+      MarkSafepointPC(store);
+    }
   }
   return store;
 }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 717ad86..4c6c7a4 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -702,7 +702,8 @@ void Mir2Lir::CreateNativeGcMap() {
     uint32_t native_offset = it.NativePcOffset();
     uint32_t dex_pc = it.DexPc();
     const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
+    CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc <<
+        ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
     native_gc_map_builder.AddEntry(native_offset, references);
   }
 }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2afa5ca..866ce5f 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -74,14 +74,19 @@ LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, T
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
   if (Runtime::Current()->ExplicitNullChecks()) {
-    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-      return NULL;
-    }
-    return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+    return GenExplicitNullCheck(m_reg, opt_flags);
   }
   return nullptr;
 }
 
+/* Perform an explicit null-check on a register.  */
+LIR* Mir2Lir::GenExplicitNullCheck(RegStorage m_reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return NULL;
+  }
+  return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+}
+
 void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
   if (!Runtime::Current()->ExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
@@ -732,6 +737,7 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
       OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
       rl_result = EvalLoc(rl_dest, reg_class, true);
       LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
+      MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
         // Without context sensitive analysis, we must issue the most conservative barriers.
         // In this case, either a load or store may follow so we issue both barriers.
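After this split, GenNullCheck only emits code in the explicit mode, while MarkPossibleNullPointerException records a safepoint in the implicit mode. A toy model of that division of labor; the types and calls below are illustrative, not the real Mir2Lir interfaces:

```cpp
#include <cstdio>

// Illustrative only: models the GenNullCheck/MarkPossibleNullPointerException
// split, not ART's actual code generator.
struct Codegen {
  bool explicit_null_checks;

  // Explicit mode: emit a compare-and-branch to the NPE throw path.
  void GenNullCheck() {
    if (explicit_null_checks) {
      std::puts("cmp rX, #0; beq <throw_npe>");  // a real branch in the code stream
    }
    // Implicit mode: emit nothing here; the following memory access is the check.
  }

  // Implicit mode: record that the *next* memory access may fault, so the
  // fault handler can map the faulting PC back to a dex PC and a GC map.
  void MarkPossibleNullPointerException() {
    if (!explicit_null_checks) {
      std::puts("record safepoint at current PC");
    }
  }
};
```

This also explains the `null_pointer_safepoint` flag above: a wide load or store is split into two instructions, and only the first can fault on a null base, so the second needs its own safepoint record.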
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index a0242d5..7689b51 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -781,7 +781,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
                                type, skip_this);
 
   if (pcrLabel) {
-    *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
   }
   return call_state;
 }
@@ -987,7 +997,17 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                               direct_code, direct_method, type);
   if (pcrLabel) {
-    *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
   }
   return call_state;
 }
@@ -1299,7 +1319,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
     LoadValueDirectFixed(rl_start, reg_start);
   }
   RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf));
-  GenNullCheck(reg_ptr, info->opt_flags);
+  GenExplicitNullCheck(reg_ptr, info->opt_flags);
   LIR* high_code_point_branch =
       rl_char.is_const ? nullptr : OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, nullptr);
   // NOTE: not a safepoint
@@ -1337,7 +1357,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) {
   LoadValueDirectFixed(rl_cmp, reg_cmp);
   RegStorage r_tgt = (cu_->instruction_set != kX86) ?
      LoadHelper(QUICK_ENTRYPOINT_OFFSET(pStringCompareTo)) : RegStorage::InvalidReg();
-  GenNullCheck(reg_this, info->opt_flags);
+  GenExplicitNullCheck(reg_this, info->opt_flags);
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
   // TUNING: check if rl_cmp.s_reg_low is already null checked
   LIR* cmp_null_check_branch = OpCmpImmBranch(kCondEq, reg_cmp, 0, nullptr);
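For invokes, the receiver is null-checked by actually touching it: the AllocTemp/LoadWordDisp/FreeTemp sequence above loads the first word of the object into a throwaway register. A one-function illustration of why that works under implicit checking; the helper is hypothetical, not ART code:

```cpp
#include <cstdint>

// If 'receiver' is null the load faults, and the SIGSEGV handler converts the
// fault into a NullPointerException at the safepoint recorded for this PC.
inline void NullCheckByLoad(const void* receiver) {
  const volatile uintptr_t* first_word =
      reinterpret_cast<const volatile uintptr_t*>(receiver);
  (void)*first_word;  // Result is discarded, mirroring the FreeTemp'd register.
}
```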
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e81a037..cd3dadb 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -446,6 +446,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
       GenNullCheck(rl_src[0].reg, opt_flags);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       LoadWordDisp(rl_src[0].reg, len_offset, rl_result.reg);
+      MarkPossibleNullPointerException(opt_flags);
       StoreValue(rl_dest, rl_result);
       break;
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8614151..10f431f 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -564,6 +564,7 @@ class Mir2Lir : public Backend {
     void ForceImplicitNullCheck(RegStorage reg, int opt_flags);
     LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind);
     LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
+    LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
     LIR* GenRegRegCheck(ConditionCode c_code, RegStorage reg1, RegStorage reg2, ThrowKind kind);
     void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_src2,
                              LIR* taken, LIR* fall_through);
+#include "mirror/art_method-inl.h" #include "thread.h" #include "thread-inl.h" @@ -31,16 +34,38 @@ namespace art { extern "C" void art_quick_throw_null_pointer_exception(); +extern "C" void art_quick_throw_stack_overflow(void*); extern "C" void art_quick_test_suspend(); +// Get the size of a thumb2 instruction in bytes. +static uint32_t GetInstructionSize(uint8_t* pc) { + uint16_t instr = pc[0] | pc[1] << 8; + bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800); + uint32_t instr_size = is_32bit ? 4 : 2; + return instr_size; +} + void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintptr_t& return_pc) { struct ucontext *uc = (struct ucontext *)context; struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); uintptr_t* sp = reinterpret_cast<uint32_t*>(sc->arm_sp); + LOG(DEBUG) << "sp: " << sp; if (sp == nullptr) { return; } + // In the case of a stack overflow, the stack is not valid and we can't + // get the method from the top of the stack. However it's in r0. + uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address); + uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>( + reinterpret_cast<uint8_t*>(sp) - Thread::kStackOverflowReservedBytes); + if (overflow_addr == fault_addr) { + method = sc->arm_r0; + } else { + // The method is at the top of the stack. + method = sp[0]; + } + // Work out the return PC. This will be the address of the instruction // following the faulting ldr/str instruction. This is in thumb mode so // the instruction might be a 16 or 32 bit one. Also, the GC map always @@ -48,13 +73,8 @@ void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintpt // Need to work out the size of the instruction that caused the exception. uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc); - - uint16_t instr = ptr[0] | ptr[1] << 8; - bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800); - uint32_t instr_size = is_32bit ? 4 : 2; - - // The method is at the top of the stack. - method = sp[0]; + LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr); + uint32_t instr_size = GetInstructionSize(ptr); return_pc = (sc->arm_pc + instr_size) | 1; } @@ -71,9 +91,7 @@ bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) { struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc); - uint16_t instr = ptr[0] | ptr[1] << 8; - bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800); - uint32_t instr_size = is_32bit ? 4 : 2; + uint32_t instr_size = GetInstructionSize(ptr); sc->arm_lr = (sc->arm_pc + instr_size) | 1; // LR needs to point to gc map location sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception); LOG(DEBUG) << "Generating null pointer exception"; @@ -142,7 +160,116 @@ bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) { return false; } +// Stack overflow fault handler. +// +// This checks that the fault address is equal to the current stack pointer +// minus the overflow region size (16K typically). The instruction sequence +// that generates this signal is: +// +// sub r12,sp,#16384 +// ldr.w r12,[r12,#0] +// +// The second instruction will fault if r12 is inside the protected region +// on the stack. +// +// If we determine this is a stack overflow we need to move the stack pointer +// to the overflow region below the protected region. 
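GetInstructionSize encodes the Thumb-2 rule that an instruction is 32 bits wide when its first halfword starts with 0b11101, 0b11110 or 0b11111; everything else is 16 bits. A standalone check of the same mask logic against encodings drawn from this patch's own comments (the 16-bit sample opcode is an illustrative addition):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the handler's width test: 32-bit Thumb-2 instructions begin with a
// halfword whose top bits are 0b11101 (0xE800 group) or 0b1111x (0xF000 group).
static uint32_t ThumbInstructionSize(uint16_t first_halfword) {
  bool is_32bit = ((first_halfword & 0xF000) == 0xF000) ||
                  ((first_halfword & 0xF800) == 0xE800);
  return is_32bit ? 4 : 2;
}

int main() {
  assert(ThumbInstructionSize(0x6800) == 2);  // ldr r0, [r0]       (16-bit)
  assert(ThumbInstructionSize(0xF8DC) == 4);  // ldr.w r12, [r12]   (first halfword)
  assert(ThumbInstructionSize(0xE92D) == 4);  // push.w {r5,r6,r7,lr} = e92d40e0
  return 0;
}
```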
@@ -142,7 +160,116 @@ bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
   return false;
 }
 
+// Stack overflow fault handler.
+//
+// This checks that the fault address is equal to the current stack pointer
+// minus the overflow region size (16K typically).  The instruction sequence
+// that generates this signal is:
+//
+// sub r12,sp,#16384
+// ldr.w r12,[r12,#0]
+//
+// The second instruction will fault if r12 is inside the protected region
+// on the stack.
+//
+// If we determine this is a stack overflow we need to move the stack pointer
+// to the overflow region below the protected region.  Because we now have
+// a gap in the stack (skips over protected region), we need to arrange
+// for the rest of the system to be unaware of the new stack arrangement
+// and behave as if there is a fully valid stack.  We do this by placing
+// a unique address onto the stack followed by
+// the size of the gap.  The stack walker will detect this and skip over the
+// gap.
+//
+// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
+// stack must be 8 byte aligned when making any calls.
+//
+// NB. The size of the gap is the difference between the previous frame's SP and
+// the SP at which the size word is pushed.
+
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = (struct ucontext *)context;
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
+  LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+
+  uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
+  LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+  LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
+  LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+      ", fault_addr: " << fault_addr;
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    LOG(DEBUG) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // the exists below the protected region.  R9 contains the current Thread* so
+  // we can read the stack_end from that and subtract the size of the
+  // protected region.  This creates a gap in the stack that needs to be marked.
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+
+  uint8_t* prevsp = sp;
+  sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
+  LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
+
+  // We need to find the previous frame.  Remember that
+  // this has not yet been fully constructed because the SP has not been
+  // decremented.  So we need to work out the size of the spill portion of the
+  // frame.  This consists of something like:
+  //
+  // 0xb6a1d49c: e92d40e0  push    {r5, r6, r7, lr}
+  // 0xb6a1d4a0: ed2d8a06  vpush.f32 {s16-s21}
+  //
+  // The first is encoded in the ArtMethod as the spill_mask, the second as the
+  // fp_spill_mask.  A population count on each will give the number of registers
+  // in each mask.  Each register is 4 bytes on ARM32.
+
+  mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
+  uint32_t spill_mask = method->GetCoreSpillMask();
+  uint32_t numcores = __builtin_popcount(spill_mask);
+  uint32_t fp_spill_mask = method->GetFpSpillMask();
+  uint32_t numfps = __builtin_popcount(fp_spill_mask);
+  uint32_t spill_size = (numcores + numfps) * 4;
+  LOG(DEBUG) << "spill size: " << spill_size;
+  uint8_t* prevframe = prevsp + spill_size;
+  LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
+
+  // NOTE: the ARM EABI needs an 8 byte alignment.  In the case of ARM32 a pointer
+  // is 4 bytes so that, together with the offset to the previous frame is 8
+  // bytes.  On other architectures we will need to align the stack.
+
+  // Push a marker onto the stack to tell the stack walker that there is a stack
+  // overflow and the stack is not contiguous.
+
+  // First the offset from SP to the previous frame.
+  sp -= sizeof(uint32_t);
+  LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
+  *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
+
+  // Now the gap marker (pointer sized).
+  sp -= sizeof(mirror::ArtMethod*);
+  *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+
+  // Now establish the stack pointer for the signal return.
+  sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+  // We need the LR to point to the GC map just after the fault instruction.
+  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
+  uint32_t instr_size = GetInstructionSize(ptr);
+  sc->arm_lr = (sc->arm_pc + instr_size) | 1;  // LR needs to point to gc map location
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+
+  // The kernel will now return to the address in sc->arm_pc.  We have arranged the
+  // stack pointer to be in the overflow region.  Throwing the exception will perform
+  // a longjmp which will restore the stack pointer to the correct location for the
+  // exception catch.
+  return true;
 }
 
 }       // namespace art
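The spill-size arithmetic in the handler can be sanity-checked offline against the example encoding quoted in its comment: push {r5, r6, r7, lr} is e92d40e0, so the core register list is 0x40E0. A small sketch; the FP mask value is an assumed illustration of six single-precision registers, not a real encoding extracted from ed2d8a06:

```cpp
#include <cstdint>
#include <cstdio>

// Spill area size as the handler computes it: 4 bytes per core register in
// the spill mask plus 4 bytes per single-precision FP register.
static uint32_t SpillSize(uint32_t core_spill_mask, uint32_t fp_spill_mask) {
  return (__builtin_popcount(core_spill_mask) +
          __builtin_popcount(fp_spill_mask)) * 4;
}

int main() {
  // push {r5, r6, r7, lr} -> bits 5, 6, 7, 14 -> mask 0x40E0 (4 registers)
  // vpush {s16-s21}       -> 6 FP registers; bit placement below is illustrative
  uint32_t core_mask = 0x40E0;
  uint32_t fp_mask = 0x003F0000;
  std::printf("spill size: %u bytes\n", SpillSize(core_mask, fp_mask));  // 40
  return 0;
}
```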
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 9e5f54c..c81706f 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -134,7 +134,9 @@ void ThrowStackOverflowError(Thread* self) {
     LOG(ERROR) << "Couldn't throw new StackOverflowError because JNI ThrowNew failed.";
     CHECK(self->IsExceptionPending());
   }
-  self->ResetDefaultStackEnd();  // Return to default stack size.
+
+  bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
+  self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
 }
 
 JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index f9f3e25..fcb567e 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -61,8 +61,11 @@ void FaultManager::Init() {
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
   bool handled = false;
+  LOG(DEBUG) << "Handling fault";
   if (IsInGeneratedCode(context)) {
+    LOG(DEBUG) << "in generated code, looking for handler";
     for (auto& handler : handlers_) {
+      LOG(DEBUG) << "invoking Action on handler " << handler;
       handled = handler->Action(sig, info, context);
       if (handled) {
         return;
@@ -71,7 +74,7 @@ void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
   }
 
   if (!handled) {
-    LOG(INFO)<< "Caught unknown SIGSEGV in ART fault handler";
+    LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
     oldaction_.sa_sigaction(sig, info, context);
   }
 }
@@ -96,19 +99,23 @@ void FaultManager::RemoveHandler(FaultHandler* handler) {
 bool FaultManager::IsInGeneratedCode(void *context) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
+  LOG(DEBUG) << "Checking for generated code";
   Thread* thread = Thread::Current();
   if (thread == nullptr) {
+    LOG(DEBUG) << "no current thread";
     return false;
   }
 
   ThreadState state = thread->GetState();
   if (state != kRunnable) {
+    LOG(DEBUG) << "not runnable";
     return false;
   }
 
   // Current thread is runnable.
   // Make sure it has the mutator lock.
   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
+    LOG(DEBUG) << "no lock";
     return false;
   }
 
@@ -120,7 +127,9 @@ bool FaultManager::IsInGeneratedCode(void *context) {
   GetMethodAndReturnPC(context, /*out*/potential_method, /*out*/return_pc);
 
   // If we don't have a potential method, we're outta here.
+  LOG(DEBUG) << "potential method: " << potential_method;
   if (potential_method == 0) {
+    LOG(DEBUG) << "no method";
     return false;
   }
 
@@ -133,19 +142,23 @@ bool FaultManager::IsInGeneratedCode(void *context) {
   // Check that the class pointer inside the object is not null and is aligned.
   mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
   if (cls == nullptr) {
+    LOG(DEBUG) << "not a class";
     return false;
   }
   if (!IsAligned<kObjectAlignment>(cls)) {
+    LOG(DEBUG) << "not aligned";
     return false;
   }
 
   if (!VerifyClassClass(cls)) {
+    LOG(DEBUG) << "not a class class";
     return false;
   }
 
   // Now make sure the class is a mirror::ArtMethod.
   if (!cls->IsArtMethodClass()) {
+    LOG(DEBUG) << "not a method";
     return false;
   }
 
@@ -153,7 +166,15 @@ bool FaultManager::IsInGeneratedCode(void *context) {
   // at the return PC address.
   mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(potential_method);
-  return method->ToDexPc(return_pc, false) != DexFile::kDexNoIndex;
+  if (true || kIsDebugBuild) {
+    LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method);
+    uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
+    LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+  }
+  uint32_t dexpc = method->ToDexPc(return_pc, false);
+  LOG(DEBUG) << "dexpc: " << dexpc;
+  return dexpc != DexFile::kDexNoIndex;
 }
 
 //
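The end-to-end shape of this machinery — SIGSEGV arrives, the manager checks that the fault came from managed code, and a handler redirects control to a throw routine — can be demonstrated in miniature with POSIX signals. A self-contained sketch that recovers via siglongjmp instead of rewriting the saved PC; this is demo-grade (fprintf and siglongjmp in a SIGSEGV handler are not production practice):

```cpp
#include <csetjmp>
#include <csignal>
#include <cstdio>

static sigjmp_buf recovery_point;

// Stand-in for a FaultHandler::Action: decide the fault is "ours" and divert
// control to a recovery path instead of letting the process die.
static void FaultAction(int sig, siginfo_t* info, void*) {
  std::fprintf(stderr, "fault at %p, diverting to throw path\n", info->si_addr);
  siglongjmp(recovery_point, 1);
}

int main() {
  struct sigaction sa = {};
  sa.sa_sigaction = FaultAction;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);

  if (sigsetjmp(recovery_point, 1) == 0) {
    volatile int* null_ptr = nullptr;
    (void)*null_ptr;  // Implicit "null check": the load faults.
  } else {
    std::puts("recovered: this is where an NPE would be thrown");
  }
  return 0;
}
```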
diff --git a/runtime/stack.cc b/runtime/stack.cc
index c33d1ab..ab3bd85 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -16,6 +16,7 @@
 
 #include "stack.h"
 
+#include "base/hex_dump.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
@@ -23,6 +24,7 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
+#include "thread.h"
 #include "thread_list.h"
 #include "throw_location.h"
 #include "verify_object-inl.h"
@@ -30,6 +32,14 @@
 
 namespace art {
 
+// Define a piece of memory, the address of which can be used as a marker
+// for the gap in the stack added during stack overflow handling.
+static uint32_t stack_overflow_object;
+
+// The stack overflow gap marker is simply a valid unique address.
+void* stack_overflow_gap_marker = &stack_overflow_object;
+
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
@@ -294,20 +304,56 @@ void StackVisitor::WalkStack(bool include_transitions) {
   CHECK_EQ(cur_depth_, 0U);
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
+
+  bool kDebugStackWalk = false;
+  bool kDebugStackWalkVeryVerbose = false;  // The name says it all.
+
+  if (kDebugStackWalk) {
+    LOG(INFO) << "walking stack";
+  }
   for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
+    if (kDebugStackWalkVeryVerbose) {
+      LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
+      LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
+    }
+
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
       mirror::ArtMethod* method = *cur_quick_frame_;
       while (method != NULL) {
-        SanityCheckFrame();
-        bool should_continue = VisitFrame();
-        if (UNLIKELY(!should_continue)) {
-          return;
+        // Check for a stack overflow gap marker.
+        if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
+          // Marker for a stack overflow.  This is followed by the offset from the
+          // current SP to the next frame.  There is a gap in the stack here.  Jump
+          // the gap silently.
+          // Caveat coder: the layout of the overflow marker depends on the architecture.
+          // The first element is address sized (8 bytes on a 64 bit machine).  The second
+          // element is 32 bits.  So be careful with those address calculations.
+
+          // Get the address of the offset, just beyond the marker pointer.
+          byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
+          uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
+          CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
+          mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
+              reinterpret_cast<byte*>(gapsizeaddr) + gap);
+          if (kDebugStackWalk) {
+            LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
+                next_frame;
+          }
+          cur_quick_frame_ = next_frame;
+          method = *next_frame;
+          CHECK(method != nullptr);
+        } else {
+          SanityCheckFrame();
+          bool should_continue = VisitFrame();
+          if (UNLIKELY(!should_continue)) {
+            return;
+          }
        }
         if (context_ != NULL) {
           context_->FillCalleeSaves(*this);
@@ -317,6 +363,9 @@ void StackVisitor::WalkStack(bool include_transitions) {
         size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
+        if (kDebugStackWalkVeryVerbose) {
+          LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
+        }
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -349,6 +398,10 @@ void StackVisitor::WalkStack(bool include_transitions) {
         cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
         cur_depth_++;
         method = *cur_quick_frame_;
+        if (kDebugStackWalkVeryVerbose) {
+          LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
+          LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
+        }
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
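The gap-skipping protocol the walker implements — a pointer-sized marker slot, a 32-bit gap size just above it, and the next frame located gap bytes beyond the size word — can be simulated on a plain byte array. A sketch assuming the 32-bit ARM layout; kGapMarker and the buffer are stand-ins, not runtime values:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Simulated 32-bit stack layout, mirroring WalkStack's marker handling:
// [marker][gap_size] ... (gap_size bytes past &gap_size) ... [next method]
static const uint32_t kGapMarker = 0xdeadbeef;  // stand-in for the marker address

int main() {
  uint8_t stack[64] = {};

  // Handler side: lay down the marker, the gap size, and a fake next frame.
  uint32_t gap = 16;
  std::memcpy(&stack[0], &kGapMarker, sizeof(kGapMarker));  // marker slot
  std::memcpy(&stack[4], &gap, sizeof(gap));                // gap size slot
  uint32_t fake_method = 0x1234;
  std::memcpy(&stack[4 + gap], &fake_method, sizeof(fake_method));

  // Walker side: recognize the marker, read the gap, and jump over it.
  uint32_t slot;
  std::memcpy(&slot, &stack[0], sizeof(slot));
  if (slot == kGapMarker) {
    uint32_t gap_size;
    std::memcpy(&gap_size, &stack[4], sizeof(gap_size));
    uint32_t method;
    std::memcpy(&method, &stack[4 + gap_size], sizeof(method));
    std::printf("skipped %u-byte gap, next method = 0x%x\n", gap_size, method);
  }
  return 0;
}
```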
diff --git a/runtime/stack.h b/runtime/stack.h
index 4ee5de1..ab903d6 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -100,6 +100,14 @@ enum VRegBaseRegNum : int {
   kVRegNonSpecialTempBaseReg = -3,
 };
 
+// Special object used to mark the gap in the stack placed when a stack
+// overflow fault occurs during implicit stack checking.  This is not
+// a real object - it is used simply as a valid address to which a
+// mirror::ArtMethod* can be compared during a stack walk.  It is inserted
+// into the stack during the stack overflow signal handling to mark the gap
+// in which the memory is protected against read and write.
+extern void* stack_overflow_gap_marker;
+
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
 class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 38e4204..3692b9f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -215,10 +215,16 @@ static size_t FixStackSize(size_t stack_size) {
     stack_size = PTHREAD_STACK_MIN;
   }
 
-  // It's likely that callers are trying to ensure they have at least a certain amount of
-  // stack space, so we should add our reserved space on top of what they requested, rather
-  // than implicitly take it away from them.
-  stack_size += Thread::kStackOverflowReservedBytes;
+  if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    // It's likely that callers are trying to ensure they have at least a certain amount of
+    // stack space, so we should add our reserved space on top of what they requested, rather
+    // than implicitly take it away from them.
+    stack_size += Thread::kStackOverflowReservedBytes;
+  } else {
+    // If we are going to use implicit stack checks, allocate space for the protected
+    // region at the bottom of the stack.
+    stack_size += Thread::kStackOverflowImplicitCheckSize;
+  }
 
   // Some systems require the stack size to be a multiple of the system page size, so round up.
   stack_size = RoundUp(stack_size, kPageSize);
@@ -226,6 +232,39 @@ static size_t FixStackSize(size_t stack_size) {
   return stack_size;
 }
 
+// Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
+// overflow is detected.  It is located right below the stack_end_.  Just below that
+// is the StackOverflow reserved region used when creating the StackOverflow
+// exception.
+void Thread::InstallImplicitProtection(bool is_main_stack) {
+  byte* pregion = stack_end_;
+
+  constexpr uint32_t kMarker = 0xdadadada;
+  uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
+  if (*marker == kMarker) {
+    // The region has already been set up.
+    return;
+  }
+  // Add marker so that we can detect a second attempt to do this.
+  *marker = kMarker;
+
+  pregion -= kStackOverflowProtectedSize;
+
+  // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
+  // need to do this on the main stack.
+  if (is_main_stack) {
+    memset(pregion, 0x55, kStackOverflowProtectedSize);
+  }
+  VLOG(threads) << "installing stack protected region at " << std::hex <<
+      static_cast<void*>(pregion) << " to " <<
+      static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
+
+  if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
+    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
+        << strerror(errno);
+  }
+}
+
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
   CHECK(java_peer != nullptr);
   Thread* self = static_cast<JNIEnvExt*>(env)->self;
@@ -472,7 +511,22 @@ void Thread::InitStackHwm() {
 #endif
 
   // Set stack_end_ to the bottom of the stack saving space of stack overflows
-  ResetDefaultStackEnd();
+  bool implicit_stack_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+  ResetDefaultStackEnd(implicit_stack_check);
+
+  // Install the protected region if we are doing implicit overflow checks.
+  if (implicit_stack_check) {
+    if (is_main_thread) {
+      // The main thread has a 16K protected region at the bottom.  We need
+      // to install our own region so we need to move the limits
+      // of the stack to make room for it.
+      constexpr uint32_t kDelta = 16 * KB;
+      stack_begin_ += kDelta;
+      stack_end_ += kDelta;
+      stack_size_ -= kDelta;
+    }
+    InstallImplicitProtection(is_main_thread);
+  }
 
   // Sanity check.
   int stack_variable;
@@ -967,6 +1021,7 @@ Thread::Thread(bool daemon)
       pthread_self_(0),
       no_thread_suspension_(0),
       last_no_thread_suspension_cause_(nullptr),
+      suspend_trigger_(reinterpret_cast<uintptr_t*>(&suspend_trigger_)),
       thread_exit_check_count_(0),
       thread_local_start_(nullptr),
       thread_local_pos_(nullptr),
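InstallImplicitProtection reduces to standard POSIX machinery: touch the pages so they are resident, then mprotect them PROT_NONE. A self-contained sketch against an anonymous mapping rather than a live thread stack; the size mirrors the patch's kStackOverflowProtectedSize, everything else is illustrative:

```cpp
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <sys/mman.h>

int main() {
  const size_t kProtectedSize = 32 * 1024;  // mirrors kStackOverflowProtectedSize

  // Stand-in for the low end of a thread stack.
  void* region = mmap(nullptr, kProtectedSize, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (region == MAP_FAILED) {
    std::perror("mmap");
    return 1;
  }

  // Touch the pages so they are mapped in, then revoke all access.
  std::memset(region, 0x55, kProtectedSize);
  if (mprotect(region, kProtectedSize, PROT_NONE) == -1) {
    std::fprintf(stderr, "mprotect failed: %s\n", std::strerror(errno));
    return 1;
  }
  std::printf("protected region installed at %p\n", region);

  // Any read or write of 'region' from here on raises SIGSEGV, which is
  // exactly the trap the implicit stack overflow probe relies on.
  munmap(region, kProtectedSize);
  return 0;
}
```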
diff --git a/runtime/thread.h b/runtime/thread.h
index 32875e6..63d22c5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -112,6 +112,14 @@ class PACKED(4) Thread {
   static constexpr size_t kStackOverflowReservedUsableBytes =
       kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
+  // For implicit overflow checks we reserve an extra piece of memory at the bottom
+  // of the stack (lowest memory).  The higher portion of the memory
+  // is protected against reads and the lower is available for use while
+  // throwing the StackOverflow exception.
+  static constexpr size_t kStackOverflowProtectedSize = 32 * KB;
+  static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
+      kStackOverflowReservedBytes;
+
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
   static void CreateNativeThread(JNIEnv* env, jobject peer, size_t stack_size, bool daemon);
@@ -461,12 +469,21 @@ class PACKED(4) Thread {
   void SetStackEndForStackOverflow() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Set the stack end to that to be used during regular execution
-  void ResetDefaultStackEnd() {
+  void ResetDefaultStackEnd(bool implicit_overflow_check) {
     // Our stacks grow down, so we want stack_end_ to be near there, but reserving enough room
     // to throw a StackOverflowError.
-    stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+    if (implicit_overflow_check) {
+      // For implicit checks we also need to add in the protected region above the
+      // overflow region.
+      stack_end_ = stack_begin_ + kStackOverflowImplicitCheckSize;
+    } else {
+      stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+    }
   }
 
+  // Install the protected region for implicit stack checks.
+  void InstallImplicitProtection(bool is_main_stack);
+
   bool IsHandlingStackOverflow() const {
     return stack_end_ == stack_begin_;
   }