author     Dave Allison <dallison@google.com>   2014-03-27 15:10:22 -0700
committer  Dave Allison <dallison@google.com>   2014-03-31 18:04:08 -0700
commit     f943914730db8ad2ff03d49a2cacd31885d08fd7 (patch)
tree       885a781e5f8bd852e2c1615108ae7b17576a6567
parent     cfd5acf281b0c509f86b13d73c6a8dfa3ea9922c (diff)
Implement implicit stack overflow checks
This also fixes some failing run tests due to missing null pointer markers.

The implementation of the implicit stack overflow checks introduces the ability to have a gap in the stack that is skipped during stack walk backs. This gap is protected against read/write and is used to trigger a SIGSEGV at function entry if the stack will overflow.

Change-Id: I0c3e214c8b87dc250cf886472c6d327b5d58653e
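In outline, the change works like this: a read/write-protected region is placed at the low end of each thread stack, the compiler emits a cheap load below SP at every method entry, and a SIGSEGV handler recognizes the resulting fault by its address and converts it into a StackOverflowError. The following is a minimal standalone sketch of that mechanism in plain POSIX C++, not ART code; the real handler additionally has to recover a usable SP and redirect the PC to art_quick_throw_stack_overflow.

#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <sys/mman.h>
#include <unistd.h>

// Sketch only: protect a region that stands in for the bottom of a thread
// stack, then "probe" it the way the generated entry sequence does. The
// SIGSEGV handler checks whether the faulting address lies in the protected
// region, which is how a stack overflow is told apart from an ordinary crash.
static uint8_t* g_protected = nullptr;
static size_t g_protected_size = 0;

static void Handler(int, siginfo_t* info, void*) {
  uint8_t* fault_addr = static_cast<uint8_t*>(info->si_addr);
  bool in_guard =
      fault_addr >= g_protected && fault_addr < g_protected + g_protected_size;
  // ART would redirect the PC to art_quick_throw_stack_overflow here;
  // this sketch just reports and exits.
  const char* msg = in_guard ? "fault inside guard region\n" : "unrelated fault\n";
  write(STDERR_FILENO, msg, std::strlen(msg));
  _exit(in_guard ? 0 : 1);
}

int main() {
  g_protected_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  g_protected = static_cast<uint8_t*>(mmap(nullptr, g_protected_size,
                                           PROT_READ | PROT_WRITE,
                                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  std::memset(g_protected, 0x55, g_protected_size);     // touch pages, as InstallImplicitProtection does
  mprotect(g_protected, g_protected_size, PROT_NONE);   // from now on, any access faults

  struct sigaction sa;
  std::memset(&sa, 0, sizeof(sa));
  sa.sa_sigaction = Handler;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);

  volatile uint8_t probe = *g_protected;                // the "probe" load
  (void)probe;
  return 1;  // not reached
}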
-rw-r--r--  compiler/dex/quick/arm/call_arm.cc          28
-rw-r--r--  compiler/dex/quick/arm/utility_arm.cc       14
-rw-r--r--  compiler/dex/quick/codegen_util.cc           3
-rw-r--r--  compiler/dex/quick/gen_common.cc            14
-rw-r--r--  compiler/dex/quick/gen_invoke.cc            28
-rw-r--r--  compiler/dex/quick/mir_to_lir.cc             1
-rw-r--r--  compiler/dex/quick/mir_to_lir.h              1
-rw-r--r--  runtime/arch/arm/fault_handler_arm.cc      149
-rw-r--r--  runtime/entrypoints/entrypoint_utils.cc      4
-rw-r--r--  runtime/fault_handler.cc                    25
-rw-r--r--  runtime/stack.cc                            61
-rw-r--r--  runtime/stack.h                              8
-rw-r--r--  runtime/thread.cc                           65
-rw-r--r--  runtime/thread.h                            21
14 files changed, 380 insertions, 42 deletions
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 175fc06..d6724f1 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -183,15 +183,18 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
LockCallTemps(); // Prepare for explicit register usage
constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
if (kArchVariantHasGoodBranchPredictor) {
- LIR* null_check_branch;
+ LIR* null_check_branch = nullptr;
if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
null_check_branch = nullptr; // No null check.
} else {
// If the null-check fails its handled by the slow-path to reduce exception related meta-data.
- null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+ }
}
LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ MarkPossibleNullPointerException(opt_flags);
LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
@@ -216,8 +219,8 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
// Explicit null-check as slow-path is entered using an IT.
GenNullCheck(rs_r0, opt_flags);
LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
- MarkPossibleNullPointerException(opt_flags);
NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ MarkPossibleNullPointerException(opt_flags);
OpRegImm(kOpCmp, rs_r1, 0);
OpIT(kCondEq, "");
NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
@@ -241,7 +244,7 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
FlushAllRegs();
LoadValueDirectFixed(rl_src, rs_r0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
- LIR* null_check_branch;
+ LIR* null_check_branch = nullptr;
LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
if (kArchVariantHasGoodBranchPredictor) {
@@ -249,9 +252,12 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
null_check_branch = nullptr; // No null check.
} else {
// If the null-check fails its handled by the slow-path to reduce exception related meta-data.
- null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+ }
}
LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+ MarkPossibleNullPointerException(opt_flags);
LoadConstantNoClobber(rs_r3, 0);
LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
@@ -404,11 +410,17 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
}
} else {
// Implicit stack overflow check.
- // Generate a load from [sp, #-framesize]. If this is in the stack
+ // Generate a load from [sp, #-overflowsize]. If this is in the stack
// redzone we will get a segmentation fault.
- OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
- LoadWordDisp(rs_rARM_SP, 0, rs_rARM_LR);
+ //
+ // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+ // we need to make sure that it's loadable in an immediate field of
+ // a sub instruction. Otherwise we will get a temp allocation and the
+ // code size will increase.
+ OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+ LoadWordDisp(rs_r12, 0, rs_r12);
MarkPossibleStackOverflowException();
+ OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
}
} else {
OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
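The implicit check emitted above computes sp - kStackOverflowReservedBytes into r12 and loads through it before the frame is actually allocated, so an overflowing method faults at one known instruction. A rough user-level analog, for illustration only: ProbeStack is an invented helper, and __builtin_frame_address(0) only approximates the post-entry SP.

#include <cstddef>
#include <cstdint>

// Touch the word `reserved_bytes` below the current frame before committing
// the real SP adjustment. With a protected region installed at the bottom of
// the stack (see the signal-handler sketch above), an overflowing frame
// faults here rather than somewhere deep inside the callee.
__attribute__((noinline)) void ProbeStack(size_t reserved_bytes) {
  uint8_t* sp = static_cast<uint8_t*>(__builtin_frame_address(0));
  volatile uint8_t* probe = sp - reserved_bytes;   // like "sub r12, sp, #overflowsize"
  (void)*probe;                                    // like "ldr.w r12, [r12, #0]"
}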
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 1ec0a2c..8df5b25 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -828,6 +828,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
int encoded_disp = displacement;
bool already_generated = false;
int dest_low_reg = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
+ bool null_pointer_safepoint = false;
switch (size) {
case kDouble:
case kLong:
@@ -848,6 +849,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
displacement >> 2);
} else {
load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), kWord, s_reg);
+ null_pointer_safepoint = true;
LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), kWord, INVALID_SREG);
}
already_generated = true;
@@ -939,6 +941,11 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
// TODO: in future may need to differentiate Dalvik accesses w/ spills
if (r_base == rs_rARM_SP) {
AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+ } else {
+ // We might need to generate a safepoint if we have two store instructions (wide or double).
+ if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+ MarkSafepointPC(load);
+ }
}
return load;
}
@@ -965,6 +972,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
int encoded_disp = displacement;
bool already_generated = false;
int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
+ bool null_pointer_safepoint = false;
switch (size) {
case kLong:
case kDouble:
@@ -974,6 +982,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
displacement >> 2);
} else {
store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), kWord);
+ null_pointer_safepoint = true;
StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), kWord);
}
already_generated = true;
@@ -1061,6 +1070,11 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
// TODO: In future, may need to differentiate Dalvik & spill accesses
if (r_base == rs_rARM_SP) {
AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+ } else {
+ // We might need to generate a safepoint if we have two store instructions (wide or double).
+ if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+ MarkSafepointPC(store);
+ }
}
return store;
}
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 717ad86..4c6c7a4 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -702,7 +702,8 @@ void Mir2Lir::CreateNativeGcMap() {
uint32_t native_offset = it.NativePcOffset();
uint32_t dex_pc = it.DexPc();
const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
- CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
+ CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc <<
+ ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
native_gc_map_builder.AddEntry(native_offset, references);
}
}
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2afa5ca..866ce5f 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -74,14 +74,19 @@ LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, T
/* Perform null-check on a register. */
LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
if (Runtime::Current()->ExplicitNullChecks()) {
- if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
- return NULL;
- }
- return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+ return GenExplicitNullCheck(m_reg, opt_flags);
}
return nullptr;
}
+/* Perform an explicit null-check on a register. */
+LIR* Mir2Lir::GenExplicitNullCheck(RegStorage m_reg, int opt_flags) {
+ if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+ return NULL;
+ }
+ return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+}
+
void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
if (!Runtime::Current()->ExplicitNullChecks()) {
if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
@@ -732,6 +737,7 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
rl_result = EvalLoc(rl_dest, reg_class, true);
LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
+ MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
// Without context sensitive analysis, we must issue the most conservative barriers.
// In this case, either a load or store may follow so we issue both barriers.
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index a0242d5..7689b51 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -781,7 +781,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
type, skip_this);
if (pcrLabel) {
- *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+ } else {
+ *pcrLabel = nullptr;
+ // In lieu of generating a check for kArg1 being null, we need to
+ // perform a load when doing implicit checks.
+ RegStorage tmp = AllocTemp();
+ LoadWordDisp(TargetReg(kArg1), 0, tmp);
+ MarkPossibleNullPointerException(info->opt_flags);
+ FreeTemp(tmp);
+ }
}
return call_state;
}
@@ -987,7 +997,17 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
direct_code, direct_method, type);
if (pcrLabel) {
- *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+ } else {
+ *pcrLabel = nullptr;
+ // In lieu of generating a check for kArg1 being null, we need to
+ // perform a load when doing implicit checks.
+ RegStorage tmp = AllocTemp();
+ LoadWordDisp(TargetReg(kArg1), 0, tmp);
+ MarkPossibleNullPointerException(info->opt_flags);
+ FreeTemp(tmp);
+ }
}
return call_state;
}
@@ -1299,7 +1319,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
LoadValueDirectFixed(rl_start, reg_start);
}
RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf));
- GenNullCheck(reg_ptr, info->opt_flags);
+ GenExplicitNullCheck(reg_ptr, info->opt_flags);
LIR* high_code_point_branch =
rl_char.is_const ? nullptr : OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, nullptr);
// NOTE: not a safepoint
@@ -1337,7 +1357,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) {
LoadValueDirectFixed(rl_cmp, reg_cmp);
RegStorage r_tgt = (cu_->instruction_set != kX86) ?
LoadHelper(QUICK_ENTRYPOINT_OFFSET(pStringCompareTo)) : RegStorage::InvalidReg();
- GenNullCheck(reg_this, info->opt_flags);
+ GenExplicitNullCheck(reg_this, info->opt_flags);
info->opt_flags |= MIR_IGNORE_NULL_CHECK; // Record that we've null checked.
// TUNING: check if rl_cmp.s_reg_low is already null checked
LIR* cmp_null_check_branch = OpCmpImmBranch(kCondEq, reg_cmp, 0, nullptr);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e81a037..cd3dadb 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -446,6 +446,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
GenNullCheck(rl_src[0].reg, opt_flags);
rl_result = EvalLoc(rl_dest, kCoreReg, true);
LoadWordDisp(rl_src[0].reg, len_offset, rl_result.reg);
+ MarkPossibleNullPointerException(opt_flags);
StoreValue(rl_dest, rl_result);
break;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8614151..10f431f 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -564,6 +564,7 @@ class Mir2Lir : public Backend {
void ForceImplicitNullCheck(RegStorage reg, int opt_flags);
LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind);
LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
+ LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
LIR* GenRegRegCheck(ConditionCode c_code, RegStorage reg1, RegStorage reg2, ThrowKind kind);
void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
RegLocation rl_src2, LIR* taken, LIR* fall_through);
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index c748ce9..abce838 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -18,9 +18,12 @@
#include "fault_handler.h"
#include <sys/ucontext.h>
#include "base/macros.h"
+#include "base/hex_dump.h"
#include "globals.h"
#include "base/logging.h"
#include "base/hex_dump.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
#include "thread.h"
#include "thread-inl.h"
@@ -31,16 +34,38 @@
namespace art {
extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow(void*);
extern "C" void art_quick_test_suspend();
+// Get the size of a thumb2 instruction in bytes.
+static uint32_t GetInstructionSize(uint8_t* pc) {
+ uint16_t instr = pc[0] | pc[1] << 8;
+ bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800);
+ uint32_t instr_size = is_32bit ? 4 : 2;
+ return instr_size;
+}
+
void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintptr_t& return_pc) {
struct ucontext *uc = (struct ucontext *)context;
struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
uintptr_t* sp = reinterpret_cast<uint32_t*>(sc->arm_sp);
+ LOG(DEBUG) << "sp: " << sp;
if (sp == nullptr) {
return;
}
+ // In the case of a stack overflow, the stack is not valid and we can't
+ // get the method from the top of the stack. However it's in r0.
+ uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+ uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+ reinterpret_cast<uint8_t*>(sp) - Thread::kStackOverflowReservedBytes);
+ if (overflow_addr == fault_addr) {
+ method = sc->arm_r0;
+ } else {
+ // The method is at the top of the stack.
+ method = sp[0];
+ }
+
// Work out the return PC. This will be the address of the instruction
// following the faulting ldr/str instruction. This is in thumb mode so
// the instruction might be a 16 or 32 bit one. Also, the GC map always
@@ -48,13 +73,8 @@ void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintpt
// Need to work out the size of the instruction that caused the exception.
uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-
- uint16_t instr = ptr[0] | ptr[1] << 8;
- bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800);
- uint32_t instr_size = is_32bit ? 4 : 2;
-
- // The method is at the top of the stack.
- method = sp[0];
+ LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr);
+ uint32_t instr_size = GetInstructionSize(ptr);
return_pc = (sc->arm_pc + instr_size) | 1;
}
@@ -71,9 +91,7 @@ bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
- uint16_t instr = ptr[0] | ptr[1] << 8;
- bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800);
- uint32_t instr_size = is_32bit ? 4 : 2;
+ uint32_t instr_size = GetInstructionSize(ptr);
sc->arm_lr = (sc->arm_pc + instr_size) | 1; // LR needs to point to gc map location
sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
LOG(DEBUG) << "Generating null pointer exception";
@@ -142,7 +160,116 @@ bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
return false;
}
+// Stack overflow fault handler.
+//
+// This checks that the fault address is equal to the current stack pointer
+// minus the overflow region size (16K typically). The instruction sequence
+// that generates this signal is:
+//
+// sub r12,sp,#16384
+// ldr.w r12,[r12,#0]
+//
+// The second instruction will fault if r12 is inside the protected region
+// on the stack.
+//
+// If we determine this is a stack overflow we need to move the stack pointer
+// to the overflow region below the protected region. Because we now have
+// a gap in the stack (skips over protected region), we need to arrange
+// for the rest of the system to be unaware of the new stack arrangement
+// and behave as if there is a fully valid stack. We do this by placing
+// a unique address onto the stack followed by
+// the size of the gap. The stack walker will detect this and skip over the
+// gap.
+
+// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
+// stack must be 8 byte aligned when making any calls.
+
+// NB. The size of the gap is the difference between the previous frame's SP and
+// the SP at which the size word is pushed.
+
bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
- return false;
+ struct ucontext *uc = (struct ucontext *)context;
+ struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+ LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
+ LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+
+ uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
+ LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+
+ uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+ LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
+ LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+ ", fault_addr: " << fault_addr;
+ uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+ // Check that the fault address is the value expected for a stack overflow.
+ if (fault_addr != overflow_addr) {
+ LOG(DEBUG) << "Not a stack overflow";
+ return false;
+ }
+
+ // We know this is a stack overflow. We need to move the sp to the overflow region
+ // the exists below the protected region. R9 contains the current Thread* so
+ // we can read the stack_end from that and subtract the size of the
+ // protected region. This creates a gap in the stack that needs to be marked.
+ Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+
+ uint8_t* prevsp = sp;
+ sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
+ LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
+
+ // We need to find the previous frame. Remember that
+ // this has not yet been fully constructed because the SP has not been
+ // decremented. So we need to work out the size of the spill portion of the
+ // frame. This consists of something like:
+ //
+ // 0xb6a1d49c: e92d40e0 push {r5, r6, r7, lr}
+ // 0xb6a1d4a0: ed2d8a06 vpush.f32 {s16-s21}
+ //
+ // The first is encoded in the ArtMethod as the spill_mask, the second as the
+ // fp_spill_mask. A population count on each will give the number of registers
+ // in each mask. Each register is 4 bytes on ARM32.
+
+ mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
+ uint32_t spill_mask = method->GetCoreSpillMask();
+ uint32_t numcores = __builtin_popcount(spill_mask);
+ uint32_t fp_spill_mask = method->GetFpSpillMask();
+ uint32_t numfps = __builtin_popcount(fp_spill_mask);
+ uint32_t spill_size = (numcores + numfps) * 4;
+ LOG(DEBUG) << "spill size: " << spill_size;
+ uint8_t* prevframe = prevsp + spill_size;
+ LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
+
+ // NOTE: the ARM EABI needs an 8 byte alignment. In the case of ARM32 a pointer
+ // is 4 bytes so that, together with the offset to the previous frame is 8
+ // bytes. On other architectures we will need to align the stack.
+
+ // Push a marker onto the stack to tell the stack walker that there is a stack
+ // overflow and the stack is not contiguous.
+
+ // First the offset from SP to the previous frame.
+ sp -= sizeof(uint32_t);
+ LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
+ *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
+
+ // Now the gap marker (pointer sized).
+ sp -= sizeof(mirror::ArtMethod*);
+ *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+
+ // Now establish the stack pointer for the signal return.
+ sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+
+ // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+ // We need the LR to point to the GC map just after the fault instruction.
+ uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
+ uint32_t instr_size = GetInstructionSize(ptr);
+ sc->arm_lr = (sc->arm_pc + instr_size) | 1; // LR needs to point to gc map location
+ sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+
+ // The kernel will now return to the address in sc->arm_pc. We have arranged the
+ // stack pointer to be in the overflow region. Throwing the exception will perform
+ // a longjmp which will restore the stack pointer to the correct location for the
+ // exception catch.
+ return true;
}
} // namespace art
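GetInstructionSize only needs the first halfword of the faulting instruction: 32-bit Thumb2 encodings begin with a halfword whose top bits are 0b11101, 0b11110, or 0b11111, everything else is 16-bit. Below is a standalone copy of that test with a few sample first halfwords; the sample opcodes are illustrative and not taken from the patch.

#include <cassert>
#include <cstdint>

// Same width test as GetInstructionSize above, copied out for illustration.
static uint32_t Thumb2InstructionSize(uint16_t first_halfword) {
  bool is_32bit = ((first_halfword & 0xF000) == 0xF000) ||
                  ((first_halfword & 0xF800) == 0xE800);
  return is_32bit ? 4 : 2;
}

int main() {
  assert(Thumb2InstructionSize(0x4668) == 2);  // mov r0, sp (16-bit encoding)
  assert(Thumb2InstructionSize(0xE92D) == 4);  // push.w {...} first halfword
  assert(Thumb2InstructionSize(0xF8DC) == 4);  // ldr.w ..., [r12, #imm] first halfword (the probe)
  return 0;
}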
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 9e5f54c..c81706f 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -134,7 +134,9 @@ void ThrowStackOverflowError(Thread* self) {
LOG(ERROR) << "Couldn't throw new StackOverflowError because JNI ThrowNew failed.";
CHECK(self->IsExceptionPending());
}
- self->ResetDefaultStackEnd(); // Return to default stack size.
+
+ bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
+ self->ResetDefaultStackEnd(!explicit_overflow_check); // Return to default stack size.
}
JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index f9f3e25..fcb567e 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -61,8 +61,11 @@ void FaultManager::Init() {
void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
bool handled = false;
+ LOG(DEBUG) << "Handling fault";
if (IsInGeneratedCode(context)) {
+ LOG(DEBUG) << "in generated code, looking for handler";
for (auto& handler : handlers_) {
+ LOG(DEBUG) << "invoking Action on handler " << handler;
handled = handler->Action(sig, info, context);
if (handled) {
return;
@@ -71,7 +74,7 @@ void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
}
if (!handled) {
- LOG(INFO)<< "Caught unknown SIGSEGV in ART fault handler";
+ LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
oldaction_.sa_sigaction(sig, info, context);
}
}
@@ -96,19 +99,23 @@ void FaultManager::RemoveHandler(FaultHandler* handler) {
bool FaultManager::IsInGeneratedCode(void *context) {
// We can only be running Java code in the current thread if it
// is in Runnable state.
+ LOG(DEBUG) << "Checking for generated code";
Thread* thread = Thread::Current();
if (thread == nullptr) {
+ LOG(DEBUG) << "no current thread";
return false;
}
ThreadState state = thread->GetState();
if (state != kRunnable) {
+ LOG(DEBUG) << "not runnable";
return false;
}
// Current thread is runnable.
// Make sure it has the mutator lock.
if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
+ LOG(DEBUG) << "no lock";
return false;
}
@@ -120,7 +127,9 @@ bool FaultManager::IsInGeneratedCode(void *context) {
GetMethodAndReturnPC(context, /*out*/potential_method, /*out*/return_pc);
// If we don't have a potential method, we're outta here.
+ LOG(DEBUG) << "potential method: " << potential_method;
if (potential_method == 0) {
+ LOG(DEBUG) << "no method";
return false;
}
@@ -133,19 +142,23 @@ bool FaultManager::IsInGeneratedCode(void *context) {
// Check that the class pointer inside the object is not null and is aligned.
mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
if (cls == nullptr) {
+ LOG(DEBUG) << "not a class";
return false;
}
if (!IsAligned<kObjectAlignment>(cls)) {
+ LOG(DEBUG) << "not aligned";
return false;
}
if (!VerifyClassClass(cls)) {
+ LOG(DEBUG) << "not a class class";
return false;
}
// Now make sure the class is a mirror::ArtMethod.
if (!cls->IsArtMethodClass()) {
+ LOG(DEBUG) << "not a method";
return false;
}
@@ -153,7 +166,15 @@ bool FaultManager::IsInGeneratedCode(void *context) {
// at the return PC address.
mirror::ArtMethod* method =
reinterpret_cast<mirror::ArtMethod*>(potential_method);
- return method->ToDexPc(return_pc, false) != DexFile::kDexNoIndex;
+ if (true || kIsDebugBuild) {
+ LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+ const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method);
+ uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
+ LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+ }
+ uint32_t dexpc = method->ToDexPc(return_pc, false);
+ LOG(DEBUG) << "dexpc: " << dexpc;
+ return dexpc != DexFile::kDexNoIndex;
}
//
diff --git a/runtime/stack.cc b/runtime/stack.cc
index c33d1ab..ab3bd85 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -16,6 +16,7 @@
#include "stack.h"
+#include "base/hex_dump.h"
#include "mirror/art_method-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object.h"
@@ -23,6 +24,7 @@
#include "mirror/object_array-inl.h"
#include "object_utils.h"
#include "runtime.h"
+#include "thread.h"
#include "thread_list.h"
#include "throw_location.h"
#include "verify_object-inl.h"
@@ -30,6 +32,14 @@
namespace art {
+// Define a piece of memory, the address of which can be used as a marker
+// for the gap in the stack added during stack overflow handling.
+static uint32_t stack_overflow_object;
+
+// The stack overflow gap marker is simply a valid unique address.
+void* stack_overflow_gap_marker = &stack_overflow_object;
+
+
mirror::Object* ShadowFrame::GetThisObject() const {
mirror::ArtMethod* m = GetMethod();
if (m->IsStatic()) {
@@ -294,20 +304,56 @@ void StackVisitor::WalkStack(bool include_transitions) {
CHECK_EQ(cur_depth_, 0U);
bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
uint32_t instrumentation_stack_depth = 0;
+
+ bool kDebugStackWalk = false;
+ bool kDebugStackWalkVeryVerbose = false; // The name says it all.
+
+ if (kDebugStackWalk) {
+ LOG(INFO) << "walking stack";
+ }
for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
current_fragment = current_fragment->GetLink()) {
cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
cur_quick_frame_ = current_fragment->GetTopQuickFrame();
cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
+ if (kDebugStackWalkVeryVerbose) {
+ LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
+ LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
+ }
+
if (cur_quick_frame_ != NULL) { // Handle quick stack frames.
// Can't be both a shadow and a quick fragment.
DCHECK(current_fragment->GetTopShadowFrame() == NULL);
mirror::ArtMethod* method = *cur_quick_frame_;
while (method != NULL) {
- SanityCheckFrame();
- bool should_continue = VisitFrame();
- if (UNLIKELY(!should_continue)) {
- return;
+ // Check for a stack overflow gap marker.
+ if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
+ // Marker for a stack overflow. This is followed by the offset from the
+ // current SP to the next frame. There is a gap in the stack here. Jump
+ // the gap silently.
+ // Caveat coder: the layout of the overflow marker depends on the architecture.
+ // The first element is address sized (8 bytes on a 64 bit machine). The second
+ // element is 32 bits. So be careful with those address calculations.
+
+ // Get the address of the offset, just beyond the marker pointer.
+ byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
+ uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
+ CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
+ mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
+ reinterpret_cast<byte*>(gapsizeaddr) + gap);
+ if (kDebugStackWalk) {
+ LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
+ next_frame;
+ }
+ cur_quick_frame_ = next_frame;
+ method = *next_frame;
+ CHECK(method != nullptr);
+ } else {
+ SanityCheckFrame();
+ bool should_continue = VisitFrame();
+ if (UNLIKELY(!should_continue)) {
+ return;
+ }
}
if (context_ != NULL) {
context_->FillCalleeSaves(*this);
@@ -317,6 +363,9 @@ void StackVisitor::WalkStack(bool include_transitions) {
size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
+ if (kDebugStackWalkVeryVerbose) {
+ LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
+ }
if (UNLIKELY(exit_stubs_installed)) {
// While profiling, the return pc is restored from the side stack, except when walking
// the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -349,6 +398,10 @@ void StackVisitor::WalkStack(bool include_transitions) {
cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
cur_depth_++;
method = *cur_quick_frame_;
+ if (kDebugStackWalkVeryVerbose) {
+ LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
+ LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
+ }
}
} else if (cur_shadow_frame_ != NULL) {
do {
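The fault handler leaves a pointer-sized marker followed by a 32-bit gap size at the new SP, and WalkStack jumps the gap by adding that size to the address of the size word. Here is a small standalone model of that layout and the skip arithmetic; the buffer and the gap value are made up (the real gap is checked to exceed kStackOverflowProtectedSize), only the offsets mirror the code above.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Pretend stack memory; only the offset arithmetic mirrors WalkStack.
  uint8_t fake_stack[128] = {};
  uint8_t* sp = fake_stack + 16;        // where the fault handler left the SP
  void* gap_marker = fake_stack;        // stand-in for stack_overflow_gap_marker

  // Handler side: [marker (pointer-sized)] [gap size (32-bit)] ... gap ... [next frame]
  uint32_t gap = 64;                    // distance from the size word to the next frame
  std::memcpy(sp, &gap_marker, sizeof(void*));
  std::memcpy(sp + sizeof(uintptr_t), &gap, sizeof(uint32_t));

  // Walker side: detect the marker, then skip silently to the next frame.
  void* seen;
  std::memcpy(&seen, sp, sizeof(void*));
  assert(seen == gap_marker);
  uint8_t* gapsizeaddr = sp + sizeof(uintptr_t);
  uint32_t size;
  std::memcpy(&size, gapsizeaddr, sizeof(uint32_t));
  uint8_t* next_frame = gapsizeaddr + size;
  assert(next_frame == sp + sizeof(uintptr_t) + gap);
  return 0;
}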
diff --git a/runtime/stack.h b/runtime/stack.h
index 4ee5de1..ab903d6 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -100,6 +100,14 @@ enum VRegBaseRegNum : int {
kVRegNonSpecialTempBaseReg = -3,
};
+// Special object used to mark the gap in the stack placed when a stack
+// overflow fault occurs during implicit stack checking. This is not
+// a real object - it is used simply as a valid address to which a
+// mirror::ArtMethod* can be compared during a stack walk. It is inserted
+// into the stack during the stack overflow signal handling to mark the gap
+// in which the memory is protected against read and write.
+extern void* stack_overflow_gap_marker;
+
// A reference from the shadow stack to a MirrorType object within the Java heap.
template<class MirrorType>
class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 38e4204..3692b9f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -215,10 +215,16 @@ static size_t FixStackSize(size_t stack_size) {
stack_size = PTHREAD_STACK_MIN;
}
- // It's likely that callers are trying to ensure they have at least a certain amount of
- // stack space, so we should add our reserved space on top of what they requested, rather
- // than implicitly take it away from them.
- stack_size += Thread::kStackOverflowReservedBytes;
+ if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+ // It's likely that callers are trying to ensure they have at least a certain amount of
+ // stack space, so we should add our reserved space on top of what they requested, rather
+ // than implicitly take it away from them.
+ stack_size += Thread::kStackOverflowReservedBytes;
+ } else {
+ // If we are going to use implicit stack checks, allocate space for the protected
+ // region at the bottom of the stack.
+ stack_size += Thread::kStackOverflowImplicitCheckSize;
+ }
// Some systems require the stack size to be a multiple of the system page size, so round up.
stack_size = RoundUp(stack_size, kPageSize);
@@ -226,6 +232,39 @@ static size_t FixStackSize(size_t stack_size) {
return stack_size;
}
+// Install a protected region in the stack. This is used to trigger a SIGSEGV if a stack
+// overflow is detected. It is located right below the stack_end_. Just below that
+// is the StackOverflow reserved region used when creating the StackOverflow
+// exception.
+void Thread::InstallImplicitProtection(bool is_main_stack) {
+ byte* pregion = stack_end_;
+
+ constexpr uint32_t kMarker = 0xdadadada;
+ uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
+ if (*marker == kMarker) {
+ // The region has already been set up.
+ return;
+ }
+ // Add marker so that we can detect a second attempt to do this.
+ *marker = kMarker;
+
+ pregion -= kStackOverflowProtectedSize;
+
+ // Touch the pages in the region to map them in. Otherwise mprotect fails. Only
+ // need to do this on the main stack.
+ if (is_main_stack) {
+ memset(pregion, 0x55, kStackOverflowProtectedSize);
+ }
+ VLOG(threads) << "installing stack protected region at " << std::hex <<
+ static_cast<void*>(pregion) << " to " <<
+ static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
+
+ if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
+ LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
+ << strerror(errno);
+ }
+}
+
void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
CHECK(java_peer != nullptr);
Thread* self = static_cast<JNIEnvExt*>(env)->self;
@@ -472,7 +511,22 @@ void Thread::InitStackHwm() {
#endif
// Set stack_end_ to the bottom of the stack saving space of stack overflows
- ResetDefaultStackEnd();
+ bool implicit_stack_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+ ResetDefaultStackEnd(implicit_stack_check);
+
+ // Install the protected region if we are doing implicit overflow checks.
+ if (implicit_stack_check) {
+ if (is_main_thread) {
+ // The main thread has a 16K protected region at the bottom. We need
+ // to install our own region so we need to move the limits
+ // of the stack to make room for it.
+ constexpr uint32_t kDelta = 16 * KB;
+ stack_begin_ += kDelta;
+ stack_end_ += kDelta;
+ stack_size_ -= kDelta;
+ }
+ InstallImplicitProtection(is_main_thread);
+ }
// Sanity check.
int stack_variable;
@@ -967,6 +1021,7 @@ Thread::Thread(bool daemon)
pthread_self_(0),
no_thread_suspension_(0),
last_no_thread_suspension_cause_(nullptr),
+ suspend_trigger_(reinterpret_cast<uintptr_t*>(&suspend_trigger_)),
thread_exit_check_count_(0),
thread_local_start_(nullptr),
thread_local_pos_(nullptr),
diff --git a/runtime/thread.h b/runtime/thread.h
index 32875e6..63d22c5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -112,6 +112,14 @@ class PACKED(4) Thread {
static constexpr size_t kStackOverflowReservedUsableBytes =
kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
+ // For implicit overflow checks we reserve an extra piece of memory at the bottom
+ // of the stack (lowest memory). The higher portion of the memory
+ // is protected against reads and the lower is available for use while
+ // throwing the StackOverflow exception.
+ static constexpr size_t kStackOverflowProtectedSize = 32 * KB;
+ static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
+ kStackOverflowReservedBytes;
+
// Creates a new native thread corresponding to the given managed peer.
// Used to implement Thread.start.
static void CreateNativeThread(JNIEnv* env, jobject peer, size_t stack_size, bool daemon);
@@ -461,12 +469,21 @@ class PACKED(4) Thread {
void SetStackEndForStackOverflow() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
// Set the stack end to that to be used during regular execution
- void ResetDefaultStackEnd() {
+ void ResetDefaultStackEnd(bool implicit_overflow_check) {
// Our stacks grow down, so we want stack_end_ to be near there, but reserving enough room
// to throw a StackOverflowError.
- stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+ if (implicit_overflow_check) {
+ // For implicit checks we also need to add in the protected region above the
+ // overflow region.
+ stack_end_ = stack_begin_ + kStackOverflowImplicitCheckSize;
+ } else {
+ stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+ }
}
+ // Install the protected region for implicit stack checks.
+ void InstallImplicitProtection(bool is_main_stack);
+
bool IsHandlingStackOverflow() const {
return stack_end_ == stack_begin_;
}
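Putting the new thread.h constants together, the distance from stack_begin_ to stack_end_ now depends on which kind of overflow check is in use. A small sketch of that arithmetic follows; the 16 KB reserved value is an assumption that matches the #16384 probe shown in fault_handler_arm.cc for ARM at this point, and other targets differ.

#include <cstddef>
#include <cstdio>

constexpr std::size_t KB = 1024;
constexpr std::size_t kStackOverflowReservedBytes = 16 * KB;   // assumed ARM value
constexpr std::size_t kStackOverflowProtectedSize = 32 * KB;
constexpr std::size_t kStackOverflowImplicitCheckSize =
    kStackOverflowProtectedSize + kStackOverflowReservedBytes;

// Mirrors ResetDefaultStackEnd: leave room to construct a StackOverflowError,
// plus the read/write-protected region when implicit checks are enabled.
std::size_t StackEndOffset(bool implicit_overflow_check) {
  return implicit_overflow_check ? kStackOverflowImplicitCheckSize
                                 : kStackOverflowReservedBytes;
}

int main() {
  std::printf("explicit checks: stack_end_ = stack_begin_ + %zu\n",
              StackEndOffset(false));
  std::printf("implicit checks: stack_end_ = stack_begin_ + %zu\n",
              StackEndOffset(true));
  return 0;
}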