-rw-r--r--  compiler/dex/quick/arm/call_arm.cc       |  28
-rw-r--r--  compiler/dex/quick/arm/utility_arm.cc    |  14
-rw-r--r--  compiler/dex/quick/codegen_util.cc       |   3
-rw-r--r--  compiler/dex/quick/gen_common.cc         |  14
-rw-r--r--  compiler/dex/quick/gen_invoke.cc         |  28
-rw-r--r--  compiler/dex/quick/mir_to_lir.cc         |   1
-rw-r--r--  compiler/dex/quick/mir_to_lir.h          |   1
-rw-r--r--  runtime/arch/arm/fault_handler_arm.cc    | 149
-rw-r--r--  runtime/entrypoints/entrypoint_utils.cc  |   4
-rw-r--r--  runtime/fault_handler.cc                 |  25
-rw-r--r--  runtime/stack.cc                         |  61
-rw-r--r--  runtime/stack.h                          |   8
-rw-r--r--  runtime/thread.cc                        |  65
-rw-r--r--  runtime/thread.h                         |  21
14 files changed, 380 insertions(+), 42 deletions(-)
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 175fc06..d6724f1 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -183,15 +183,18 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   LockCallTemps();  // Prepare for explicit register usage
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
-    LIR* null_check_branch;
+    LIR* null_check_branch = nullptr;
     if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
     }
     LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
     LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
     NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
@@ -216,8 +219,8 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
     LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
-    MarkPossibleNullPointerException(opt_flags);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
     OpRegImm(kOpCmp, rs_r1, 0);
     OpIT(kCondEq, "");
     NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
@@ -241,7 +244,7 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
   LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
-  LIR* null_check_branch;
+  LIR* null_check_branch = nullptr;
   LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
@@ -249,9 +252,12 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
     }
     LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_r3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
     StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
@@ -404,11 +410,17 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
     }
   } else {
     // Implicit stack overflow check.
-    // Generate a load from [sp, #-framesize].  If this is in the stack
+    // Generate a load from [sp, #-overflowsize].  If this is in the stack
     // redzone we will get a segmentation fault.
-    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
-    LoadWordDisp(rs_rARM_SP, 0, rs_rARM_LR);
+    //
+    // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+    // we need to make sure that it's loadable in an immediate field of
+    // a sub instruction.  Otherwise we will get a temp allocation and the
+    // code size will increase.
+    OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+    LoadWordDisp(rs_r12, 0, rs_r12);
     MarkPossibleStackOverflowException();
+    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
   }
 } else {
   OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
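The entry-sequence change above replaces a destructive SP update followed by a load with a probing load through r12, taken before SP moves. A minimal sketch of what the emitted two-instruction probe does at runtime; the constant and helper names below are illustrative stand-ins, not ART's emitter API:

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative constant standing in for Thread::kStackOverflowReservedBytes.
constexpr std::size_t kStackOverflowReservedBytes = 16 * 1024;

// C-level equivalent of the emitted probe:
//   sub r12, sp, #kStackOverflowReservedBytes
//   ldr r12, [r12, #0]   ; faults iff the address is in the protected region
inline void ProbeStackRedzone(const uint8_t* sp) {
  const volatile uint32_t* probe =
      reinterpret_cast<const volatile uint32_t*>(sp - kStackOverflowReservedBytes);
  (void)*probe;  // Value is discarded; only the potential SIGSEGV matters.
}
```

Because the probe uses r12 rather than SP, a fault is taken while SP still points at the caller's frame, which is what lets the fault handler later compare the fault address against `sp - kStackOverflowReservedBytes`.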
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 1ec0a2c..8df5b25 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -828,6 +828,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
   int encoded_disp = displacement;
   bool already_generated = false;
   int dest_low_reg = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
+  bool null_pointer_safepoint = false;
   switch (size) {
     case kDouble:
     case kLong:
@@ -848,6 +849,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
           displacement >> 2);
       } else {
         load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), kWord, s_reg);
+        null_pointer_safepoint = true;
         LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), kWord, INVALID_SREG);
       }
       already_generated = true;
@@ -939,6 +941,11 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+  } else {
+    // We might need to generate a safepoint if we have two store instructions (wide or double).
+    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+      MarkSafepointPC(load);
+    }
   }
   return load;
 }
@@ -965,6 +972,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
   int encoded_disp = displacement;
   bool already_generated = false;
   int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
+  bool null_pointer_safepoint = false;
   switch (size) {
     case kLong:
     case kDouble:
@@ -974,6 +982,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
           displacement >> 2);
       } else {
         store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), kWord);
+        null_pointer_safepoint = true;
         StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), kWord);
       }
       already_generated = true;
@@ -1061,6 +1070,11 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
   // TODO: In future, may need to differentiate Dalvik & spill accesses
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+  } else {
+    // We might need to generate a safepoint if we have two store instructions (wide or double).
+    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+      MarkSafepointPC(store);
+    }
   }
   return store;
 }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 717ad86..4c6c7a4 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -702,7 +702,8 @@ void Mir2Lir::CreateNativeGcMap() {
     uint32_t native_offset = it.NativePcOffset();
     uint32_t dex_pc = it.DexPc();
     const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
+    CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc <<
+        ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
     native_gc_map_builder.AddEntry(native_offset, references);
   }
 }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2afa5ca..866ce5f 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -74,14 +74,19 @@ LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, T
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
   if (Runtime::Current()->ExplicitNullChecks()) {
-    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-      return NULL;
-    }
-    return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+    return GenExplicitNullCheck(m_reg, opt_flags);
   }
   return nullptr;
 }
 
+/* Perform an explicit null-check on a register.  */
+LIR* Mir2Lir::GenExplicitNullCheck(RegStorage m_reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return NULL;
+  }
+  return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+}
+
 void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
   if (!Runtime::Current()->ExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
@@ -732,6 +737,7 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
       OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
       rl_result = EvalLoc(rl_dest, reg_class, true);
       LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
+      MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
         // Without context sensitive analysis, we must issue the most conservative barriers.
         // In this case, either a load or store may follow so we issue both barriers.
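After this split, GenNullCheck only emits code in the explicit mode, while MarkPossibleNullPointerException records a safepoint in the implicit mode. A toy model of that division of labor; the types and calls below are illustrative, not the real Mir2Lir interfaces:

```cpp
#include <cstdio>

// Illustrative only: models the GenNullCheck/MarkPossibleNullPointerException
// split, not ART's actual code generator.
struct Codegen {
  bool explicit_null_checks;

  // Explicit mode: emit a compare-and-branch to the NPE throw path.
  void GenNullCheck() {
    if (explicit_null_checks) {
      std::puts("cmp rX, #0; beq <throw_npe>");  // a real branch in the code stream
    }
    // Implicit mode: emit nothing here; the following memory access is the check.
  }

  // Implicit mode: record that the *next* memory access may fault, so the
  // fault handler can map the faulting PC back to a dex PC and a GC map.
  void MarkPossibleNullPointerException() {
    if (!explicit_null_checks) {
      std::puts("record safepoint at current PC");
    }
  }
};
```

This also explains the `null_pointer_safepoint` flag above: a wide load or store is split into two instructions, and only the first can fault on a null base, so the second needs its own safepoint record.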
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index a0242d5..7689b51 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -781,7 +781,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
                                type, skip_this);
 
   if (pcrLabel) {
-    *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
   }
   return call_state;
 }
@@ -987,7 +997,17 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                               direct_code, direct_method, type);
   if (pcrLabel) {
-    *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
   }
   return call_state;
 }
@@ -1299,7 +1319,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
     LoadValueDirectFixed(rl_start, reg_start);
   }
   RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf));
-  GenNullCheck(reg_ptr, info->opt_flags);
+  GenExplicitNullCheck(reg_ptr, info->opt_flags);
   LIR* high_code_point_branch =
       rl_char.is_const ? nullptr : OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, nullptr);
   // NOTE: not a safepoint
@@ -1337,7 +1357,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) {
   LoadValueDirectFixed(rl_cmp, reg_cmp);
   RegStorage r_tgt = (cu_->instruction_set != kX86) ?
      LoadHelper(QUICK_ENTRYPOINT_OFFSET(pStringCompareTo)) : RegStorage::InvalidReg();
-  GenNullCheck(reg_this, info->opt_flags);
+  GenExplicitNullCheck(reg_this, info->opt_flags);
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
   // TUNING: check if rl_cmp.s_reg_low is already null checked
   LIR* cmp_null_check_branch = OpCmpImmBranch(kCondEq, reg_cmp, 0, nullptr);
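For invokes, the receiver is null-checked by actually touching it: the AllocTemp/LoadWordDisp/FreeTemp sequence above loads the first word of the object into a throwaway register. A one-function illustration of why that works under implicit checking; the helper is hypothetical, not ART code:

```cpp
#include <cstdint>

// If 'receiver' is null the load faults, and the SIGSEGV handler converts the
// fault into a NullPointerException at the safepoint recorded for this PC.
inline void NullCheckByLoad(const void* receiver) {
  const volatile uintptr_t* first_word =
      reinterpret_cast<const volatile uintptr_t*>(receiver);
  (void)*first_word;  // Result is discarded, mirroring the FreeTemp'd register.
}
```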
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e81a037..cd3dadb 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -446,6 +446,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
       GenNullCheck(rl_src[0].reg, opt_flags);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       LoadWordDisp(rl_src[0].reg, len_offset, rl_result.reg);
+      MarkPossibleNullPointerException(opt_flags);
       StoreValue(rl_dest, rl_result);
       break;
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8614151..10f431f 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -564,6 +564,7 @@ class Mir2Lir : public Backend {
     void ForceImplicitNullCheck(RegStorage reg, int opt_flags);
     LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind);
     LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
+    LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
     LIR* GenRegRegCheck(ConditionCode c_code, RegStorage reg1, RegStorage reg2, ThrowKind kind);
     void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_src2,
                              LIR* taken, LIR* fall_through);
+#include "mirror/art_method-inl.h" #include "thread.h" #include "thread-inl.h" @@ -31,16 +34,38 @@ namespace art { extern "C" void art_quick_throw_null_pointer_exception(); +extern "C" void art_quick_throw_stack_overflow(void*); extern "C" void art_quick_test_suspend(); +// Get the size of a thumb2 instruction in bytes. +static uint32_t GetInstructionSize(uint8_t* pc) { + uint16_t instr = pc[0] | pc[1] << 8; + bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800); + uint32_t instr_size = is_32bit ? 4 : 2; + return instr_size; +} + void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintptr_t& return_pc) { struct ucontext *uc = (struct ucontext *)context; struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); uintptr_t* sp = reinterpret_cast<uint32_t*>(sc->arm_sp); + LOG(DEBUG) << "sp: " << sp; if (sp == nullptr) { return; } + // In the case of a stack overflow, the stack is not valid and we can't + // get the method from the top of the stack. However it's in r0. + uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address); + uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>( + reinterpret_cast<uint8_t*>(sp) - Thread::kStackOverflowReservedBytes); + if (overflow_addr == fault_addr) { + method = sc->arm_r0; + } else { + // The method is at the top of the stack. + method = sp[0]; + } + // Work out the return PC. This will be the address of the instruction // following the faulting ldr/str instruction. This is in thumb mode so // the instruction might be a 16 or 32 bit one. Also, the GC map always @@ -48,13 +73,8 @@ void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintpt // Need to work out the size of the instruction that caused the exception. uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc); - - uint16_t instr = ptr[0] | ptr[1] << 8; - bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800); - uint32_t instr_size = is_32bit ? 4 : 2; - - // The method is at the top of the stack. - method = sp[0]; + LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr); + uint32_t instr_size = GetInstructionSize(ptr); return_pc = (sc->arm_pc + instr_size) | 1; } @@ -71,9 +91,7 @@ bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) { struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc); - uint16_t instr = ptr[0] | ptr[1] << 8; - bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800); - uint32_t instr_size = is_32bit ? 4 : 2; + uint32_t instr_size = GetInstructionSize(ptr); sc->arm_lr = (sc->arm_pc + instr_size) | 1; // LR needs to point to gc map location sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception); LOG(DEBUG) << "Generating null pointer exception"; @@ -142,7 +160,116 @@ bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) { return false; } +// Stack overflow fault handler. +// +// This checks that the fault address is equal to the current stack pointer +// minus the overflow region size (16K typically). The instruction sequence +// that generates this signal is: +// +// sub r12,sp,#16384 +// ldr.w r12,[r12,#0] +// +// The second instruction will fault if r12 is inside the protected region +// on the stack. +// +// If we determine this is a stack overflow we need to move the stack pointer +// to the overflow region below the protected region. 
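GetInstructionSize encodes the Thumb-2 rule that an instruction is 32 bits wide when its first halfword starts with 0b11101, 0b11110 or 0b11111; everything else is 16 bits. A standalone check of the same mask logic against encodings drawn from this patch's own comments (the 16-bit sample opcode is an illustrative addition):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the handler's width test: 32-bit Thumb-2 instructions begin with a
// halfword whose top bits are 0b11101 (0xE800 group) or 0b1111x (0xF000 group).
static uint32_t ThumbInstructionSize(uint16_t first_halfword) {
  bool is_32bit = ((first_halfword & 0xF000) == 0xF000) ||
                  ((first_halfword & 0xF800) == 0xE800);
  return is_32bit ? 4 : 2;
}

int main() {
  assert(ThumbInstructionSize(0x6800) == 2);  // ldr r0, [r0]       (16-bit)
  assert(ThumbInstructionSize(0xF8DC) == 4);  // ldr.w r12, [r12]   (first halfword)
  assert(ThumbInstructionSize(0xE92D) == 4);  // push.w {r5,r6,r7,lr} = e92d40e0
  return 0;
}
```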
@@ -142,7 +160,116 @@ bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
   return false;
 }
 
+// Stack overflow fault handler.
+//
+// This checks that the fault address is equal to the current stack pointer
+// minus the overflow region size (16K typically).  The instruction sequence
+// that generates this signal is:
+//
+// sub r12,sp,#16384
+// ldr.w r12,[r12,#0]
+//
+// The second instruction will fault if r12 is inside the protected region
+// on the stack.
+//
+// If we determine this is a stack overflow we need to move the stack pointer
+// to the overflow region below the protected region.  Because we now have
+// a gap in the stack (skips over protected region), we need to arrange
+// for the rest of the system to be unaware of the new stack arrangement
+// and behave as if there is a fully valid stack.  We do this by placing
+// a unique address onto the stack followed by
+// the size of the gap.  The stack walker will detect this and skip over the
+// gap.
+//
+// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
+// stack must be 8 byte aligned when making any calls.
+//
+// NB. The size of the gap is the difference between the previous frame's SP and
+// the SP at which the size word is pushed.
+
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = (struct ucontext *)context;
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
+  LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+
+  uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
+  LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+  LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
+  LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+      ", fault_addr: " << fault_addr;
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    LOG(DEBUG) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // the exists below the protected region.  R9 contains the current Thread* so
+  // we can read the stack_end from that and subtract the size of the
+  // protected region.  This creates a gap in the stack that needs to be marked.
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+
+  uint8_t* prevsp = sp;
+  sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
+  LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
+
+  // We need to find the previous frame.  Remember that
+  // this has not yet been fully constructed because the SP has not been
+  // decremented.  So we need to work out the size of the spill portion of the
+  // frame.  This consists of something like:
+  //
+  // 0xb6a1d49c: e92d40e0  push    {r5, r6, r7, lr}
+  // 0xb6a1d4a0: ed2d8a06  vpush.f32 {s16-s21}
+  //
+  // The first is encoded in the ArtMethod as the spill_mask, the second as the
+  // fp_spill_mask.  A population count on each will give the number of registers
+  // in each mask.  Each register is 4 bytes on ARM32.
+
+  mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
+  uint32_t spill_mask = method->GetCoreSpillMask();
+  uint32_t numcores = __builtin_popcount(spill_mask);
+  uint32_t fp_spill_mask = method->GetFpSpillMask();
+  uint32_t numfps = __builtin_popcount(fp_spill_mask);
+  uint32_t spill_size = (numcores + numfps) * 4;
+  LOG(DEBUG) << "spill size: " << spill_size;
+  uint8_t* prevframe = prevsp + spill_size;
+  LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
+
+  // NOTE: the ARM EABI needs an 8 byte alignment.  In the case of ARM32 a pointer
+  // is 4 bytes so that, together with the offset to the previous frame is 8
+  // bytes.  On other architectures we will need to align the stack.
+
+  // Push a marker onto the stack to tell the stack walker that there is a stack
+  // overflow and the stack is not contiguous.
+
+  // First the offset from SP to the previous frame.
+  sp -= sizeof(uint32_t);
+  LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
+  *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
+
+  // Now the gap marker (pointer sized).
+  sp -= sizeof(mirror::ArtMethod*);
+  *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+
+  // Now establish the stack pointer for the signal return.
+  sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+  // We need the LR to point to the GC map just after the fault instruction.
+  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
+  uint32_t instr_size = GetInstructionSize(ptr);
+  sc->arm_lr = (sc->arm_pc + instr_size) | 1;  // LR needs to point to gc map location
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+
+  // The kernel will now return to the address in sc->arm_pc.  We have arranged the
+  // stack pointer to be in the overflow region.  Throwing the exception will perform
+  // a longjmp which will restore the stack pointer to the correct location for the
+  // exception catch.
+  return true;
 }
 
 }       // namespace art
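The spill-size arithmetic in the handler can be sanity-checked offline against the example encoding quoted in its comment: push {r5, r6, r7, lr} is e92d40e0, so the core register list is 0x40E0. A small sketch; the FP mask value is an assumed illustration of six single-precision registers, not a real encoding extracted from ed2d8a06:

```cpp
#include <cstdint>
#include <cstdio>

// Spill area size as the handler computes it: 4 bytes per core register in
// the spill mask plus 4 bytes per single-precision FP register.
static uint32_t SpillSize(uint32_t core_spill_mask, uint32_t fp_spill_mask) {
  return (__builtin_popcount(core_spill_mask) +
          __builtin_popcount(fp_spill_mask)) * 4;
}

int main() {
  // push {r5, r6, r7, lr} -> bits 5, 6, 7, 14 -> mask 0x40E0 (4 registers)
  // vpush {s16-s21}       -> 6 FP registers; bit placement below is illustrative
  uint32_t core_mask = 0x40E0;
  uint32_t fp_mask = 0x003F0000;
  std::printf("spill size: %u bytes\n", SpillSize(core_mask, fp_mask));  // 40
  return 0;
}
```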
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 9e5f54c..c81706f 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -134,7 +134,9 @@ void ThrowStackOverflowError(Thread* self) {
     LOG(ERROR) << "Couldn't throw new StackOverflowError because JNI ThrowNew failed.";
     CHECK(self->IsExceptionPending());
   }
-  self->ResetDefaultStackEnd();  // Return to default stack size.
+
+  bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
+  self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
 }
 
 JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index f9f3e25..fcb567e 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -61,8 +61,11 @@ void FaultManager::Init() {
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
   bool handled = false;
+  LOG(DEBUG) << "Handling fault";
   if (IsInGeneratedCode(context)) {
+    LOG(DEBUG) << "in generated code, looking for handler";
     for (auto& handler : handlers_) {
+      LOG(DEBUG) << "invoking Action on handler " << handler;
       handled = handler->Action(sig, info, context);
       if (handled) {
         return;
@@ -71,7 +74,7 @@ void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
   }
 
   if (!handled) {
-    LOG(INFO)<< "Caught unknown SIGSEGV in ART fault handler";
+    LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
     oldaction_.sa_sigaction(sig, info, context);
   }
 }
@@ -96,19 +99,23 @@ void FaultManager::RemoveHandler(FaultHandler* handler) {
 bool FaultManager::IsInGeneratedCode(void *context) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
+  LOG(DEBUG) << "Checking for generated code";
   Thread* thread = Thread::Current();
   if (thread == nullptr) {
+    LOG(DEBUG) << "no current thread";
     return false;
   }
 
   ThreadState state = thread->GetState();
   if (state != kRunnable) {
+    LOG(DEBUG) << "not runnable";
     return false;
   }
 
   // Current thread is runnable.
   // Make sure it has the mutator lock.
   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
+    LOG(DEBUG) << "no lock";
     return false;
   }
 
@@ -120,7 +127,9 @@ bool FaultManager::IsInGeneratedCode(void *context) {
   GetMethodAndReturnPC(context, /*out*/potential_method, /*out*/return_pc);
 
   // If we don't have a potential method, we're outta here.
+  LOG(DEBUG) << "potential method: " << potential_method;
   if (potential_method == 0) {
+    LOG(DEBUG) << "no method";
     return false;
   }
 
@@ -133,19 +142,23 @@ bool FaultManager::IsInGeneratedCode(void *context) {
   // Check that the class pointer inside the object is not null and is aligned.
   mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
   if (cls == nullptr) {
+    LOG(DEBUG) << "not a class";
     return false;
   }
   if (!IsAligned<kObjectAlignment>(cls)) {
+    LOG(DEBUG) << "not aligned";
     return false;
   }
 
   if (!VerifyClassClass(cls)) {
+    LOG(DEBUG) << "not a class class";
     return false;
   }
 
   // Now make sure the class is a mirror::ArtMethod.
   if (!cls->IsArtMethodClass()) {
+    LOG(DEBUG) << "not a method";
     return false;
   }
 
@@ -153,7 +166,15 @@ bool FaultManager::IsInGeneratedCode(void *context) {
   // at the return PC address.
   mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(potential_method);
-  return method->ToDexPc(return_pc, false) != DexFile::kDexNoIndex;
+  if (true || kIsDebugBuild) {
+    LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method);
+    uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
+    LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+  }
+  uint32_t dexpc = method->ToDexPc(return_pc, false);
+  LOG(DEBUG) << "dexpc: " << dexpc;
+  return dexpc != DexFile::kDexNoIndex;
 }
 
 //
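The end-to-end shape of this machinery — SIGSEGV arrives, the manager checks that the fault came from managed code, and a handler redirects control to a throw routine — can be demonstrated in miniature with POSIX signals. A self-contained sketch that recovers via siglongjmp instead of rewriting the saved PC; this is demo-grade (fprintf and siglongjmp in a SIGSEGV handler are not production practice):

```cpp
#include <csetjmp>
#include <csignal>
#include <cstdio>

static sigjmp_buf recovery_point;

// Stand-in for a FaultHandler::Action: decide the fault is "ours" and divert
// control to a recovery path instead of letting the process die.
static void FaultAction(int sig, siginfo_t* info, void*) {
  std::fprintf(stderr, "fault at %p, diverting to throw path\n", info->si_addr);
  siglongjmp(recovery_point, 1);
}

int main() {
  struct sigaction sa = {};
  sa.sa_sigaction = FaultAction;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);

  if (sigsetjmp(recovery_point, 1) == 0) {
    volatile int* null_ptr = nullptr;
    (void)*null_ptr;  // Implicit "null check": the load faults.
  } else {
    std::puts("recovered: this is where an NPE would be thrown");
  }
  return 0;
}
```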
diff --git a/runtime/stack.cc b/runtime/stack.cc
index c33d1ab..ab3bd85 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -16,6 +16,7 @@
 
 #include "stack.h"
 
+#include "base/hex_dump.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
@@ -23,6 +24,7 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
+#include "thread.h"
 #include "thread_list.h"
 #include "throw_location.h"
 #include "verify_object-inl.h"
@@ -30,6 +32,14 @@
 
 namespace art {
 
+// Define a piece of memory, the address of which can be used as a marker
+// for the gap in the stack added during stack overflow handling.
+static uint32_t stack_overflow_object;
+
+// The stack overflow gap marker is simply a valid unique address.
+void* stack_overflow_gap_marker = &stack_overflow_object;
+
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
@@ -294,20 +304,56 @@ void StackVisitor::WalkStack(bool include_transitions) {
   CHECK_EQ(cur_depth_, 0U);
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
+
+  bool kDebugStackWalk = false;
+  bool kDebugStackWalkVeryVerbose = false;  // The name says it all.
+
+  if (kDebugStackWalk) {
+    LOG(INFO) << "walking stack";
+  }
   for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
+    if (kDebugStackWalkVeryVerbose) {
+      LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
+      LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
+    }
+
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
       mirror::ArtMethod* method = *cur_quick_frame_;
       while (method != NULL) {
-        SanityCheckFrame();
-        bool should_continue = VisitFrame();
-        if (UNLIKELY(!should_continue)) {
-          return;
+        // Check for a stack overflow gap marker.
+        if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
+          // Marker for a stack overflow.  This is followed by the offset from the
+          // current SP to the next frame.  There is a gap in the stack here.  Jump
+          // the gap silently.
+          // Caveat coder: the layout of the overflow marker depends on the architecture.
+          // The first element is address sized (8 bytes on a 64 bit machine).  The second
+          // element is 32 bits.  So be careful with those address calculations.
+
+          // Get the address of the offset, just beyond the marker pointer.
+          byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
+          uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
+          CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
+          mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
+              reinterpret_cast<byte*>(gapsizeaddr) + gap);
+          if (kDebugStackWalk) {
+            LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
+                next_frame;
+          }
+          cur_quick_frame_ = next_frame;
+          method = *next_frame;
+          CHECK(method != nullptr);
+        } else {
+          SanityCheckFrame();
+          bool should_continue = VisitFrame();
+          if (UNLIKELY(!should_continue)) {
+            return;
+          }
        }
         if (context_ != NULL) {
           context_->FillCalleeSaves(*this);
@@ -317,6 +363,9 @@ void StackVisitor::WalkStack(bool include_transitions) {
         size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
+        if (kDebugStackWalkVeryVerbose) {
+          LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
+        }
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -349,6 +398,10 @@ void StackVisitor::WalkStack(bool include_transitions) {
         cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
         cur_depth_++;
         method = *cur_quick_frame_;
+        if (kDebugStackWalkVeryVerbose) {
+          LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
+          LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
+        }
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
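The gap-skipping protocol the walker implements — a pointer-sized marker slot, a 32-bit gap size just above it, and the next frame located gap bytes beyond the size word — can be simulated on a plain byte array. A sketch assuming the 32-bit ARM layout; kGapMarker and the buffer are stand-ins, not runtime values:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Simulated 32-bit stack layout, mirroring WalkStack's marker handling:
// [marker][gap_size] ... (gap_size bytes past &gap_size) ... [next method]
static const uint32_t kGapMarker = 0xdeadbeef;  // stand-in for the marker address

int main() {
  uint8_t stack[64] = {};

  // Handler side: lay down the marker, the gap size, and a fake next frame.
  uint32_t gap = 16;
  std::memcpy(&stack[0], &kGapMarker, sizeof(kGapMarker));  // marker slot
  std::memcpy(&stack[4], &gap, sizeof(gap));                // gap size slot
  uint32_t fake_method = 0x1234;
  std::memcpy(&stack[4 + gap], &fake_method, sizeof(fake_method));

  // Walker side: recognize the marker, read the gap, and jump over it.
  uint32_t slot;
  std::memcpy(&slot, &stack[0], sizeof(slot));
  if (slot == kGapMarker) {
    uint32_t gap_size;
    std::memcpy(&gap_size, &stack[4], sizeof(gap_size));
    uint32_t method;
    std::memcpy(&method, &stack[4 + gap_size], sizeof(method));
    std::printf("skipped %u-byte gap, next method = 0x%x\n", gap_size, method);
  }
  return 0;
}
```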
diff --git a/runtime/stack.h b/runtime/stack.h
index 4ee5de1..ab903d6 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -100,6 +100,14 @@ enum VRegBaseRegNum : int {
   kVRegNonSpecialTempBaseReg = -3,
 };
 
+// Special object used to mark the gap in the stack placed when a stack
+// overflow fault occurs during implicit stack checking.  This is not
+// a real object - it is used simply as a valid address to which a
+// mirror::ArtMethod* can be compared during a stack walk.  It is inserted
+// into the stack during the stack overflow signal handling to mark the gap
+// in which the memory is protected against read and write.
+extern void* stack_overflow_gap_marker;
+
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
 class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 38e4204..3692b9f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -215,10 +215,16 @@ static size_t FixStackSize(size_t stack_size) {
     stack_size = PTHREAD_STACK_MIN;
   }
 
-  // It's likely that callers are trying to ensure they have at least a certain amount of
-  // stack space, so we should add our reserved space on top of what they requested, rather
-  // than implicitly take it away from them.
-  stack_size += Thread::kStackOverflowReservedBytes;
+  if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    // It's likely that callers are trying to ensure they have at least a certain amount of
+    // stack space, so we should add our reserved space on top of what they requested, rather
+    // than implicitly take it away from them.
+    stack_size += Thread::kStackOverflowReservedBytes;
+  } else {
+    // If we are going to use implicit stack checks, allocate space for the protected
+    // region at the bottom of the stack.
+    stack_size += Thread::kStackOverflowImplicitCheckSize;
+  }
 
   // Some systems require the stack size to be a multiple of the system page size, so round up.
   stack_size = RoundUp(stack_size, kPageSize);
@@ -226,6 +232,39 @@ static size_t FixStackSize(size_t stack_size) {
   return stack_size;
 }
 
+// Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
+// overflow is detected.  It is located right below the stack_end_.  Just below that
+// is the StackOverflow reserved region used when creating the StackOverflow
+// exception.
+void Thread::InstallImplicitProtection(bool is_main_stack) {
+  byte* pregion = stack_end_;
+
+  constexpr uint32_t kMarker = 0xdadadada;
+  uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
+  if (*marker == kMarker) {
+    // The region has already been set up.
+    return;
+  }
+  // Add marker so that we can detect a second attempt to do this.
+  *marker = kMarker;
+
+  pregion -= kStackOverflowProtectedSize;
+
+  // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
+  // need to do this on the main stack.
+  if (is_main_stack) {
+    memset(pregion, 0x55, kStackOverflowProtectedSize);
+  }
+  VLOG(threads) << "installing stack protected region at " << std::hex <<
+      static_cast<void*>(pregion) << " to " <<
+      static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
+
+  if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
+    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
+        << strerror(errno);
+  }
+}
+
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
   CHECK(java_peer != nullptr);
   Thread* self = static_cast<JNIEnvExt*>(env)->self;
@@ -472,7 +511,22 @@ void Thread::InitStackHwm() {
 #endif
 
   // Set stack_end_ to the bottom of the stack saving space of stack overflows
-  ResetDefaultStackEnd();
+  bool implicit_stack_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+  ResetDefaultStackEnd(implicit_stack_check);
+
+  // Install the protected region if we are doing implicit overflow checks.
+  if (implicit_stack_check) {
+    if (is_main_thread) {
+      // The main thread has a 16K protected region at the bottom.  We need
+      // to install our own region so we need to move the limits
+      // of the stack to make room for it.
+      constexpr uint32_t kDelta = 16 * KB;
+      stack_begin_ += kDelta;
+      stack_end_ += kDelta;
+      stack_size_ -= kDelta;
+    }
+    InstallImplicitProtection(is_main_thread);
+  }
 
   // Sanity check.
   int stack_variable;
@@ -967,6 +1021,7 @@ Thread::Thread(bool daemon)
       pthread_self_(0),
       no_thread_suspension_(0),
       last_no_thread_suspension_cause_(nullptr),
+      suspend_trigger_(reinterpret_cast<uintptr_t*>(&suspend_trigger_)),
       thread_exit_check_count_(0),
       thread_local_start_(nullptr),
       thread_local_pos_(nullptr),
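InstallImplicitProtection reduces to standard POSIX machinery: touch the pages so they are resident, then mprotect them PROT_NONE. A self-contained sketch against an anonymous mapping rather than a live thread stack; the size mirrors the patch's kStackOverflowProtectedSize, everything else is illustrative:

```cpp
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <sys/mman.h>

int main() {
  const size_t kProtectedSize = 32 * 1024;  // mirrors kStackOverflowProtectedSize

  // Stand-in for the low end of a thread stack.
  void* region = mmap(nullptr, kProtectedSize, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (region == MAP_FAILED) {
    std::perror("mmap");
    return 1;
  }

  // Touch the pages so they are mapped in, then revoke all access.
  std::memset(region, 0x55, kProtectedSize);
  if (mprotect(region, kProtectedSize, PROT_NONE) == -1) {
    std::fprintf(stderr, "mprotect failed: %s\n", std::strerror(errno));
    return 1;
  }
  std::printf("protected region installed at %p\n", region);

  // Any read or write of 'region' from here on raises SIGSEGV, which is
  // exactly the trap the implicit stack overflow probe relies on.
  munmap(region, kProtectedSize);
  return 0;
}
```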
diff --git a/runtime/thread.h b/runtime/thread.h
index 32875e6..63d22c5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -112,6 +112,14 @@ class PACKED(4) Thread {
   static constexpr size_t kStackOverflowReservedUsableBytes =
       kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
+  // For implicit overflow checks we reserve an extra piece of memory at the bottom
+  // of the stack (lowest memory).  The higher portion of the memory
+  // is protected against reads and the lower is available for use while
+  // throwing the StackOverflow exception.
+  static constexpr size_t kStackOverflowProtectedSize = 32 * KB;
+  static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
+      kStackOverflowReservedBytes;
+
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
   static void CreateNativeThread(JNIEnv* env, jobject peer, size_t stack_size, bool daemon);
@@ -461,12 +469,21 @@ class PACKED(4) Thread {
   void SetStackEndForStackOverflow() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Set the stack end to that to be used during regular execution
-  void ResetDefaultStackEnd() {
+  void ResetDefaultStackEnd(bool implicit_overflow_check) {
     // Our stacks grow down, so we want stack_end_ to be near there, but reserving enough room
     // to throw a StackOverflowError.
-    stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+    if (implicit_overflow_check) {
+      // For implicit checks we also need to add in the protected region above the
+      // overflow region.
+      stack_end_ = stack_begin_ + kStackOverflowImplicitCheckSize;
+    } else {
+      stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+    }
   }
 
+  // Install the protected region for implicit stack checks.
+  void InstallImplicitProtection(bool is_main_stack);
+
   bool IsHandlingStackOverflow() const {
     return stack_end_ == stack_begin_;
   }