authorIan Rogers <irogers@google.com>2013-10-01 19:45:43 -0700
committerIan Rogers <irogers@google.com>2013-10-02 09:31:55 -0700
commitd9c4fc94fa618617f94e1de9af5f034549100753 (patch)
tree1305efbbc3d4bc306c0947bb6d4b01553667f98e
parent7ef126ce0593929bcf8fb73d8b1119ce3b95b3f2 (diff)
downloadart-d9c4fc94fa618617f94e1de9af5f034549100753.zip
art-d9c4fc94fa618617f94e1de9af5f034549100753.tar.gz
art-d9c4fc94fa618617f94e1de9af5f034549100753.tar.bz2
Inflate contended lock word by suspending owner.
Bug 6961405.
Don't inflate monitors for Notify and NotifyAll.
Tidy lock word, handle recursive lock case alongside unlocked case and move
assembly out of line (except for ARM quick). Also handle null in out-of-line
assembly as the test is quick and the enter/exit code is already a safepoint.
To gain ownership of a monitor on behalf of another thread, monitor contenders
must not hold the monitor_lock_, so they wait on a condition variable.
Reduce size of per mutex contention log.
Be consistent in calling thin lock thread ids just thread ids.
Fix potential thread death races caused by the use of FindThreadByThreadId,
make it invariant that returned threads are either self or suspended now.

Code size reduction on ARM boot.oat 0.2%.
Old nexus 7 speedup 0.25%, new nexus 7 speedup 1.4%, nexus 10 speedup 2.24%,
nexus 4 speedup 2.09% on DeltaBlue.

Change-Id: Id52558b914f160d9c8578fdd7fc8199a9598576a
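
As a quick orientation on the new lock word: the most significant bit of Object::monitor_ now selects between the thin/unlocked encoding and an inflated Monitor*. A minimal C++ sketch (illustrative only, not part of the change) of how the three states decode, following the layout added in runtime/lock_word.h further down:

#include <cstdint>

enum LockState { kUnlocked, kThinLocked, kFatLocked };

// Mirrors LockWord::GetState() from runtime/lock_word.h (sketch only).
LockState DecodeLockWord(uint32_t value) {
  if (value == 0) {
    return kUnlocked;       // No owner, no count, no monitor.
  } else if ((value >> 31) == 0) {
    return kThinLocked;     // Bits 0-15: owner thread id, bits 16-30: recursion count.
  } else {
    return kFatLocked;      // Low 31 bits hold Monitor* >> 1.
  }
}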
-rw-r--r--compiler/Android.mk1
-rw-r--r--compiler/dex/quick/arm/call_arm.cc168
-rw-r--r--compiler/dex/quick/arm/int_arm.cc3
-rw-r--r--compiler/dex/quick/gen_common.cc12
-rw-r--r--compiler/dex/quick/mips/call_mips.cc31
-rw-r--r--compiler/dex/quick/mips/codegen_mips.h2
-rw-r--r--compiler/dex/quick/mir_to_lir.h6
-rw-r--r--compiler/dex/quick/x86/call_x86.cc37
-rw-r--r--compiler/dex/quick/x86/codegen_x86.h2
-rw-r--r--compiler/image_test.cc4
-rw-r--r--compiler/llvm/llvm_compilation_unit.cc5
-rw-r--r--compiler/llvm/runtime_support_builder.cc76
-rw-r--r--compiler/llvm/runtime_support_builder.h4
-rw-r--r--compiler/llvm/runtime_support_builder_arm.cc19
-rw-r--r--compiler/llvm/runtime_support_builder_arm.h4
-rw-r--r--compiler/llvm/runtime_support_builder_thumb2.cc90
-rw-r--r--compiler/llvm/runtime_support_builder_thumb2.h37
-rw-r--r--dalvikvm/Android.mk2
-rw-r--r--runtime/Android.mk1
-rw-r--r--runtime/arch/arm/asm_support_arm.h2
-rw-r--r--runtime/arch/arm/quick_entrypoints_arm.S49
-rw-r--r--runtime/arch/arm/thread_arm.cc1
-rw-r--r--runtime/arch/x86/asm_support_x86.h2
-rw-r--r--runtime/arch/x86/quick_entrypoints_x86.S79
-rw-r--r--runtime/arch/x86/thread_x86.cc1
-rw-r--r--runtime/asm_support.h3
-rw-r--r--runtime/base/mutex.cc17
-rw-r--r--runtime/base/mutex.h2
-rw-r--r--runtime/debugger.cc24
-rw-r--r--runtime/entrypoints/entrypoint_utils.h1
-rw-r--r--runtime/entrypoints/quick/quick_lock_entrypoints.cc42
-rw-r--r--runtime/jni_internal.cc4
-rw-r--r--runtime/lock_word-inl.h50
-rw-r--r--runtime/lock_word.h128
-rw-r--r--runtime/mirror/class.cc2
-rw-r--r--runtime/mirror/object-inl.h25
-rw-r--r--runtime/mirror/object.h15
-rw-r--r--runtime/monitor.cc727
-rw-r--r--runtime/monitor.h106
-rw-r--r--runtime/monitor_android.cc2
-rw-r--r--runtime/native/dalvik_system_VMStack.cc4
-rw-r--r--runtime/native/java_lang_DexCache.cc2
-rw-r--r--runtime/native/java_lang_Thread.cc4
-rw-r--r--runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc20
-rw-r--r--runtime/object_utils.h3
-rw-r--r--runtime/runtime.cc10
-rw-r--r--runtime/runtime.h7
-rw-r--r--runtime/signal_catcher.cc1
-rw-r--r--runtime/thread-inl.h9
-rw-r--r--runtime/thread.cc114
-rw-r--r--runtime/thread.h43
-rw-r--r--runtime/thread_list.cc201
-rw-r--r--runtime/thread_list.h31
53 files changed, 1192 insertions, 1043 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 66ff461..fc2f02b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -74,7 +74,6 @@ LIBART_COMPILER_SRC_FILES := \
llvm/md_builder.cc \
llvm/runtime_support_builder.cc \
llvm/runtime_support_builder_arm.cc \
- llvm/runtime_support_builder_thumb2.cc \
llvm/runtime_support_builder_x86.cc \
trampolines/trampoline_compiler.cc \
utils/arm/assembler_arm.cc \
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index bba2ec5..401da2a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -440,88 +440,120 @@ void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
}
/*
- * Handle simple case (thin lock) inline. If it's complicated, bail
- * out to the heavyweight lock/unlock routines. We'll use dedicated
- * registers here in order to be in the right position in case we
- * to bail to oat[Lock/Unlock]Object(self, object)
- *
- * r0 -> self pointer [arg0 for oat[Lock/Unlock]Object
- * r1 -> object [arg1 for oat[Lock/Unlock]Object
- * r2 -> intial contents of object->lock, later result of strex
- * r3 -> self->thread_id
- * r12 -> allow to be used by utilities as general temp
- *
- * The result of the strex is 0 if we acquire the lock.
- *
- * See comments in monitor.cc for the layout of the lock word.
- * Of particular interest to this code is the test for the
- * simple case - which we handle inline. For monitor enter, the
- * simple case is thin lock, held by no-one. For monitor exit,
- * the simple case is thin lock, held by the unlocking thread with
- * a recurse count of 0.
- *
- * A minor complication is that there is a field in the lock word
- * unrelated to locking: the hash state. This field must be ignored, but
- * preserved.
- *
+ * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc.
*/
void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
FlushAllRegs();
- DCHECK_EQ(LW_SHAPE_THIN, 0);
LoadValueDirectFixed(rl_src, r0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
- GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
- LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
- NewLIR3(kThumb2Ldrex, r1, r0,
- mirror::Object::MonitorOffset().Int32Value() >> 2); // Get object->lock
- // Align owner
- OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT);
- // Is lock unheld on lock or held by us (==thread_id) on unlock?
- NewLIR4(kThumb2Bfi, r2, r1, 0, LW_LOCK_OWNER_SHIFT - 1);
- NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
- OpRegImm(kOpCmp, r1, 0);
- OpIT(kCondEq, "");
- NewLIR4(kThumb2Strex, r1, r2, r0,
- mirror::Object::MonitorOffset().Int32Value() >> 2);
- OpRegImm(kOpCmp, r1, 0);
- OpIT(kCondNe, "T");
- // Go expensive route - artLockObjectFromCode(self, obj);
- LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
- ClobberCalleeSave();
- LIR* call_inst = OpReg(kOpBlx, rARM_LR);
- MarkSafepointPC(call_inst);
- GenMemBarrier(kLoadLoad);
+ constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
+ if (kArchVariantHasGoodBranchPredictor) {
+ LIR* null_check_branch;
+ if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+ null_check_branch = nullptr; // No null check.
+ } else {
+ // If the null-check fails it's handled by the slow-path to reduce exception related meta-data.
+ null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
+ }
+ LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+ NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, r1, 0, NULL);
+ NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ LIR* lock_success_branch = OpCmpImmBranch(kCondEq, r1, 0, NULL);
+
+
+ LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+ not_unlocked_branch->target = slow_path_target;
+ if (null_check_branch != nullptr) {
+ null_check_branch->target = slow_path_target;
+ }
+ // TODO: move to a slow path.
+ // Go expensive route - artLockObjectFromCode(obj);
+ LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
+ ClobberCalleeSave();
+ LIR* call_inst = OpReg(kOpBlx, rARM_LR);
+ MarkSafepointPC(call_inst);
+
+ LIR* success_target = NewLIR0(kPseudoTargetLabel);
+ lock_success_branch->target = success_target;
+ GenMemBarrier(kLoadLoad);
+ } else {
+ // Explicit null-check as slow-path is entered using an IT.
+ GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
+ LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+ NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ OpRegImm(kOpCmp, r1, 0);
+ OpIT(kCondEq, "");
+ NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ OpRegImm(kOpCmp, r1, 0);
+ OpIT(kCondNe, "T");
+ // Go expensive route - artLockObjectFromCode(self, obj);
+ LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
+ ClobberCalleeSave();
+ LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
+ MarkSafepointPC(call_inst);
+ GenMemBarrier(kLoadLoad);
+ }
}
/*
- * For monitor unlock, we don't have to use ldrex/strex. Once
- * we've determined that the lock is thin and that we own it with
- * a zero recursion count, it's safe to punch it back to the
- * initial, unlock thin state with a store word.
+ * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
+ * and can only give away ownership if it's suspended.
*/
void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
- DCHECK_EQ(LW_SHAPE_THIN, 0);
FlushAllRegs();
LoadValueDirectFixed(rl_src, r0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
- GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
- LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); // Get lock
+ LIR* null_check_branch;
LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
- // Is lock unheld on lock or held by us (==thread_id) on unlock?
- OpRegRegImm(kOpAnd, r3, r1,
- (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
- // Align owner
- OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT);
- NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
- OpRegReg(kOpSub, r1, r2);
- OpIT(kCondEq, "EE");
- StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
- // Go expensive route - UnlockObjectFromCode(obj);
- LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
- ClobberCalleeSave();
- LIR* call_inst = OpReg(kOpBlx, rARM_LR);
- MarkSafepointPC(call_inst);
- GenMemBarrier(kStoreLoad);
+ constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
+ if (kArchVariantHasGoodBranchPredictor) {
+ if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+ null_check_branch = nullptr; // No null check.
+ } else {
+ // If the null-check fails it's handled by the slow-path to reduce exception related meta-data.
+ null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
+ }
+ LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);
+ LoadConstantNoClobber(r3, 0);
+ LIR* slow_unlock_branch = OpCmpBranch(kCondNe, r1, r2, NULL);
+ StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
+ LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+ LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+ slow_unlock_branch->target = slow_path_target;
+ if (null_check_branch != nullptr) {
+ null_check_branch->target = slow_path_target;
+ }
+ // TODO: move to a slow path.
+ // Go expensive route - artUnlockObjectFromCode(obj);
+ LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
+ ClobberCalleeSave();
+ LIR* call_inst = OpReg(kOpBlx, rARM_LR);
+ MarkSafepointPC(call_inst);
+
+ LIR* success_target = NewLIR0(kPseudoTargetLabel);
+ unlock_success_branch->target = success_target;
+ GenMemBarrier(kStoreLoad);
+ } else {
+ // Explicit null-check as slow-path is entered using an IT.
+ GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
+ LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); // Get lock
+ LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+ LoadConstantNoClobber(r3, 0);
+ // Is lock unheld on lock or held by us (==thread_id) on unlock?
+ OpRegReg(kOpCmp, r1, r2);
+ OpIT(kCondEq, "EE");
+ StoreWordDisp/*eq*/(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
+ // Go expensive route - UnlockObjectFromCode(obj);
+ LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
+ ClobberCalleeSave();
+ LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
+ MarkSafepointPC(call_inst);
+ GenMemBarrier(kStoreLoad);
+ }
}
void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
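The monitor-enter fast path above only handles the unlocked -> thin locked transition inline (anything else branches to artLockObjectFromCode), and the exit fast path can use a plain store because an owning thread can only lose the lock while it is suspended. A hedged C++ sketch of that logic, with std::atomic standing in for the ldrex/strex and str sequences the backend actually emits:

#include <atomic>
#include <cstdint>

// Illustrative only: the shape of the inline fast paths, not the generated code.
bool ThinLockEnterFastPath(std::atomic<uint32_t>& lock_word, uint16_t self_id) {
  uint32_t expected = 0;  // Only the unlocked word (zero) is handled inline.
  return lock_word.compare_exchange_strong(expected, self_id);
  // On failure (held, recursive or fat) the generated code calls artLockObjectFromCode.
}

bool ThinLockExitFastPath(std::atomic<uint32_t>& lock_word, uint16_t self_id) {
  if (lock_word.load(std::memory_order_relaxed) != self_id) {
    return false;  // Recursive, fat or not owned: call artUnlockObjectFromCode.
  }
  lock_word.store(0, std::memory_order_release);  // Plain store: we hold the lock.
  return true;
}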
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 07782d9..4fa0387 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -24,8 +24,7 @@
namespace art {
-LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1,
- int src2, LIR* target) {
+LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) {
OpRegReg(kOpCmp, src1, src2);
return OpCondBranch(cond, target);
}
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 4dd55d7..f38225a 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1761,4 +1761,16 @@ void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
suspend_launchpads_.Insert(launch_pad);
}
+/* Call out to helper assembly routine that will null check obj and then lock it. */
+void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+ FlushAllRegs();
+ CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pLockObject), rl_src, true);
+}
+
+/* Call out to helper assembly routine that will null check obj and then unlock it. */
+void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+ FlushAllRegs();
+ CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true);
+}
+
} // namespace art
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index d53c012..9a5ca2c 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -261,36 +261,6 @@ void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
MarkSafepointPC(call_inst);
}
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
-void MipsMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
- FlushAllRegs();
- LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj
- LockCallTemps(); // Prepare for explicit register usage
- GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags);
- // Go expensive route - artLockObjectFromCode(self, obj);
- int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pLockObject));
- ClobberCalleeSave();
- LIR* call_inst = OpReg(kOpBlx, r_tgt);
- MarkSafepointPC(call_inst);
-}
-
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
-void MipsMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
- FlushAllRegs();
- LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj
- LockCallTemps(); // Prepare for explicit register usage
- GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags);
- // Go expensive route - UnlockObjectFromCode(obj);
- int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pUnlockObject));
- ClobberCalleeSave();
- LIR* call_inst = OpReg(kOpBlx, r_tgt);
- MarkSafepointPC(call_inst);
-}
-
void MipsMir2Lir::GenMoveException(RegLocation rl_dest) {
int ex_offset = Thread::ExceptionOffset().Int32Value();
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -318,6 +288,7 @@ void MipsMir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) {
FreeTemp(reg_card_base);
FreeTemp(reg_card_no);
}
+
void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
int spill_count = num_core_spills_ + num_fp_spills_;
/*
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 8d0b347..95b2e77 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -123,8 +123,6 @@ class MipsMir2Lir : public Mir2Lir {
void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
void GenSelect(BasicBlock* bb, MIR* mir);
void GenMemBarrier(MemBarrierKind barrier_kind);
- void GenMonitorEnter(int opt_flags, RegLocation rl_src);
- void GenMonitorExit(int opt_flags, RegLocation rl_src);
void GenMoveException(RegLocation rl_dest);
void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
int first_bit, int second_bit);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 7d6f968..71b68fe 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -629,8 +629,6 @@ class Mir2Lir : public Backend {
virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
- virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src) = 0;
- virtual void GenMonitorExit(int opt_flags, RegLocation rl_src) = 0;
virtual void GenMoveException(RegLocation rl_dest) = 0;
virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
RegLocation rl_result, int lit, int first_bit,
@@ -689,6 +687,10 @@ class Mir2Lir : public Backend {
virtual bool InexpensiveConstantLong(int64_t value) = 0;
virtual bool InexpensiveConstantDouble(int64_t value) = 0;
+ // May be optimized by targets.
+ virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
+ virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
+
// Temp workaround
void Workaround7250540(RegLocation rl_dest, int value);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 2be2aa9..7fad6f0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -150,43 +150,6 @@ void X86Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
rX86_ARG1, true);
}
-void X86Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
- FlushAllRegs();
- LoadValueDirectFixed(rl_src, rCX); // Get obj
- LockCallTemps(); // Prepare for explicit register usage
- GenNullCheck(rl_src.s_reg_low, rCX, opt_flags);
- // If lock is unheld, try to grab it quickly with compare and exchange
- // TODO: copy and clear hash state?
- NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
- NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
- NewLIR2(kX86Xor32RR, rAX, rAX);
- NewLIR3(kX86LockCmpxchgMR, rCX, mirror::Object::MonitorOffset().Int32Value(), rDX);
- LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
- // If lock is held, go the expensive route - artLockObjectFromCode(self, obj);
- CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pLockObject), rCX, true);
- branch->target = NewLIR0(kPseudoTargetLabel);
-}
-
-void X86Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
- FlushAllRegs();
- LoadValueDirectFixed(rl_src, rAX); // Get obj
- LockCallTemps(); // Prepare for explicit register usage
- GenNullCheck(rl_src.s_reg_low, rAX, opt_flags);
- // If lock is held by the current thread, clear it to quickly release it
- // TODO: clear hash state?
- NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
- NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
- NewLIR3(kX86Mov32RM, rCX, rAX, mirror::Object::MonitorOffset().Int32Value());
- OpRegReg(kOpSub, rCX, rDX);
- LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
- NewLIR3(kX86Mov32MR, rAX, mirror::Object::MonitorOffset().Int32Value(), rCX);
- LIR* branch2 = NewLIR1(kX86Jmp8, 0);
- branch->target = NewLIR0(kPseudoTargetLabel);
- // Otherwise, go the expensive route - UnlockObjectFromCode(obj);
- CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rAX, true);
- branch2->target = NewLIR0(kPseudoTargetLabel);
-}
-
void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
int ex_offset = Thread::ExceptionOffset().Int32Value();
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 0f28110..29fb3a5 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -123,8 +123,6 @@ class X86Mir2Lir : public Mir2Lir {
void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
void GenSelect(BasicBlock* bb, MIR* mir);
void GenMemBarrier(MemBarrierKind barrier_kind);
- void GenMonitorEnter(int opt_flags, RegLocation rl_src);
- void GenMonitorExit(int opt_flags, RegLocation rl_src);
void GenMoveException(RegLocation rl_dest);
void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result,
int lit, int first_bit, int second_bit);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6464a4c..d4be7c0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,6 +23,8 @@
#include "compiler/oat_writer.h"
#include "gc/space/image_space.h"
#include "image.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
#include "signal_catcher.h"
#include "UniquePtr.h"
#include "utils.h"
@@ -158,7 +160,7 @@ TEST_F(ImageTest, WriteRead) {
// non image classes should be in a space after the image.
EXPECT_GT(reinterpret_cast<byte*>(klass), image_end) << descriptor;
}
- EXPECT_TRUE(Monitor::IsValidLockWord(*klass->GetRawLockWordAddress()));
+ EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord()));
}
}
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index aa439cc..feb495e 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -82,7 +82,6 @@
#include "ir_builder.h"
#include "os.h"
#include "runtime_support_builder_arm.h"
-#include "runtime_support_builder_thumb2.h"
#include "runtime_support_builder_x86.h"
#include "utils_llvm.h"
@@ -118,12 +117,10 @@ LlvmCompilationUnit::LlvmCompilationUnit(const CompilerLLVM* compiler_llvm, size
default:
runtime_support_.reset(new RuntimeSupportBuilder(*context_, *module_, *irb_));
break;
+ case kThumb2:
case kArm:
runtime_support_.reset(new RuntimeSupportBuilderARM(*context_, *module_, *irb_));
break;
- case kThumb2:
- runtime_support_.reset(new RuntimeSupportBuilderThumb2(*context_, *module_, *irb_));
- break;
case kX86:
runtime_support_.reset(new RuntimeSupportBuilderX86(*context_, *module_, *irb_));
break;
diff --git a/compiler/llvm/runtime_support_builder.cc b/compiler/llvm/runtime_support_builder.cc
index 24e283d..c825fbf 100644
--- a/compiler/llvm/runtime_support_builder.cc
+++ b/compiler/llvm/runtime_support_builder.cc
@@ -164,89 +164,13 @@ void RuntimeSupportBuilder::EmitTestSuspend() {
/* Monitor */
void RuntimeSupportBuilder::EmitLockObject(::llvm::Value* object) {
- Value* monitor =
- irb_.LoadFromObjectOffset(object,
- mirror::Object::MonitorOffset().Int32Value(),
- irb_.getJIntTy(),
- kTBAARuntimeInfo);
-
- Value* real_monitor =
- irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-
- // Is thin lock, unheld and not recursively acquired.
- Value* unheld = irb_.CreateICmpEQ(real_monitor, irb_.getInt32(0));
-
- Function* parent_func = irb_.GetInsertBlock()->getParent();
- BasicBlock* bb_fast = BasicBlock::Create(context_, "lock_fast", parent_func);
- BasicBlock* bb_slow = BasicBlock::Create(context_, "lock_slow", parent_func);
- BasicBlock* bb_cont = BasicBlock::Create(context_, "lock_cont", parent_func);
- irb_.CreateCondBr(unheld, bb_fast, bb_slow, kLikely);
-
- irb_.SetInsertPoint(bb_fast);
-
- // Calculate new monitor: new = old | (lock_id << LW_LOCK_OWNER_SHIFT)
- Value* lock_id =
- EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(),
- irb_.getInt32Ty(), kTBAARuntimeInfo);
-
- Value* owner = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT);
- Value* new_monitor = irb_.CreateOr(monitor, owner);
-
- // Atomically update monitor.
- Value* old_monitor =
- irb_.CompareExchangeObjectOffset(object,
- mirror::Object::MonitorOffset().Int32Value(),
- monitor, new_monitor, kTBAARuntimeInfo);
-
- Value* retry_slow_path = irb_.CreateICmpEQ(old_monitor, monitor);
- irb_.CreateCondBr(retry_slow_path, bb_cont, bb_slow, kLikely);
-
- irb_.SetInsertPoint(bb_slow);
Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject);
irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
- irb_.CreateBr(bb_cont);
-
- irb_.SetInsertPoint(bb_cont);
}
void RuntimeSupportBuilder::EmitUnlockObject(::llvm::Value* object) {
- Value* lock_id =
- EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(),
- irb_.getJIntTy(),
- kTBAARuntimeInfo);
- Value* monitor =
- irb_.LoadFromObjectOffset(object,
- mirror::Object::MonitorOffset().Int32Value(),
- irb_.getJIntTy(),
- kTBAARuntimeInfo);
-
- Value* my_monitor = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT);
- Value* hash_state = irb_.CreateAnd(monitor, (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
- Value* real_monitor = irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-
- // Is thin lock, held by us and not recursively acquired
- Value* is_fast_path = irb_.CreateICmpEQ(real_monitor, my_monitor);
-
- Function* parent_func = irb_.GetInsertBlock()->getParent();
- BasicBlock* bb_fast = BasicBlock::Create(context_, "unlock_fast", parent_func);
- BasicBlock* bb_slow = BasicBlock::Create(context_, "unlock_slow", parent_func);
- BasicBlock* bb_cont = BasicBlock::Create(context_, "unlock_cont", parent_func);
- irb_.CreateCondBr(is_fast_path, bb_fast, bb_slow, kLikely);
-
- irb_.SetInsertPoint(bb_fast);
- // Set all bits to zero (except hash state)
- irb_.StoreToObjectOffset(object,
- mirror::Object::MonitorOffset().Int32Value(),
- hash_state,
- kTBAARuntimeInfo);
- irb_.CreateBr(bb_cont);
-
- irb_.SetInsertPoint(bb_slow);
Function* slow_func = GetRuntimeSupportFunction(runtime_support::UnlockObject);
irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
- irb_.CreateBr(bb_cont);
-
- irb_.SetInsertPoint(bb_cont);
}
diff --git a/compiler/llvm/runtime_support_builder.h b/compiler/llvm/runtime_support_builder.h
index e92ac0a..898611a 100644
--- a/compiler/llvm/runtime_support_builder.h
+++ b/compiler/llvm/runtime_support_builder.h
@@ -64,8 +64,8 @@ class RuntimeSupportBuilder {
virtual void EmitTestSuspend();
/* Monitor */
- virtual void EmitLockObject(::llvm::Value* object);
- virtual void EmitUnlockObject(::llvm::Value* object);
+ void EmitLockObject(::llvm::Value* object);
+ void EmitUnlockObject(::llvm::Value* object);
/* MarkGCCard */
virtual void EmitMarkGCCard(::llvm::Value* value, ::llvm::Value* target_addr);
diff --git a/compiler/llvm/runtime_support_builder_arm.cc b/compiler/llvm/runtime_support_builder_arm.cc
index 569d825..cad4624 100644
--- a/compiler/llvm/runtime_support_builder_arm.cc
+++ b/compiler/llvm/runtime_support_builder_arm.cc
@@ -116,24 +116,5 @@ Value* RuntimeSupportBuilderARM::EmitSetCurrentThread(Value* thread) {
return old_thread_register;
}
-
-/* Monitor */
-
-void RuntimeSupportBuilderARM::EmitLockObject(Value* object) {
- RuntimeSupportBuilder::EmitLockObject(object);
- FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
- /*isVarArg=*/false);
- InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true);
- irb_.CreateCall(func);
-}
-
-void RuntimeSupportBuilderARM::EmitUnlockObject(Value* object) {
- RuntimeSupportBuilder::EmitUnlockObject(object);
- FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
- /*isVarArg=*/false);
- InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true);
- irb_.CreateCall(func);
-}
-
} // namespace llvm
} // namespace art
diff --git a/compiler/llvm/runtime_support_builder_arm.h b/compiler/llvm/runtime_support_builder_arm.h
index 5a353d7..0d01509 100644
--- a/compiler/llvm/runtime_support_builder_arm.h
+++ b/compiler/llvm/runtime_support_builder_arm.h
@@ -34,10 +34,6 @@ class RuntimeSupportBuilderARM : public RuntimeSupportBuilder {
virtual void EmitStoreToThreadOffset(int64_t offset, ::llvm::Value* value,
TBAASpecialType s_ty);
virtual ::llvm::Value* EmitSetCurrentThread(::llvm::Value* thread);
-
- /* Monitor */
- virtual void EmitLockObject(::llvm::Value* object);
- virtual void EmitUnlockObject(::llvm::Value* object);
};
} // namespace llvm
diff --git a/compiler/llvm/runtime_support_builder_thumb2.cc b/compiler/llvm/runtime_support_builder_thumb2.cc
deleted file mode 100644
index eff29c8..0000000
--- a/compiler/llvm/runtime_support_builder_thumb2.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "runtime_support_builder_thumb2.h"
-
-#include "ir_builder.h"
-#include "mirror/object.h"
-#include "monitor.h"
-#include "thread.h"
-#include "utils_llvm.h"
-
-#include <llvm/IR/DerivedTypes.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/InlineAsm.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/Type.h>
-
-#include <inttypes.h>
-#include <vector>
-
-using ::llvm::BasicBlock;
-using ::llvm::Function;
-using ::llvm::FunctionType;
-using ::llvm::InlineAsm;
-using ::llvm::Type;
-using ::llvm::Value;
-
-namespace art {
-namespace llvm {
-
-
-void RuntimeSupportBuilderThumb2::EmitLockObject(Value* object) {
- FunctionType* func_ty = FunctionType::get(/*Result=*/irb_.getInt32Ty(),
- /*Params=*/irb_.getJObjectTy(),
- /*isVarArg=*/false);
- // $0: result
- // $1: object
- // $2: temp
- // $3: temp
- std::string asms;
- StringAppendF(&asms, "add $3, $1, #%" PRId32 "\n", mirror::Object::MonitorOffset().Int32Value());
- StringAppendF(&asms, "ldr $2, [r9, #%" PRId32 "]\n", Thread::ThinLockIdOffset().Int32Value());
- StringAppendF(&asms, "ldrex $0, [$3]\n");
- StringAppendF(&asms, "lsl $2, $2, %d\n", LW_LOCK_OWNER_SHIFT);
- StringAppendF(&asms, "bfi $2, $0, #0, #%d\n", LW_LOCK_OWNER_SHIFT - 1);
- StringAppendF(&asms, "bfc $0, #%d, #%d\n", LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
- StringAppendF(&asms, "cmp $0, #0\n");
- StringAppendF(&asms, "it eq\n");
- StringAppendF(&asms, "strexeq $0, $2, [$3]\n");
-
- InlineAsm* func = InlineAsm::get(func_ty, asms, "=&l,l,~l,~l", true);
-
- Value* retry_slow_path = irb_.CreateCall(func, object);
- retry_slow_path = irb_.CreateICmpNE(retry_slow_path, irb_.getJInt(0));
-
- Function* parent_func = irb_.GetInsertBlock()->getParent();
- BasicBlock* basic_block_lock = BasicBlock::Create(context_, "lock", parent_func);
- BasicBlock* basic_block_cont = BasicBlock::Create(context_, "lock_cont", parent_func);
- irb_.CreateCondBr(retry_slow_path, basic_block_lock, basic_block_cont, kUnlikely);
-
- irb_.SetInsertPoint(basic_block_lock);
- Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject);
- irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
- irb_.CreateBr(basic_block_cont);
-
- irb_.SetInsertPoint(basic_block_cont);
- { // Memory barrier
- FunctionType* asm_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
- /*isVarArg=*/false);
- InlineAsm* func = InlineAsm::get(asm_ty, "dmb sy", "", true);
- irb_.CreateCall(func);
- }
-}
-
-
-} // namespace llvm
-} // namespace art
diff --git a/compiler/llvm/runtime_support_builder_thumb2.h b/compiler/llvm/runtime_support_builder_thumb2.h
deleted file mode 100644
index c47a274..0000000
--- a/compiler/llvm/runtime_support_builder_thumb2.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
-#define ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
-
-#include "runtime_support_builder_arm.h"
-
-namespace art {
-namespace llvm {
-
-class RuntimeSupportBuilderThumb2 : public RuntimeSupportBuilderARM {
- public:
- RuntimeSupportBuilderThumb2(::llvm::LLVMContext& context, ::llvm::Module& module, IRBuilder& irb)
- : RuntimeSupportBuilderARM(context, module, irb) {}
-
- /* Monitor */
- virtual void EmitLockObject(::llvm::Value* object);
-};
-
-} // namespace llvm
-} // namespace art
-
-#endif // ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 52584cf..a046391 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -38,7 +38,7 @@ LOCAL_CPP_EXTENSION := cc
LOCAL_SRC_FILES := dalvikvm.cc
LOCAL_CFLAGS := $(dalvikvm_cflags)
LOCAL_SHARED_LIBRARIES := libnativehelper
-LOCAL_LDFLAGS := -ldl
+LOCAL_LDFLAGS := -ldl -lpthread
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
include $(BUILD_HOST_EXECUTABLE)
ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 5edf759..d8abbf1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -243,6 +243,7 @@ LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
jdwp/jdwp.h \
jdwp/jdwp_constants.h \
locks.h \
+ lock_word.h \
mirror/class.h \
thread.h \
thread_state.h \
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index ed3d476..69fb9c3 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -27,5 +27,7 @@
#define THREAD_FLAGS_OFFSET 0
// Offset of field Thread::exception_ verified in InitCpu
#define THREAD_EXCEPTION_OFFSET 12
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 60
#endif // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5b2dd6c..cb61698 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -318,22 +318,67 @@ ENTRY art_quick_handle_fill_data
END art_quick_handle_fill_data
/*
- * Entry from managed code that calls artLockObjectFromCode, may block for GC.
+ * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
+ * possibly null object to lock.
*/
.extern artLockObjectFromCode
ENTRY art_quick_lock_object
+ cbz r0, slow_lock
+retry_lock:
+ ldrex r1, [r0, #LOCK_WORD_OFFSET]
+ ldrt r2, [r9, #THREAD_ID_OFFSET]
+ cmp r1, #0
+ bmi slow_lock @ lock word contains a monitor
+ bne already_thin
+ @ unlocked case - r2 holds thread id with count of 0
+ strex r3, r2, [r0, #LOCK_WORD_OFFSET]
+ cbnz r3, strex_fail @ store failed, retry
+ bx lr
+strex_fail:
+ b retry_lock @ unlikely forward branch, need to reload and recheck r1/r2
+already_thin:
+ eor r2, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId()
+ uxth r2, r2 @ zero top 16 bits
+ cbnz r2, slow_lock @ lock word and self thread id's match -> recursive lock
+ @ else contention, go to slow path
+ adds r2, r1, #65536 @ increment count in lock word placing in r2 for storing
+ bmi slow_lock @ if we overflow the count go slow
+ str r2, [r0, #LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+ bx lr
+slow_lock:
SETUP_REF_ONLY_CALLEE_SAVE_FRAME @ save callee saves in case we block
mov r1, r9 @ pass Thread::Current
mov r2, sp @ pass SP
bl artLockObjectFromCode @ (Object* obj, Thread*, SP)
- RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+ RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+ RETURN_IF_RESULT_IS_ZERO
+ DELIVER_PENDING_EXCEPTION
END art_quick_lock_object
/*
* Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
+ * r0 holds the possibly null object to lock.
*/
.extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
+ cbz r0, slow_unlock
+ ldr r1, [r0, #LOCK_WORD_OFFSET]
+ ldr r2, [r9, #THREAD_ID_OFFSET]
+ cmp r1, #0
+ bmi slow_unlock @ lock word contains a monitor
+ eor r3, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId()
+ uxth r3, r3 @ zero top 16 bits
+ cbnz r3, slow_unlock @ do lock word and self thread id's match?
+ cmp r1, #65536
+ bpl recursive_thin_unlock
+ @ transition to unlocked, r3 holds 0
+ str r3, [r0, #LOCK_WORD_OFFSET]
+ bx lr
+recursive_thin_unlock:
+ sub r1, r1, #65536
+ str r1, [r0, #LOCK_WORD_OFFSET]
+ bx lr
+slow_unlock:
SETUP_REF_ONLY_CALLEE_SAVE_FRAME @ save callee saves in case exception allocation triggers GC
mov r1, r9 @ pass Thread::Current
mov r2, sp @ pass SP
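The out-of-line stubs above also handle the recursive thin-lock case: one recursion level is the constant 65536 (1 << 16) because the count field sits directly above the 16-bit owner id, and the "bmi slow_lock" after the add bails out if the count carries into the state bit. A small hedged sketch of that arithmetic (illustrative only):

#include <cstdint>

constexpr uint32_t kThinLockCountOne = 1u << 16;  // One recursion level in the lock word.

// Assumes the word is a thin lock already held by the caller (sketch only).
bool TryRecursiveThinLock(uint32_t& lock_word) {
  uint32_t new_word = lock_word + kThinLockCountOne;
  if ((new_word & 0x80000000u) != 0) {
    return false;           // Count overflowed into the state bit: inflate via slow path.
  }
  lock_word = new_word;     // Plain store is enough: the caller owns the lock.
  return true;
}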
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index ea908be..75eef60 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -24,6 +24,7 @@ namespace art {
void Thread::InitCpu() {
CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+ CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
}
} // namespace art
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index 1092910..d4e0927 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -23,5 +23,7 @@
#define THREAD_SELF_OFFSET 40
// Offset of field Thread::exception_ verified in InitCpu
#define THREAD_EXCEPTION_OFFSET 12
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 60
#endif // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 06b2203..6be73d1 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -401,14 +401,85 @@ TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorage
TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_EAX_NOT_ZERO
TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_EAX_NOT_ZERO
-ONE_ARG_DOWNCALL art_quick_lock_object, artLockObjectFromCode, ret
-ONE_ARG_DOWNCALL art_quick_unlock_object, artUnlockObjectFromCode, RETURN_IF_EAX_ZERO
-
TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
+DEFINE_FUNCTION art_quick_lock_object
+ testl %eax, %eax // null check object/eax
+ jz slow_lock
+retry_lock:
+ movl LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word
+ movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id
+ test %ecx, %ecx
+ jb slow_lock // lock word contains a monitor
+ jnz already_thin // lock word contains a thin lock
+ // unlocked case - %edx holds thread id with count of 0
+ movl %eax, %ecx // remember object in case of retry
+ xor %eax, %eax // eax == 0 for comparison with lock word in cmpxchg
+ lock cmpxchg %edx, LOCK_WORD_OFFSET(%ecx)
+ jnz cmpxchg_fail // cmpxchg failed retry
+ ret
+cmpxchg_fail:
+ movl %ecx, %eax // restore eax
+ jmp retry_lock
+already_thin:
+ cmpw %ax, %dx // do we hold the lock already?
+ jne slow_lock
+ addl LITERAL(65536), %eax // increment recursion count
+ jb slow_lock // count overflowed so go slow
+ movl %eax, LOCK_WORD_OFFSET(%ecx) // update lockword, cmpxchg not necessary as we hold lock
+ ret
+slow_lock:
+ SETUP_REF_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC
+ mov %esp, %edx // remember SP
+ // Outgoing argument set up
+ PUSH eax // push padding
+ PUSH edx // pass SP
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ .cfi_adjust_cfa_offset 4
+ PUSH eax // pass object
+ call artLockObjectFromCode // artLockObjectFromCode(object, Thread*, SP)
+ addl MACRO_LITERAL(16), %esp // pop arguments
+ .cfi_adjust_cfa_offset -16
+ RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+ RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_lock_object
+
+DEFINE_FUNCTION art_quick_unlock_object
+ testl %eax, %eax // null check object/eax
+ jz slow_unlock
+ movl LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word
+ movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id
+ test %ecx, %ecx
+ jb slow_unlock // lock word contains a monitor
+ cmpw %cx, %dx // does the thread id match?
+ jne slow_unlock
+ cmpl LITERAL(65536), %ecx
+ jae recursive_thin_unlock
+ movl LITERAL(0), LOCK_WORD_OFFSET(%eax)
+ ret
+recursive_thin_unlock:
+ subl LITERAL(65536), %ecx
+ mov %ecx, LOCK_WORD_OFFSET(%eax)
+ ret
+slow_unlock:
+ SETUP_REF_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC
+ mov %esp, %edx // remember SP
+ // Outgoing argument set up
+ PUSH eax // push padding
+ PUSH edx // pass SP
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ .cfi_adjust_cfa_offset 4
+ PUSH eax // pass object
+ call artUnlockObjectFromCode // artUnlockObjectFromCode(object, Thread*, SP)
+ addl MACRO_LITERAL(16), %esp // pop arguments
+ .cfi_adjust_cfa_offset -16
+ RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+ RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_unlock_object
+
DEFINE_FUNCTION art_quick_is_assignable
PUSH eax // alignment padding
- PUSH ecx // pass arg2
+ PUSH ecx // pass arg2
PUSH eax // pass arg1
call SYMBOL(artIsAssignableFromCode) // (Class* a, Class* b, Thread*, SP)
addl LITERAL(12), %esp // pop arguments
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index dd3e7dd..7e0aee0 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -134,6 +134,7 @@ void Thread::InitCpu() {
// Sanity check other offsets.
CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+ CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
}
} // namespace art
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index aca93a5..d2eaf8e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -21,6 +21,9 @@
// check.
#define SUSPEND_CHECK_INTERVAL (1000)
+// Offsets within java.lang.Object.
+#define LOCK_WORD_OFFSET 4
+
// Offsets within java.lang.String.
#define STRING_VALUE_OFFSET 8
#define STRING_COUNT_OFFSET 12
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index b048bbb..249f031 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -54,17 +54,17 @@ struct AllMutexData {
std::set<BaseMutex*>* all_mutexes;
AllMutexData() : all_mutexes(NULL) {}
};
-static struct AllMutexData all_mutex_data[kAllMutexDataSize];
+static struct AllMutexData gAllMutexData[kAllMutexDataSize];
class ScopedAllMutexesLock {
public:
explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
- while (!all_mutex_data->all_mutexes_guard.compare_and_swap(0, reinterpret_cast<int32_t>(mutex))) {
+ while (!gAllMutexData->all_mutexes_guard.compare_and_swap(0, reinterpret_cast<int32_t>(mutex))) {
NanoSleep(100);
}
}
~ScopedAllMutexesLock() {
- while (!all_mutex_data->all_mutexes_guard.compare_and_swap(reinterpret_cast<int32_t>(mutex_), 0)) {
+ while (!gAllMutexData->all_mutexes_guard.compare_and_swap(reinterpret_cast<int32_t>(mutex_), 0)) {
NanoSleep(100);
}
}
@@ -75,7 +75,7 @@ class ScopedAllMutexesLock {
BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(name) {
if (kLogLockContentions) {
ScopedAllMutexesLock mu(this);
- std::set<BaseMutex*>** all_mutexes_ptr = &all_mutex_data->all_mutexes;
+ std::set<BaseMutex*>** all_mutexes_ptr = &gAllMutexData->all_mutexes;
if (*all_mutexes_ptr == NULL) {
// We leak the global set of all mutexes to avoid ordering issues in global variable
// construction/destruction.
@@ -88,7 +88,7 @@ BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(n
BaseMutex::~BaseMutex() {
if (kLogLockContentions) {
ScopedAllMutexesLock mu(this);
- all_mutex_data->all_mutexes->erase(this);
+ gAllMutexData->all_mutexes->erase(this);
}
}
@@ -96,13 +96,13 @@ void BaseMutex::DumpAll(std::ostream& os) {
if (kLogLockContentions) {
os << "Mutex logging:\n";
ScopedAllMutexesLock mu(reinterpret_cast<const BaseMutex*>(-1));
- std::set<BaseMutex*>* all_mutexes = all_mutex_data->all_mutexes;
+ std::set<BaseMutex*>* all_mutexes = gAllMutexData->all_mutexes;
if (all_mutexes == NULL) {
// No mutexes have been created yet during at startup.
return;
}
typedef std::set<BaseMutex*>::const_iterator It;
- os << "(Contented)\n";
+ os << "(Contended)\n";
for (It it = all_mutexes->begin(); it != all_mutexes->end(); ++it) {
BaseMutex* mutex = *it;
if (mutex->HasEverContended()) {
@@ -127,7 +127,8 @@ void BaseMutex::CheckSafeToWait(Thread* self) {
return;
}
if (kDebugLocking) {
- CHECK(self->GetHeldMutex(level_) == this) << "Waiting on unacquired mutex: " << name_;
+ CHECK(self->GetHeldMutex(level_) == this || level_ == kMonitorLock)
+ << "Waiting on unacquired mutex: " << name_;
bool bad_mutexes_held = false;
for (int i = kLockLevelCount - 1; i >= 0; --i) {
if (i != level_) {
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index ee37388..feb8a6c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -58,7 +58,7 @@ const bool kLogLockContentions = false;
// futex.
const bool kLogLockContentions = false;
#endif
-const size_t kContentionLogSize = 64;
+const size_t kContentionLogSize = 4;
const size_t kContentionLogDataSize = kLogLockContentions ? 1 : 0;
const size_t kAllMutexDataSize = kLogLockContentions ? 1 : 0;
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index ae57aa3..57bd57e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -674,15 +674,15 @@ JDWP::JdwpError Dbg::GetMonitorInfo(JDWP::ObjectId object_id, JDWP::ExpandBuf* r
Locks::mutator_lock_->ExclusiveUnlock(self);
Locks::mutator_lock_->SharedLock(self);
- if (monitor_info.owner != NULL) {
- expandBufAddObjectId(reply, gRegistry->Add(monitor_info.owner->GetPeer()));
+ if (monitor_info.owner_ != NULL) {
+ expandBufAddObjectId(reply, gRegistry->Add(monitor_info.owner_->GetPeer()));
} else {
expandBufAddObjectId(reply, gRegistry->Add(NULL));
}
- expandBufAdd4BE(reply, monitor_info.entry_count);
- expandBufAdd4BE(reply, monitor_info.waiters.size());
- for (size_t i = 0; i < monitor_info.waiters.size(); ++i) {
- expandBufAddObjectId(reply, gRegistry->Add(monitor_info.waiters[i]->GetPeer()));
+ expandBufAdd4BE(reply, monitor_info.entry_count_);
+ expandBufAdd4BE(reply, monitor_info.waiters_.size());
+ for (size_t i = 0; i < monitor_info.waiters_.size(); ++i) {
+ expandBufAddObjectId(reply, gRegistry->Add(monitor_info.waiters_[i]->GetPeer()));
}
return JDWP::ERR_NONE;
}
@@ -1935,7 +1935,8 @@ JDWP::JdwpError Dbg::SuspendThread(JDWP::ObjectId thread_id, bool request_suspen
}
// Suspend thread to build stack trace.
bool timed_out;
- Thread* thread = Thread::SuspendForDebugger(peer.get(), request_suspension, &timed_out);
+ Thread* thread = ThreadList::SuspendThreadByPeer(peer.get(), request_suspension, true,
+ &timed_out);
if (thread != NULL) {
return JDWP::ERR_NONE;
} else if (timed_out) {
@@ -2412,7 +2413,8 @@ class ScopedThreadSuspension {
soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
jobject thread_peer = gRegistry->GetJObject(thread_id);
bool timed_out;
- Thread* suspended_thread = Thread::SuspendForDebugger(thread_peer, true, &timed_out);
+ Thread* suspended_thread = ThreadList::SuspendThreadByPeer(thread_peer, true, true,
+ &timed_out);
CHECK_EQ(soa.Self()->TransitionFromSuspendedToRunnable(), kWaitingForDebuggerSuspension);
if (suspended_thread == NULL) {
// Thread terminated from under us while suspending.
@@ -3012,7 +3014,7 @@ void Dbg::DdmSendThreadNotification(Thread* t, uint32_t type) {
if (type == CHUNK_TYPE("THDE")) {
uint8_t buf[4];
- JDWP::Set4BE(&buf[0], t->GetThinLockId());
+ JDWP::Set4BE(&buf[0], t->GetThreadId());
Dbg::DdmSendChunk(CHUNK_TYPE("THDE"), 4, buf);
} else {
CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
@@ -3022,7 +3024,7 @@ void Dbg::DdmSendThreadNotification(Thread* t, uint32_t type) {
const jchar* chars = (name.get() != NULL) ? name->GetCharArray()->GetData() : NULL;
std::vector<uint8_t> bytes;
- JDWP::Append4BE(bytes, t->GetThinLockId());
+ JDWP::Append4BE(bytes, t->GetThreadId());
JDWP::AppendUtf16BE(bytes, chars, char_count);
CHECK_EQ(bytes.size(), char_count*2 + sizeof(uint32_t)*2);
Dbg::DdmSendChunk(type, bytes);
@@ -3545,7 +3547,7 @@ void Dbg::RecordAllocation(mirror::Class* type, size_t byte_count) {
AllocRecord* record = &recent_allocation_records_[gAllocRecordHead];
record->type = type;
record->byte_count = byte_count;
- record->thin_lock_id = self->GetThinLockId();
+ record->thin_lock_id = self->GetThreadId();
// Fill in the stack trace.
AllocRecordStackVisitor visitor(self, record);
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index e87dc96..e9e6c5a 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -378,7 +378,6 @@ static inline void CheckSuspend(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mut
for (;;) {
if (thread->ReadFlag(kCheckpointRequest)) {
thread->RunCheckpointFunction();
- thread->AtomicClearFlag(kCheckpointRequest);
} else if (thread->ReadFlag(kSuspendRequest)) {
thread->FullSuspendCheck();
} else {
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 36ca604..2102ab1 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -15,28 +15,40 @@
*/
#include "callee_save_frame.h"
+#include "common_throws.h"
#include "mirror/object-inl.h"
namespace art {
-extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self,
- mirror::ArtMethod** sp)
- UNLOCK_FUNCTION(monitor_lock_) {
+extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+ EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
- DCHECK(obj != NULL); // Assumed to have been checked before entry
- // MonitorExit may throw exception
- return obj->MonitorExit(self) ? 0 /* Success */ : -1 /* Failure */;
+ if (UNLIKELY(obj == NULL)) {
+ ThrowLocation throw_location(self->GetCurrentLocationForThrow());
+ ThrowNullPointerException(&throw_location,
+ "Null reference used for synchronization (monitor-enter)");
+ return -1; // Failure.
+ } else {
+ obj->MonitorEnter(self); // May block
+ DCHECK(self->HoldsLock(obj));
+ DCHECK(!self->IsExceptionPending());
+ return 0; // Success.
+ // Only possible exception is NPE and is handled before entry
+ }
}
-extern "C" void artLockObjectFromCode(mirror::Object* obj, Thread* thread,
- mirror::ArtMethod** sp)
- EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
- FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsOnly);
- DCHECK(obj != NULL); // Assumed to have been checked before entry
- obj->MonitorEnter(thread); // May block
- DCHECK(thread->HoldsLock(obj));
- // Only possible exception is NPE and is handled before entry
- DCHECK(!thread->IsExceptionPending());
+extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+ UNLOCK_FUNCTION(monitor_lock_) {
+ FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+ if (UNLIKELY(obj == NULL)) {
+ ThrowLocation throw_location(self->GetCurrentLocationForThrow());
+ ThrowNullPointerException(&throw_location,
+ "Null reference used for synchronization (monitor-exit)");
+ return -1; // Failure.
+ } else {
+ // MonitorExit may throw exception.
+ return obj->MonitorExit(self) ? 0 /* Success */ : -1 /* Failure */;
+ }
}
} // namespace art
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 8be9b21..b1b664d 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -434,7 +434,7 @@ class SharedLibrary {
class_loader_(class_loader),
jni_on_load_lock_("JNI_OnLoad lock"),
jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_),
- jni_on_load_thread_id_(Thread::Current()->GetThinLockId()),
+ jni_on_load_thread_id_(Thread::Current()->GetThreadId()),
jni_on_load_result_(kPending) {
}
@@ -459,7 +459,7 @@ class SharedLibrary {
{
MutexLock mu(self, jni_on_load_lock_);
- if (jni_on_load_thread_id_ == self->GetThinLockId()) {
+ if (jni_on_load_thread_id_ == self->GetThreadId()) {
// Check this so we don't end up waiting for ourselves. We need to return "true" so the
// caller can continue.
LOG(INFO) << *self << " recursive attempt to load library " << "\"" << path_ << "\"";
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
new file mode 100644
index 0000000..30bf9bb
--- /dev/null
+++ b/runtime/lock_word-inl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LOCK_WORD_INL_H_
+#define ART_RUNTIME_LOCK_WORD_INL_H_
+
+#include "lock_word.h"
+
+namespace art {
+
+inline uint32_t LockWord::ThinLockOwner() const {
+ DCHECK_EQ(GetState(), kThinLocked);
+ return (value_ >> kThinLockOwnerShift) & kThinLockOwnerMask;
+}
+
+inline uint32_t LockWord::ThinLockCount() const {
+ DCHECK_EQ(GetState(), kThinLocked);
+ return (value_ >> kThinLockCountShift) & kThinLockCountMask;
+}
+
+inline Monitor* LockWord::FatLockMonitor() const {
+ DCHECK_EQ(GetState(), kFatLocked);
+ return reinterpret_cast<Monitor*>(value_ << 1);
+}
+
+inline LockWord::LockWord() : value_(0) {
+ DCHECK_EQ(GetState(), kUnlocked);
+}
+
+inline LockWord::LockWord(Monitor* mon)
+ : value_((reinterpret_cast<uint32_t>(mon) >> 1) | (kStateFat << kStateShift)) {
+ DCHECK_EQ(FatLockMonitor(), mon);
+}
+
+} // namespace art
+
+#endif // ART_RUNTIME_LOCK_WORD_INL_H_
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
new file mode 100644
index 0000000..cd4bfbb
--- /dev/null
+++ b/runtime/lock_word.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LOCK_WORD_H_
+#define ART_RUNTIME_LOCK_WORD_H_
+
+#include <iosfwd>
+#include <stdint.h>
+
+#include "base/logging.h"
+
+namespace art {
+namespace mirror {
+ class Object;
+} // namespace mirror
+
+class Monitor;
+
+/* The lock value itself as stored in mirror::Object::monitor_. The MSB of the lock encodes its
+ * state. When cleared, the lock is in the "thin" state and its bits are formatted as follows:
+ *
+ * |3|32222222222111|11111110000000000|
+ * |1|09876543210987|65432109876543210|
+ * |0| lock count | thread id |
+ *
+ * When set, the lock is in the "fat" state and its bits are formatted as follows:
+ *
+ * |3|3222222222211111111110000000000|
+ * |1|0987654321098765432109876543210|
+ * |1| Monitor* >> 1 |
+ */
+class LockWord {
+ public:
+ enum {
+ // Number of bits to encode the state, currently just fat or thin/unlocked.
+ kStateSize = 1,
+ // Number of bits to encode the thin lock owner.
+ kThinLockOwnerSize = 16,
+ // Remaining bits are the recursive lock count.
+ kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize,
+
+ // Thin lock bits. Owner in lowest bits.
+ kThinLockOwnerShift = 0,
+ kThinLockOwnerMask = (1 << kThinLockOwnerSize) - 1,
+ // Count in higher bits.
+ kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift,
+ kThinLockCountMask = (1 << kThinLockCountSize) - 1,
+ kThinLockMaxCount = kThinLockCountMask,
+
+ // State in the highest bits.
+ kStateShift = kThinLockCountSize + kThinLockCountShift,
+ kStateMask = (1 << kStateSize) - 1,
+ kStateThinOrUnlocked = 0,
+ kStateFat = 1,
+ };
+
+ static LockWord FromThinLockId(uint32_t thread_id, uint32_t count) {
+ CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockOwnerMask));
+ return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift));
+ }
+
+ enum LockState {
+ kUnlocked, // No lock owners.
+ kThinLocked, // Single uncontended owner.
+ kFatLocked // See associated monitor.
+ };
+
+ LockState GetState() const {
+ if (value_ == 0) {
+ return kUnlocked;
+ } else if (((value_ >> kStateShift) & kStateMask) == kStateThinOrUnlocked) {
+ return kThinLocked;
+ } else {
+ return kFatLocked;
+ }
+ }
+
+ // Return the owner thin lock thread id.
+ uint32_t ThinLockOwner() const;
+
+ // Return the recursion count of a thin lock (0 when the lock is held exactly once).
+ uint32_t ThinLockCount() const;
+
+ // Return the Monitor encoded in a fat lock.
+ Monitor* FatLockMonitor() const;
+
+ // Default constructor with no lock ownership.
+ LockWord();
+
+ // Constructor for an inflated lock word that references a Monitor.
+ explicit LockWord(Monitor* mon);
+
+ bool operator==(const LockWord& rhs) const {
+ return GetValue() == rhs.GetValue();
+ }
+
+ private:
+ explicit LockWord(uint32_t val) : value_(val) {}
+
+ uint32_t GetValue() const {
+ return value_;
+ }
+
+ // Only Object should be converting LockWords to/from uints.
+ friend class mirror::Object;
+
+ // The encoded value holding all the state.
+ uint32_t value_;
+};
+std::ostream& operator<<(std::ostream& os, const LockWord::LockState& code);
+
+} // namespace art
+
+
+#endif // ART_RUNTIME_LOCK_WORD_H_
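
An illustrative sketch (not part of the patch) of the fat-lock encoding above: a Monitor* is
assumed to be at least 2-byte aligned, so its low bit can be dropped by the LockWord(Monitor*)
constructor and recovered by FatLockMonitor() with a single shift:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical aligned storage standing in for a Monitor instance.
      alignas(8) static char monitor_storage[64];
      uint32_t mon = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(monitor_storage));
      uint32_t value = (mon >> 1) | (1u << 31);  // LockWord(Monitor*): kStateFat << kStateShift
      assert((value >> 31) == 1u);               // GetState() == kFatLocked
      assert((value << 1) == mon);               // FatLockMonitor() recovers the pointer bits
      return 0;
    }
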
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index c6db5b9..b16c2f7 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -60,7 +60,7 @@ void Class::SetStatus(Status new_status, Thread* self) {
}
if (new_status >= kStatusResolved || old_status >= kStatusResolved) {
// When classes are being resolved the resolution code should hold the lock.
- CHECK_EQ(GetThinLockId(), self->GetThinLockId())
+ CHECK_EQ(GetLockOwnerThreadId(), self->GetThreadId())
<< "Attempt to change status of class while not holding its lock: "
<< PrettyClass(this) << " " << old_status << " -> " << new_status;
}
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 5ed3db3..e659108 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -24,6 +24,7 @@
#include "atomic.h"
#include "array-inl.h"
#include "class.h"
+#include "lock_word-inl.h"
#include "monitor.h"
#include "runtime.h"
#include "throwable.h"
@@ -43,8 +44,21 @@ inline void Object::SetClass(Class* new_klass) {
SetFieldPtr(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
}
-inline uint32_t Object::GetThinLockId() {
- return Monitor::GetThinLockId(monitor_);
+inline LockWord Object::GetLockWord() {
+ return LockWord(GetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), true));
+}
+
+inline void Object::SetLockWord(LockWord new_val) {
+ SetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), new_val.GetValue(), true);
+}
+
+inline bool Object::CasLockWord(LockWord old_val, LockWord new_val) {
+ return CasField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(),
+ new_val.GetValue());
+}
+
+inline uint32_t Object::GetLockOwnerThreadId() {
+ return Monitor::GetLockOwnerThreadId(this);
}
inline void Object::MonitorEnter(Thread* self) {
@@ -238,6 +252,13 @@ inline size_t Object::SizeOf() const {
return result;
}
+inline bool Object::CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value) {
+ VerifyObject(this);
+ byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+ int32_t* addr = reinterpret_cast<int32_t*>(raw_addr);
+ return android_atomic_release_cas(old_value, new_value, addr) == 0;
+}
+
inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) const {
VerifyObject(this);
const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
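
The CasField32 helper added above is the primitive behind Object::CasLockWord. The retry pattern
its callers rely on can be sketched with std::atomic standing in for android_atomic_release_cas;
the names in this snippet are invented for illustration:

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    // Stand-in for the Object::monitor_ field.
    std::atomic<uint32_t> monitor_word{0};

    // Try to move the word from unlocked (0) to a thin lock owned by thread_id.
    bool ThinLockTryAcquire(uint32_t thread_id) {
      uint32_t expected = 0;         // LockWord(): kUnlocked
      uint32_t desired = thread_id;  // LockWord::FromThinLockId(thread_id, 0)
      return monitor_word.compare_exchange_strong(expected, desired,
                                                  std::memory_order_acquire,
                                                  std::memory_order_relaxed);
    }

    int main() {
      std::printf("first acquire: %d\n", ThinLockTryAcquire(42));   // succeeds
      std::printf("second acquire: %d\n", ThinLockTryAcquire(43));  // fails, already owned
      return 0;
    }
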
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 003581a..e3f5c10 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -26,6 +26,7 @@
namespace art {
class ImageWriter;
+class LockWord;
struct ObjectOffsets;
class Thread;
@@ -95,14 +96,10 @@ class MANAGED Object {
return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
}
- volatile int32_t* GetRawLockWordAddress() {
- byte* raw_addr = reinterpret_cast<byte*>(this) +
- OFFSET_OF_OBJECT_MEMBER(Object, monitor_).Int32Value();
- int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
- return const_cast<volatile int32_t*>(word_addr);
- }
-
- uint32_t GetThinLockId();
+ LockWord GetLockWord();
+ void SetLockWord(LockWord new_val);
+ bool CasLockWord(LockWord old_val, LockWord new_val);
+ uint32_t GetLockOwnerThreadId();
void MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
@@ -226,6 +223,8 @@ class MANAGED Object {
}
}
+ bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value);
+
uint64_t GetField64(MemberOffset field_offset, bool is_volatile) const;
void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index e7ab2d4..1ceaa5d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -23,6 +23,7 @@
#include "class_linker.h"
#include "dex_file-inl.h"
#include "dex_instruction.h"
+#include "lock_word-inl.h"
#include "mirror/art_method-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object-inl.h"
@@ -37,36 +38,20 @@
namespace art {
/*
- * Every Object has a monitor associated with it, but not every Object is
- * actually locked. Even the ones that are locked do not need a
- * full-fledged monitor until a) there is actual contention or b) wait()
- * is called on the Object.
+ * Every Object has a monitor associated with it, but not every Object is actually locked. Even
+ * the ones that are locked do not need a full-fledged monitor until a) there is actual contention
+ * or b) wait() is called on the Object.
*
- * For Android, we have implemented a scheme similar to the one described
- * in Bacon et al.'s "Thin locks: featherweight synchronization for Java"
- * (ACM 1998). Things are even easier for us, though, because we have
- * a full 32 bits to work with.
+ * For Android, we have implemented a scheme similar to the one described in Bacon et al.'s
+ * "Thin locks: featherweight synchronization for Java" (ACM 1998). Things are even easier for us,
+ * though, because we have a full 32 bits to work with.
*
- * The two states of an Object's lock are referred to as "thin" and
- * "fat". A lock may transition from the "thin" state to the "fat"
- * state and this transition is referred to as inflation. Once a lock
- * has been inflated it remains in the "fat" state indefinitely.
+ * The two states of an Object's lock are referred to as "thin" and "fat". A lock may transition
+ * from the "thin" state to the "fat" state and this transition is referred to as inflation. Once
+ * a lock has been inflated it remains in the "fat" state indefinitely.
*
- * The lock value itself is stored in Object.lock. The LSB of the
- * lock encodes its state. When cleared, the lock is in the "thin"
- * state and its bits are formatted as follows:
- *
- * [31 ---- 19] [18 ---- 3] [2 ---- 1] [0]
- * lock count thread id hash state 0
- *
- * When set, the lock is in the "fat" state and its bits are formatted
- * as follows:
- *
- * [31 ---- 3] [2 ---- 1] [0]
- * pointer hash state 1
- *
- * For an in-depth description of the mechanics of thin-vs-fat locking,
- * read the paper referred to above.
+ * The lock value itself is stored in mirror::Object::monitor_ and the representation is described
+ * in the LockWord value type.
*
* Monitors provide:
* - mutually exclusive access to resources
@@ -74,32 +59,11 @@ namespace art {
*
* In effect, they fill the role of both mutexes and condition variables.
*
- * Only one thread can own the monitor at any time. There may be several
- * threads waiting on it (the wait call unlocks it). One or more waiting
- * threads may be getting interrupted or notified at any given time.
- *
- * TODO: the various members of monitor are not SMP-safe.
+ * Only one thread can own the monitor at any time. There may be several threads waiting on it
+ * (the wait call unlocks it). One or more waiting threads may be getting interrupted or notified
+ * at any given time.
*/
-// The shape is the bottom bit; either LW_SHAPE_THIN or LW_SHAPE_FAT.
-#define LW_SHAPE_MASK 0x1
-#define LW_SHAPE(x) static_cast<int>((x) & LW_SHAPE_MASK)
-
-/*
- * Monitor accessor. Extracts a monitor structure pointer from a fat
- * lock. Performs no error checking.
- */
-#define LW_MONITOR(x) \
- (reinterpret_cast<Monitor*>((x) & ~((LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT) | LW_SHAPE_MASK)))
-
-/*
- * Lock recursion count field. Contains a count of the number of times
- * a lock has been recursively acquired.
- */
-#define LW_LOCK_COUNT_MASK 0x1fff
-#define LW_LOCK_COUNT_SHIFT 19
-#define LW_LOCK_COUNT(x) (((x) >> LW_LOCK_COUNT_SHIFT) & LW_LOCK_COUNT_MASK)
-
bool (*Monitor::is_sensitive_thread_hook_)() = NULL;
uint32_t Monitor::lock_profiling_threshold_ = 0;
@@ -117,29 +81,43 @@ void Monitor::Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread
Monitor::Monitor(Thread* owner, mirror::Object* obj)
: monitor_lock_("a monitor lock", kMonitorLock),
+ monitor_contenders_("monitor contenders", monitor_lock_),
owner_(owner),
lock_count_(0),
obj_(obj),
wait_set_(NULL),
locking_method_(NULL),
locking_dex_pc_(0) {
- monitor_lock_.Lock(owner);
+ // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
+ // with the owner unlocking the thin-lock.
+ CHECK(owner == Thread::Current() || owner->IsSuspended());
+}
+
+bool Monitor::Install(Thread* self) {
+ MutexLock mu(self, monitor_lock_); // Uncontended mutex acquisition as monitor isn't yet public.
+ CHECK(owner_ == self || owner_->IsSuspended());
// Propagate the lock state.
- uint32_t thin = *obj->GetRawLockWordAddress();
- lock_count_ = LW_LOCK_COUNT(thin);
- thin &= LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT;
- thin |= reinterpret_cast<uint32_t>(this) | LW_SHAPE_FAT;
- // Publish the updated lock word.
- android_atomic_release_store(thin, obj->GetRawLockWordAddress());
+ LockWord thin(obj_->GetLockWord());
+ if (thin.GetState() != LockWord::kThinLocked) {
+ // The owner_ is suspended but another thread beat us to install a monitor.
+ CHECK_EQ(thin.GetState(), LockWord::kFatLocked);
+ return false;
+ }
+ CHECK_EQ(owner_->GetThreadId(), thin.ThinLockOwner());
+ lock_count_ = thin.ThinLockCount();
+ LockWord fat(this);
+ // Publish the updated lock word, which may race with other threads.
+ bool success = obj_->CasLockWord(thin, fat);
// Lock profiling.
- if (lock_profiling_threshold_ != 0) {
- locking_method_ = owner->GetCurrentMethod(&locking_dex_pc_);
+ if (success && lock_profiling_threshold_ != 0) {
+ locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
}
+ return success;
}
Monitor::~Monitor() {
- DCHECK(obj_ != NULL);
- DCHECK_EQ(LW_SHAPE(*obj_->GetRawLockWordAddress()), LW_SHAPE_FAT);
+ CHECK(obj_ != NULL);
+ CHECK_EQ(obj_->GetLockWord().GetState(), LockWord::kFatLocked);
}
/*
@@ -190,64 +168,56 @@ void Monitor::RemoveFromWaitSet(Thread *thread) {
}
}
-mirror::Object* Monitor::GetObject() {
- return obj_;
-}
-
void Monitor::SetObject(mirror::Object* object) {
obj_ = object;
}
void Monitor::Lock(Thread* self) {
- if (owner_ == self) {
- lock_count_++;
- return;
- }
-
- if (!monitor_lock_.TryLock(self)) {
- uint64_t waitStart = 0;
- uint64_t waitEnd = 0;
- uint32_t wait_threshold = lock_profiling_threshold_;
- const mirror::ArtMethod* current_locking_method = NULL;
- uint32_t current_locking_dex_pc = 0;
- {
- ScopedThreadStateChange tsc(self, kBlocked);
- if (wait_threshold != 0) {
- waitStart = NanoTime() / 1000;
- }
- current_locking_method = locking_method_;
- current_locking_dex_pc = locking_dex_pc_;
-
- monitor_lock_.Lock(self);
- if (wait_threshold != 0) {
- waitEnd = NanoTime() / 1000;
+ MutexLock mu(self, monitor_lock_);
+ while (true) {
+ if (owner_ == NULL) { // Unowned.
+ owner_ = self;
+ CHECK_EQ(lock_count_, 0);
+ // When debugging, save the current monitor holder for future
+ // acquisition failures to use in sampled logging.
+ if (lock_profiling_threshold_ != 0) {
+ locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
}
+ return;
+ } else if (owner_ == self) { // Recursive.
+ lock_count_++;
+ return;
}
-
- if (wait_threshold != 0) {
- uint64_t wait_ms = (waitEnd - waitStart) / 1000;
- uint32_t sample_percent;
- if (wait_ms >= wait_threshold) {
- sample_percent = 100;
- } else {
- sample_percent = 100 * wait_ms / wait_threshold;
- }
- if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
- const char* current_locking_filename;
- uint32_t current_locking_line_number;
- TranslateLocation(current_locking_method, current_locking_dex_pc,
- current_locking_filename, current_locking_line_number);
- LogContentionEvent(self, wait_ms, sample_percent, current_locking_filename, current_locking_line_number);
+ // Contended.
+ const bool log_contention = (lock_profiling_threshold_ != 0);
+ uint64_t wait_start_ms = log_contention ? MilliTime() : 0;
+ const mirror::ArtMethod* owners_method = locking_method_;
+ uint32_t owners_dex_pc = locking_dex_pc_;
+ monitor_lock_.Unlock(self); // Let go of locks in order.
+ {
+ ScopedThreadStateChange tsc(self, kBlocked); // Change to blocked and give up mutator_lock_.
+ MutexLock mu2(self, monitor_lock_); // Reacquire monitor_lock_ without mutator_lock_ for Wait.
+ if (owner_ != NULL) { // Did the owner_ give the lock up?
+ monitor_contenders_.Wait(self); // Still contended so wait.
+ // Woken from contention.
+ if (log_contention) {
+ uint64_t wait_ms = MilliTime() - wait_start_ms;
+ uint32_t sample_percent;
+ if (wait_ms >= lock_profiling_threshold_) {
+ sample_percent = 100;
+ } else {
+ sample_percent = 100 * wait_ms / lock_profiling_threshold_;
+ }
+ if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
+ const char* owners_filename;
+ uint32_t owners_line_number;
+ TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
+ LogContentionEvent(self, wait_ms, sample_percent, owners_filename, owners_line_number);
+ }
+ }
}
}
- }
- owner_ = self;
- DCHECK_EQ(lock_count_, 0);
-
- // When debugging, save the current monitor holder for future
- // acquisition failures to use in sampled logging.
- if (lock_profiling_threshold_ != 0) {
- locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+ monitor_lock_.Lock(self); // Reacquire locks in order.
}
}
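
The rewritten Monitor::Lock above stops blocking on monitor_lock_ itself and instead waits on the
new monitor_contenders_ condition variable while ownership is tracked in owner_. A simplified,
self-contained sketch of that hand-off, using std:: primitives rather than the runtime's
Mutex/ConditionVariable (the structure, not the exact API, is what is illustrated; it omits the
mutator_lock_ handling and thread-state transitions of the real code):

    #include <condition_variable>
    #include <mutex>
    #include <thread>

    std::mutex monitor_lock;                  // plays the role of monitor_lock_
    std::condition_variable monitor_contenders;
    std::thread::id owner;                    // default-constructed id means "unowned"
    int lock_count = 0;

    void MonitorLock() {
      std::unique_lock<std::mutex> lk(monitor_lock);
      while (true) {
        if (owner == std::thread::id()) {            // Unowned: take it.
          owner = std::this_thread::get_id();
          return;
        }
        if (owner == std::this_thread::get_id()) {   // Recursive acquisition.
          ++lock_count;
          return;
        }
        monitor_contenders.wait(lk);                 // Contended: wait to be signalled by unlock.
      }
    }

    void MonitorUnlock() {
      std::lock_guard<std::mutex> lk(monitor_lock);
      if (lock_count > 0) {
        --lock_count;
      } else {
        owner = std::thread::id();
        monitor_contenders.notify_one();             // Wake a contender, as Monitor::Unlock does.
      }
    }

    int main() {
      MonitorLock();
      std::thread t([] { MonitorLock(); MonitorUnlock(); });
      MonitorUnlock();
      t.join();
      return 0;
    }
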
@@ -261,10 +231,11 @@ static void ThrowIllegalMonitorStateExceptionF(const char* fmt, ...)
Thread* self = Thread::Current();
ThrowLocation throw_location = self->GetCurrentLocationForThrow();
self->ThrowNewExceptionV(throw_location, "Ljava/lang/IllegalMonitorStateException;", fmt, args);
- if (!Runtime::Current()->IsStarted()) {
+ if (!Runtime::Current()->IsStarted() || VLOG_IS_ON(monitor)) {
std::ostringstream ss;
self->Dump(ss);
- LOG(ERROR) << self->GetException(NULL)->Dump() << "\n" << ss.str();
+ LOG(Runtime::Current()->IsStarted() ? INFO : ERROR)
+ << self->GetException(NULL)->Dump() << "\n" << ss.str();
}
va_end(args);
}
@@ -290,7 +261,7 @@ void Monitor::FailedUnlock(mirror::Object* o, Thread* expected_owner, Thread* fo
// Acquire thread list lock so threads won't disappear from under us.
MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
// Re-read owner now that we hold lock.
- current_owner = (monitor != NULL) ? monitor->owner_ : NULL;
+ current_owner = (monitor != NULL) ? monitor->GetOwner() : NULL;
// Get short descriptions of the threads involved.
current_owner_string = ThreadToString(current_owner);
expected_owner_string = ThreadToString(expected_owner);
@@ -338,8 +309,9 @@ void Monitor::FailedUnlock(mirror::Object* o, Thread* expected_owner, Thread* fo
}
}
-bool Monitor::Unlock(Thread* self, bool for_wait) {
+bool Monitor::Unlock(Thread* self) {
DCHECK(self != NULL);
+ MutexLock mu(self, monitor_lock_);
Thread* owner = owner_;
if (owner == self) {
// We own the monitor, so nobody else can be in here.
@@ -347,17 +319,11 @@ bool Monitor::Unlock(Thread* self, bool for_wait) {
owner_ = NULL;
locking_method_ = NULL;
locking_dex_pc_ = 0;
- monitor_lock_.Unlock(self);
+ // Wake a contender.
+ monitor_contenders_.Signal(self);
} else {
--lock_count_;
}
- } else if (for_wait) {
- // Wait should have already cleared the fields.
- DCHECK_EQ(lock_count_, 0);
- DCHECK(owner == NULL);
- DCHECK(locking_method_ == NULL);
- DCHECK_EQ(locking_dex_pc_, 0u);
- monitor_lock_.Unlock(self);
} else {
// We don't own this, so we're not allowed to unlock it.
// The JNI spec says that we should throw IllegalMonitorStateException
@@ -396,12 +362,14 @@ void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
DCHECK(self != NULL);
DCHECK(why == kTimedWaiting || why == kWaiting || why == kSleeping);
+ monitor_lock_.Lock(self);
+
// Make sure that we hold the lock.
if (owner_ != self) {
ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
+ monitor_lock_.Unlock(self);
return;
}
- monitor_lock_.AssertHeld(self);
// We need to turn a zero-length timed wait into a regular wait because
// Object.wait(0, 0) is defined as Object.wait(0), which is defined as Object.wait().
@@ -409,16 +377,12 @@ void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
why = kWaiting;
}
- WaitWithLock(self, ms, ns, interruptShouldThrow, why);
-}
-
-void Monitor::WaitWithLock(Thread* self, int64_t ms, int32_t ns,
- bool interruptShouldThrow, ThreadState why) {
// Enforce the timeout range.
if (ms < 0 || ns < 0 || ns > 999999) {
ThrowLocation throw_location = self->GetCurrentLocationForThrow();
self->ThrowNewExceptionF(throw_location, "Ljava/lang/IllegalArgumentException;",
"timeout arguments out of range: ms=%lld ns=%d", ms, ns);
+ monitor_lock_.Unlock(self);
return;
}
@@ -460,7 +424,8 @@ void Monitor::WaitWithLock(Thread* self, int64_t ms, int32_t ns,
self->wait_monitor_ = this;
// Release the monitor lock.
- Unlock(self, true);
+ monitor_contenders_.Signal(self);
+ monitor_lock_.Unlock(self);
// Handle the case where the thread was interrupted before we called wait().
if (self->interrupted_) {
@@ -493,9 +458,9 @@ void Monitor::WaitWithLock(Thread* self, int64_t ms, int32_t ns,
self->wait_monitor_ = NULL;
}
- // Re-acquire the monitor lock.
+ // Re-acquire the monitor and its monitor_lock_.
Lock(self);
-
+ monitor_lock_.Lock(self);
self->wait_mutex_->AssertNotHeld(self);
/*
@@ -527,20 +492,17 @@ void Monitor::WaitWithLock(Thread* self, int64_t ms, int32_t ns,
self->ThrowNewException(throw_location, "Ljava/lang/InterruptedException;", NULL);
}
}
+ monitor_lock_.Unlock(self);
}
void Monitor::Notify(Thread* self) {
DCHECK(self != NULL);
+ MutexLock mu(self, monitor_lock_);
// Make sure that we hold the lock.
if (owner_ != self) {
ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
return;
}
- monitor_lock_.AssertHeld(self);
- NotifyWithLock(self);
-}
-
-void Monitor::NotifyWithLock(Thread* self) {
// Signal the first waiting thread in the wait set.
while (wait_set_ != NULL) {
Thread* thread = wait_set_;
@@ -558,16 +520,12 @@ void Monitor::NotifyWithLock(Thread* self) {
void Monitor::NotifyAll(Thread* self) {
DCHECK(self != NULL);
+ MutexLock mu(self, monitor_lock_);
// Make sure that we hold the lock.
if (owner_ != self) {
ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
return;
}
- monitor_lock_.AssertHeld(self);
- NotifyAllWithLock();
-}
-
-void Monitor::NotifyAllWithLock() {
// Signal all threads in the wait set.
while (wait_set_ != NULL) {
Thread* thread = wait_set_;
@@ -578,182 +536,130 @@ void Monitor::NotifyAllWithLock() {
}
/*
- * Changes the shape of a monitor from thin to fat, preserving the
- * internal lock state. The calling thread must own the lock.
+ * Changes the shape of a monitor from thin to fat, preserving the internal lock state. The calling
+ * thread must own the lock or the owner must be suspended. There's a race with other threads
+ * inflating the lock, so the caller should re-read the lock word after this call returns.
*/
-void Monitor::Inflate(Thread* self, mirror::Object* obj) {
+void Monitor::Inflate(Thread* self, Thread* owner, mirror::Object* obj) {
DCHECK(self != NULL);
+ DCHECK(owner != NULL);
DCHECK(obj != NULL);
- DCHECK_EQ(LW_SHAPE(*obj->GetRawLockWordAddress()), LW_SHAPE_THIN);
- DCHECK_EQ(LW_LOCK_OWNER(*obj->GetRawLockWordAddress()), static_cast<int32_t>(self->GetThinLockId()));
// Allocate and acquire a new monitor.
- Monitor* m = new Monitor(self, obj);
- VLOG(monitor) << "monitor: thread " << self->GetThinLockId()
- << " created monitor " << m << " for object " << obj;
- Runtime::Current()->GetMonitorList()->Add(m);
+ UniquePtr<Monitor> m(new Monitor(owner, obj));
+ if (m->Install(self)) {
+ VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
+ << " created monitor " << m.get() << " for object " << obj;
+ Runtime::Current()->GetMonitorList()->Add(m.release());
+ }
+ CHECK_EQ(obj->GetLockWord().GetState(), LockWord::kFatLocked);
}
void Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
- volatile int32_t* thinp = obj->GetRawLockWordAddress();
- uint32_t sleepDelayNs;
- uint32_t minSleepDelayNs = 1000000; /* 1 millisecond */
- uint32_t maxSleepDelayNs = 1000000000; /* 1 second */
- uint32_t thin, newThin;
-
DCHECK(self != NULL);
DCHECK(obj != NULL);
- uint32_t threadId = self->GetThinLockId();
- retry:
- thin = *thinp;
- if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
- /*
- * The lock is a thin lock. The owner field is used to
- * determine the acquire method, ordered by cost.
- */
- if (LW_LOCK_OWNER(thin) == threadId) {
- /*
- * The calling thread owns the lock. Increment the
- * value of the recursion count field.
- */
- *thinp += 1 << LW_LOCK_COUNT_SHIFT;
- if (LW_LOCK_COUNT(*thinp) == LW_LOCK_COUNT_MASK) {
- /*
- * The reacquisition limit has been reached. Inflate
- * the lock so the next acquire will not overflow the
- * recursion count field.
- */
- Inflate(self, obj);
- }
- } else if (LW_LOCK_OWNER(thin) == 0) {
- // The lock is unowned. Install the thread id of the calling thread into the owner field.
- // This is the common case: compiled code will have tried this before calling back into
- // the runtime.
- newThin = thin | (threadId << LW_LOCK_OWNER_SHIFT);
- if (android_atomic_acquire_cas(thin, newThin, thinp) != 0) {
- // The acquire failed. Try again.
- goto retry;
+ uint32_t thread_id = self->GetThreadId();
+ size_t contention_count = 0;
+
+ while (true) {
+ LockWord lock_word = obj->GetLockWord();
+ switch (lock_word.GetState()) {
+ case LockWord::kUnlocked: {
+ LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
+ if (obj->CasLockWord(lock_word, thin_locked)) {
+ return; // Success!
+ }
+ continue; // Go again.
}
- } else {
- VLOG(monitor) << StringPrintf("monitor: thread %d spin on lock %p (a %s) owned by %d",
- threadId, thinp, PrettyTypeOf(obj).c_str(), LW_LOCK_OWNER(thin));
- // The lock is owned by another thread. Notify the runtime that we are about to wait.
- self->monitor_enter_object_ = obj;
- self->TransitionFromRunnableToSuspended(kBlocked);
- // Spin until the thin lock is released or inflated.
- sleepDelayNs = 0;
- for (;;) {
- thin = *thinp;
- // Check the shape of the lock word. Another thread
- // may have inflated the lock while we were waiting.
- if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
- if (LW_LOCK_OWNER(thin) == 0) {
- // The lock has been released. Install the thread id of the
- // calling thread into the owner field.
- newThin = thin | (threadId << LW_LOCK_OWNER_SHIFT);
- if (android_atomic_acquire_cas(thin, newThin, thinp) == 0) {
- // The acquire succeed. Break out of the loop and proceed to inflate the lock.
- break;
- }
+ case LockWord::kThinLocked: {
+ uint32_t owner_thread_id = lock_word.ThinLockOwner();
+ if (owner_thread_id == thread_id) {
+ // We own the lock, increase the recursion count.
+ uint32_t new_count = lock_word.ThinLockCount() + 1;
+ if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
+ LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
+ obj->SetLockWord(thin_locked);
+ return; // Success!
+ } else {
+ // We'd overflow the recursion count, so inflate the monitor.
+ Inflate(self, self, obj);
+ }
+ } else {
+ // Contention.
+ contention_count++;
+ if (contention_count <= Runtime::Current()->GetMaxSpinsBeforeThinLockInflation()) {
+ NanoSleep(1000); // Sleep for 1us and re-attempt.
} else {
- // The lock has not been released. Yield so the owning thread can run.
- if (sleepDelayNs == 0) {
- sched_yield();
- sleepDelayNs = minSleepDelayNs;
- } else {
- NanoSleep(sleepDelayNs);
- // Prepare the next delay value. Wrap to avoid once a second polls for eternity.
- if (sleepDelayNs < maxSleepDelayNs / 2) {
- sleepDelayNs *= 2;
- } else {
- sleepDelayNs = minSleepDelayNs;
+ contention_count = 0;
+ // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
+ ScopedThreadStateChange tsc(self, kBlocked);
+ bool timed_out;
+ ThreadList* thread_list = Runtime::Current()->GetThreadList();
+ if (lock_word == obj->GetLockWord()) { // If lock word hasn't changed.
+ Thread* owner = thread_list->SuspendThreadByThreadId(lock_word.ThinLockOwner(), false,
+ &timed_out);
+ if (owner != NULL) {
+ // We succeeded in suspending the thread; check that the lock's status didn't change.
+ lock_word = obj->GetLockWord();
+ if (lock_word.GetState() == LockWord::kThinLocked &&
+ lock_word.ThinLockOwner() == owner_thread_id) {
+ // Go ahead and inflate the lock.
+ Inflate(self, owner, obj);
+ }
+ thread_list->Resume(owner, false);
}
}
}
- } else {
- // The thin lock was inflated by another thread. Let the runtime know we are no longer
- // waiting and try again.
- VLOG(monitor) << StringPrintf("monitor: thread %d found lock %p surprise-fattened by another thread", threadId, thinp);
- self->monitor_enter_object_ = NULL;
- self->TransitionFromSuspendedToRunnable();
- goto retry;
}
+ continue; // Start from the beginning.
+ }
+ case LockWord::kFatLocked: {
+ Monitor* mon = lock_word.FatLockMonitor();
+ mon->Lock(self);
+ return; // Success!
}
- VLOG(monitor) << StringPrintf("monitor: thread %d spin on lock %p done", threadId, thinp);
- // We have acquired the thin lock. Let the runtime know that we are no longer waiting.
- self->monitor_enter_object_ = NULL;
- self->TransitionFromSuspendedToRunnable();
- // Fatten the lock.
- Inflate(self, obj);
- VLOG(monitor) << StringPrintf("monitor: thread %d fattened lock %p", threadId, thinp);
}
- } else {
- // The lock is a fat lock.
- VLOG(monitor) << StringPrintf("monitor: thread %d locking fat lock %p (%p) %p on a %s",
- threadId, thinp, LW_MONITOR(*thinp),
- reinterpret_cast<void*>(*thinp), PrettyTypeOf(obj).c_str());
- DCHECK(LW_MONITOR(*thinp) != NULL);
- LW_MONITOR(*thinp)->Lock(self);
}
}
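
The contention policy in the new MonitorEnter is: retry a bounded number of times with a short
sleep, then fall back to suspending the owner and inflating. A compact sketch of just that
decision loop follows; kMaxSpins mirrors kDefaultMaxSpinsBeforeThinLockInflation, while the two
helper functions are stubs invented for illustration:

    #include <chrono>
    #include <cstddef>
    #include <cstdio>
    #include <thread>

    constexpr size_t kMaxSpins = 50;  // cf. kDefaultMaxSpinsBeforeThinLockInflation

    bool TryThinLock() { return false; }   // stub: the lock stays contended in this sketch
    void SuspendOwnerAndInflate() { std::puts("inflating to a fat lock"); }

    void ContendedEnter() {
      size_t contention_count = 0;
      while (!TryThinLock()) {
        ++contention_count;
        if (contention_count <= kMaxSpins) {
          std::this_thread::sleep_for(std::chrono::microseconds(1));  // NanoSleep(1000)
        } else {
          SuspendOwnerAndInflate();
          return;
        }
      }
    }

    int main() {
      ContendedEnter();
      return 0;
    }
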
bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) {
- volatile int32_t* thinp = obj->GetRawLockWordAddress();
-
DCHECK(self != NULL);
- // DCHECK_EQ(self->GetState(), kRunnable);
DCHECK(obj != NULL);
- /*
- * Cache the lock word as its value can change while we are
- * examining its state.
- */
- uint32_t thin = *thinp;
- if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
- /*
- * The lock is thin. We must ensure that the lock is owned
- * by the given thread before unlocking it.
- */
- if (LW_LOCK_OWNER(thin) == self->GetThinLockId()) {
- /*
- * We are the lock owner. It is safe to update the lock
- * without CAS as lock ownership guards the lock itself.
- */
- if (LW_LOCK_COUNT(thin) == 0) {
- /*
- * The lock was not recursively acquired, the common
- * case. Unlock by clearing all bits except for the
- * hash state.
- */
- thin &= (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT);
- android_atomic_release_store(thin, thinp);
+ LockWord lock_word = obj->GetLockWord();
+ switch (lock_word.GetState()) {
+ case LockWord::kUnlocked:
+ FailedUnlock(obj, self, NULL, NULL);
+ return false; // Failure.
+ case LockWord::kThinLocked: {
+ uint32_t thread_id = self->GetThreadId();
+ uint32_t owner_thread_id = lock_word.ThinLockOwner();
+ if (owner_thread_id != thread_id) {
+ // TODO: there's a race here with the owner dying while we unlock.
+ Thread* owner =
+ Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+ FailedUnlock(obj, self, owner, NULL);
+ return false; // Failure.
} else {
- /*
- * The object was recursively acquired. Decrement the
- * lock recursion count field.
- */
- *thinp -= 1 << LW_LOCK_COUNT_SHIFT;
+ // We own the lock, decrease the recursion count.
+ if (lock_word.ThinLockCount() != 0) {
+ uint32_t new_count = lock_word.ThinLockCount() - 1;
+ LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
+ obj->SetLockWord(thin_locked);
+ } else {
+ obj->SetLockWord(LockWord());
+ }
+ return true; // Success!
}
- } else {
- /*
- * We do not own the lock. The JVM spec requires that we
- * throw an exception in this case.
- */
- FailedUnlock(obj, self, NULL, NULL);
- return false;
}
- } else {
- /*
- * The lock is fat. We must check to see if Unlock has
- * raised any exceptions before continuing.
- */
- DCHECK(LW_MONITOR(*thinp) != NULL);
- if (!LW_MONITOR(*thinp)->Unlock(self, false)) {
- // An exception has been raised. Do not fall through.
- return false;
+ case LockWord::kFatLocked: {
+ Monitor* mon = lock_word.FatLockMonitor();
+ return mon->Unlock(self);
}
+ default:
+ LOG(FATAL) << "Unreachable";
+ return false;
}
- return true;
}
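
A small sketch of the thin-lock exit arithmetic above: the recursion count is decremented, or the
word is cleared back to kUnlocked once the count reaches zero. As in the code above, no CAS is
used on this path because only the owning thread mutates a word it holds thin-locked; the snippet
is illustrative only:

    #include <cassert>
    #include <cstdint>

    const uint32_t kCountShift = 16;  // kThinLockCountShift

    // Returns the new word after one MonitorExit on a thin-locked word.
    uint32_t ThinUnlock(uint32_t value) {
      uint32_t count = (value >> kCountShift) & 0x7fffu;
      uint32_t owner = value & 0xffffu;
      if (count != 0) {
        return ((count - 1) << kCountShift) | owner;  // still held, one level shallower
      }
      return 0;  // LockWord(): back to kUnlocked
    }

    int main() {
      uint32_t value = (2u << kCountShift) | 7u;  // owner thread id 7, recursion count 2
      value = ThinUnlock(value);
      value = ThinUnlock(value);
      value = ThinUnlock(value);
      assert(value == 0u);
      return 0;
    }
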
/*
@@ -761,84 +667,91 @@ bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) {
*/
void Monitor::Wait(Thread* self, mirror::Object *obj, int64_t ms, int32_t ns,
bool interruptShouldThrow, ThreadState why) {
- volatile int32_t* thinp = obj->GetRawLockWordAddress();
+ DCHECK(self != NULL);
+ DCHECK(obj != NULL);
- // If the lock is still thin, we need to fatten it.
- uint32_t thin = *thinp;
- if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
- // Make sure that 'self' holds the lock.
- if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
+ LockWord lock_word = obj->GetLockWord();
+ switch (lock_word.GetState()) {
+ case LockWord::kUnlocked:
ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
- return;
+ return; // Failure.
+ case LockWord::kThinLocked: {
+ uint32_t thread_id = self->GetThreadId();
+ uint32_t owner_thread_id = lock_word.ThinLockOwner();
+ if (owner_thread_id != thread_id) {
+ ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
+ return; // Failure.
+ } else {
+ // We own the lock, inflate to enqueue ourself on the Monitor.
+ Inflate(self, self, obj);
+ lock_word = obj->GetLockWord();
+ }
+ break;
}
-
- /* This thread holds the lock. We need to fatten the lock
- * so 'self' can block on it. Don't update the object lock
- * field yet, because 'self' needs to acquire the lock before
- * any other thread gets a chance.
- */
- Inflate(self, obj);
- VLOG(monitor) << StringPrintf("monitor: thread %d fattened lock %p by wait()", self->GetThinLockId(), thinp);
+ case LockWord::kFatLocked:
+ break; // Already set for a wait.
}
- LW_MONITOR(*thinp)->Wait(self, ms, ns, interruptShouldThrow, why);
+ Monitor* mon = lock_word.FatLockMonitor();
+ mon->Wait(self, ms, ns, interruptShouldThrow, why);
}
-void Monitor::Notify(Thread* self, mirror::Object *obj) {
- uint32_t thin = *obj->GetRawLockWordAddress();
+void Monitor::InflateAndNotify(Thread* self, mirror::Object* obj, bool notify_all) {
+ DCHECK(self != NULL);
+ DCHECK(obj != NULL);
- // If the lock is still thin, there aren't any waiters;
- // waiting on an object forces lock fattening.
- if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
- // Make sure that 'self' holds the lock.
- if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
+ LockWord lock_word = obj->GetLockWord();
+ switch (lock_word.GetState()) {
+ case LockWord::kUnlocked:
ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
- return;
+ return; // Failure.
+ case LockWord::kThinLocked: {
+ uint32_t thread_id = self->GetThreadId();
+ uint32_t owner_thread_id = lock_word.ThinLockOwner();
+ if (owner_thread_id != thread_id) {
+ ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
+ return; // Failure.
+ } else {
+ // We own the lock but there's no Monitor and therefore no waiters.
+ return; // Success.
+ }
+ }
+ case LockWord::kFatLocked: {
+ Monitor* mon = lock_word.FatLockMonitor();
+ if (notify_all) {
+ mon->NotifyAll(self);
+ } else {
+ mon->Notify(self);
+ }
+ return; // Success.
}
- // no-op; there are no waiters to notify.
- // We inflate here in case the Notify is in a tight loop. Without inflation here the waiter
- // will struggle to get in. Bug 6961405.
- Inflate(self, obj);
- } else {
- // It's a fat lock.
- LW_MONITOR(thin)->Notify(self);
}
}
-void Monitor::NotifyAll(Thread* self, mirror::Object *obj) {
- uint32_t thin = *obj->GetRawLockWordAddress();
+uint32_t Monitor::GetLockOwnerThreadId(mirror::Object* obj) {
+ DCHECK(obj != NULL);
- // If the lock is still thin, there aren't any waiters;
- // waiting on an object forces lock fattening.
- if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
- // Make sure that 'self' holds the lock.
- if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
- ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
- return;
+ LockWord lock_word = obj->GetLockWord();
+ switch (lock_word.GetState()) {
+ case LockWord::kUnlocked:
+ return ThreadList::kInvalidThreadId;
+ case LockWord::kThinLocked:
+ return lock_word.ThinLockOwner();
+ case LockWord::kFatLocked: {
+ Monitor* mon = lock_word.FatLockMonitor();
+ return mon->GetOwnerThreadId();
}
- // no-op; there are no waiters to notify.
- // We inflate here in case the NotifyAll is in a tight loop. Without inflation here the waiter
- // will struggle to get in. Bug 6961405.
- Inflate(self, obj);
- } else {
- // It's a fat lock.
- LW_MONITOR(thin)->NotifyAll(self);
- }
-}
-
-uint32_t Monitor::GetThinLockId(uint32_t raw_lock_word) {
- if (LW_SHAPE(raw_lock_word) == LW_SHAPE_THIN) {
- return LW_LOCK_OWNER(raw_lock_word);
- } else {
- Thread* owner = LW_MONITOR(raw_lock_word)->owner_;
- return owner ? owner->GetThinLockId() : 0;
+ default:
+ LOG(FATAL) << "Unreachable";
+ return ThreadList::kInvalidThreadId;
}
}
void Monitor::DescribeWait(std::ostream& os, const Thread* thread) {
ThreadState state = thread->GetState();
- mirror::Object* object = NULL;
- uint32_t lock_owner = ThreadList::kInvalidId;
+ int32_t object_identity_hashcode = 0;
+ uint32_t lock_owner = ThreadList::kInvalidThreadId;
+ std::string pretty_type;
if (state == kWaiting || state == kTimedWaiting || state == kSleeping) {
if (state == kSleeping) {
os << " - sleeping on ";
@@ -850,14 +763,18 @@ void Monitor::DescribeWait(std::ostream& os, const Thread* thread) {
MutexLock mu(self, *thread->wait_mutex_);
Monitor* monitor = thread->wait_monitor_;
if (monitor != NULL) {
- object = monitor->obj_;
+ mirror::Object* object = monitor->obj_;
+ object_identity_hashcode = object->IdentityHashCode();
+ pretty_type = PrettyTypeOf(object);
}
}
} else if (state == kBlocked) {
os << " - waiting to lock ";
- object = thread->monitor_enter_object_;
+ mirror::Object* object = thread->monitor_enter_object_;
if (object != NULL) {
- lock_owner = object->GetThinLockId();
+ object_identity_hashcode = object->IdentityHashCode();
+ lock_owner = object->GetLockOwnerThreadId();
+ pretty_type = PrettyTypeOf(object);
}
} else {
// We're not waiting on anything.
@@ -865,10 +782,10 @@ void Monitor::DescribeWait(std::ostream& os, const Thread* thread) {
}
// - waiting on <0x6008c468> (a java.lang.Class<java.lang.ref.ReferenceQueue>)
- os << "<" << object << "> (a " << PrettyTypeOf(object) << ")";
+ os << StringPrintf("<0x%08x> (a %s)", object_identity_hashcode, pretty_type.c_str());
// - waiting to lock <0x613f83d8> (a java.lang.Object) held by thread 5
- if (lock_owner != ThreadList::kInvalidId) {
+ if (lock_owner != ThreadList::kInvalidThreadId) {
os << " held by thread " << lock_owner;
}
@@ -879,18 +796,15 @@ mirror::Object* Monitor::GetContendedMonitor(Thread* thread) {
// This is used to implement JDWP's ThreadReference.CurrentContendedMonitor, and has a bizarre
// definition of contended that includes a monitor a thread is trying to enter...
mirror::Object* result = thread->monitor_enter_object_;
- if (result != NULL) {
- return result;
- }
- // ...but also a monitor that the thread is waiting on.
- {
+ if (result == NULL) {
+ // ...but also a monitor that the thread is waiting on.
MutexLock mu(Thread::Current(), *thread->wait_mutex_);
Monitor* monitor = thread->wait_monitor_;
if (monitor != NULL) {
- return monitor->obj_;
+ result = monitor->GetObject();
}
}
- return NULL;
+ return result;
}
void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::Object*, void*),
@@ -955,41 +869,56 @@ void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::O
}
}
-bool Monitor::IsValidLockWord(int32_t lock_word) {
- if (lock_word == 0) {
- return true;
- } else if (LW_SHAPE(lock_word) == LW_SHAPE_FAT) {
- Monitor* mon = LW_MONITOR(lock_word);
- MonitorList* list = Runtime::Current()->GetMonitorList();
- MutexLock mu(Thread::Current(), list->monitor_list_lock_);
- bool found = false;
- for (Monitor* list_mon : list->list_) {
- if (mon == list_mon) {
- found = true;
- break;
+bool Monitor::IsValidLockWord(LockWord lock_word) {
+ switch (lock_word.GetState()) {
+ case LockWord::kUnlocked:
+ // Nothing to check.
+ return true;
+ case LockWord::kThinLocked:
+ // Basic sanity check of owner.
+ return lock_word.ThinLockOwner() != ThreadList::kInvalidThreadId;
+ case LockWord::kFatLocked: {
+ // Check the monitor appears in the monitor list.
+ Monitor* mon = lock_word.FatLockMonitor();
+ MonitorList* list = Runtime::Current()->GetMonitorList();
+ MutexLock mu(Thread::Current(), list->monitor_list_lock_);
+ for (Monitor* list_mon : list->list_) {
+ if (mon == list_mon) {
+ return true; // Found our monitor.
+ }
}
+ return false; // Fail - unowned monitor in an object.
}
- return found;
- } else {
- // TODO: thin lock validity checking.
- return LW_SHAPE(lock_word) == LW_SHAPE_THIN;
+ default:
+ LOG(FATAL) << "Unreachable";
+ return false;
}
}
void Monitor::TranslateLocation(const mirror::ArtMethod* method, uint32_t dex_pc,
- const char*& source_file, uint32_t& line_number) const {
+ const char** source_file, uint32_t* line_number) const {
// If method is null, location is unknown
if (method == NULL) {
- source_file = "";
- line_number = 0;
+ *source_file = "";
+ *line_number = 0;
return;
}
MethodHelper mh(method);
- source_file = mh.GetDeclaringClassSourceFile();
- if (source_file == NULL) {
- source_file = "";
+ *source_file = mh.GetDeclaringClassSourceFile();
+ if (*source_file == NULL) {
+ *source_file = "";
+ }
+ *line_number = mh.GetLineNumFromDexPC(dex_pc);
+}
+
+uint32_t Monitor::GetOwnerThreadId() {
+ MutexLock mu(Thread::Current(), monitor_lock_);
+ Thread* owner = owner_;
+ if (owner != NULL) {
+ return owner->GetThreadId();
+ } else {
+ return ThreadList::kInvalidThreadId;
}
- line_number = mh.GetLineNumFromDexPC(dex_pc);
}
MonitorList::MonitorList()
@@ -1041,22 +970,26 @@ void MonitorList::SweepMonitorList(RootVisitor visitor, void* arg) {
}
}
-MonitorInfo::MonitorInfo(mirror::Object* o) : owner(NULL), entry_count(0) {
- uint32_t lock_word = *o->GetRawLockWordAddress();
- if (LW_SHAPE(lock_word) == LW_SHAPE_THIN) {
- uint32_t owner_thin_lock_id = LW_LOCK_OWNER(lock_word);
- if (owner_thin_lock_id != 0) {
- owner = Runtime::Current()->GetThreadList()->FindThreadByThinLockId(owner_thin_lock_id);
- entry_count = 1 + LW_LOCK_COUNT(lock_word);
- }
- // Thin locks have no waiters.
- } else {
- CHECK_EQ(LW_SHAPE(lock_word), LW_SHAPE_FAT);
- Monitor* monitor = LW_MONITOR(lock_word);
- owner = monitor->owner_;
- entry_count = 1 + monitor->lock_count_;
- for (Thread* waiter = monitor->wait_set_; waiter != NULL; waiter = waiter->wait_next_) {
- waiters.push_back(waiter);
+MonitorInfo::MonitorInfo(mirror::Object* obj) : owner_(NULL), entry_count_(0) {
+ DCHECK(obj != NULL);
+
+ LockWord lock_word = obj->GetLockWord();
+ switch (lock_word.GetState()) {
+ case LockWord::kUnlocked:
+ break;
+ case LockWord::kThinLocked:
+ owner_ = Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+ entry_count_ = 1 + lock_word.ThinLockCount();
+ // Thin locks have no waiters.
+ break;
+ case LockWord::kFatLocked: {
+ Monitor* mon = lock_word.FatLockMonitor();
+ owner_ = mon->owner_;
+ entry_count_ = 1 + mon->lock_count_;
+ for (Thread* waiter = mon->wait_set_; waiter != NULL; waiter = waiter->wait_next_) {
+ waiters_.push_back(waiter);
+ }
+ break;
}
}
}
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 71fe716..044f76e 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -30,47 +30,28 @@
namespace art {
-/*
- * Monitor shape field. Used to distinguish thin locks from fat locks.
- */
-#define LW_SHAPE_THIN 0
-#define LW_SHAPE_FAT 1
-
-/*
- * Hash state field. Used to signify that an object has had its
- * identity hash code exposed or relocated.
- */
-#define LW_HASH_STATE_UNHASHED 0
-#define LW_HASH_STATE_HASHED 1
-#define LW_HASH_STATE_HASHED_AND_MOVED 3
-#define LW_HASH_STATE_MASK 0x3
-#define LW_HASH_STATE_SHIFT 1
-#define LW_HASH_STATE(x) (((x) >> LW_HASH_STATE_SHIFT) & LW_HASH_STATE_MASK)
-
-/*
- * Lock owner field. Contains the thread id of the thread currently
- * holding the lock.
- */
-#define LW_LOCK_OWNER_MASK 0xffff
-#define LW_LOCK_OWNER_SHIFT 3
-#define LW_LOCK_OWNER(x) (((x) >> LW_LOCK_OWNER_SHIFT) & LW_LOCK_OWNER_MASK)
-
namespace mirror {
class ArtMethod;
class Object;
} // namespace mirror
+class LockWord;
class Thread;
class StackVisitor;
class Monitor {
public:
+ // The default number of spins that are done before thread suspension is used to forcibly inflate
+ // a lock word. See Runtime::max_spins_before_thin_lock_inflation_.
+ constexpr static size_t kDefaultMaxSpinsBeforeThinLockInflation = 50;
+
~Monitor();
static bool IsSensitiveThread();
static void Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)());
- static uint32_t GetThinLockId(uint32_t raw_lock_word)
- NO_THREAD_SAFETY_ANALYSIS; // Reading lock owner without holding lock is racy.
+ // Return the thread id of the lock owner or 0 when there is no owner.
+ static uint32_t GetLockOwnerThreadId(mirror::Object* obj)
+ NO_THREAD_SAFETY_ANALYSIS; // TODO: Reading lock owner without holding lock is racy.
static void MonitorEnter(Thread* thread, mirror::Object* obj)
EXCLUSIVE_LOCK_FUNCTION(monitor_lock_)
@@ -80,9 +61,13 @@ class Monitor {
UNLOCK_FUNCTION(monitor_lock_);
static void Notify(Thread* self, mirror::Object* obj)
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ InflateAndNotify(self, obj, false);
+ }
static void NotifyAll(Thread* self, mirror::Object* obj)
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ InflateAndNotify(self, obj, true);
+ }
static void Wait(Thread* self, mirror::Object* obj, int64_t ms, int32_t ns,
bool interruptShouldThrow, ThreadState why)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -92,7 +77,8 @@ class Monitor {
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
// Used to implement JDWP's ThreadReference.CurrentContendedMonitor.
- static mirror::Object* GetContendedMonitor(Thread* thread);
+ static mirror::Object* GetContendedMonitor(Thread* thread)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
// Calls 'callback' once for each lock held in the single stack frame represented by
// the current state of 'stack_visitor'.
@@ -100,19 +86,33 @@ class Monitor {
void* callback_context)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- static bool IsValidLockWord(int32_t lock_word);
+ static bool IsValidLockWord(LockWord lock_word);
+
+ // TODO: SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+ mirror::Object* GetObject() const {
+ return obj_;
+ }
- mirror::Object* GetObject();
void SetObject(mirror::Object* object);
+ Thread* GetOwner() const NO_THREAD_SAFETY_ANALYSIS {
+ return owner_;
+ }
+
private:
explicit Monitor(Thread* owner, mirror::Object* obj)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ // Install the monitor into its object; this may fail if another thread installs a different monitor
+ // first.
+ bool Install(Thread* self)
+ LOCKS_EXCLUDED(monitor_lock_)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
void AppendToWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
void RemoveFromWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
- static void Inflate(Thread* self, mirror::Object* obj)
+ static void Inflate(Thread* self, Thread* owner, mirror::Object* obj)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
@@ -123,43 +123,49 @@ class Monitor {
LOCKS_EXCLUDED(Locks::thread_list_lock_)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- void Lock(Thread* self) EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
- bool Unlock(Thread* thread, bool for_wait) UNLOCK_FUNCTION(monitor_lock_);
+ void Lock(Thread* self)
+ LOCKS_EXCLUDED(monitor_lock_)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ bool Unlock(Thread* thread)
+ LOCKS_EXCLUDED(monitor_lock_)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- void Notify(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
- void NotifyWithLock(Thread* self)
- EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+ static void InflateAndNotify(Thread* self, mirror::Object* obj, bool notify_all)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- void NotifyAll(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
- void NotifyAllWithLock()
- EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+ void Notify(Thread* self)
+ LOCKS_EXCLUDED(monitor_lock_)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ void NotifyAll(Thread* self)
+ LOCKS_EXCLUDED(monitor_lock_)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void Wait(Thread* self, int64_t msec, int32_t nsec, bool interruptShouldThrow, ThreadState why)
- NO_THREAD_SAFETY_ANALYSIS;
- void WaitWithLock(Thread* self, int64_t ms, int32_t ns, bool interruptShouldThrow, ThreadState why)
- EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+ LOCKS_EXCLUDED(monitor_lock_)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
// Translates the provided method and pc into its declaring class' source file and line number.
void TranslateLocation(const mirror::ArtMethod* method, uint32_t pc,
- const char*& source_file, uint32_t& line_number) const
+ const char** source_file, uint32_t* line_number) const
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ uint32_t GetOwnerThreadId();
+
static bool (*is_sensitive_thread_hook_)();
static uint32_t lock_profiling_threshold_;
Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_);
// Which thread currently owns the lock?
- Thread* volatile owner_;
+ Thread* volatile owner_ GUARDED_BY(monitor_lock_);
// Owner's recursive lock depth.
int lock_count_ GUARDED_BY(monitor_lock_);
- // What object are we part of (for debugging).
+ // What object are we part of.
mirror::Object* obj_;
// Threads currently waiting on this monitor.
@@ -205,9 +211,9 @@ class MonitorInfo {
public:
explicit MonitorInfo(mirror::Object* o) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
- Thread* owner;
- size_t entry_count;
- std::vector<Thread*> waiters;
+ Thread* owner_;
+ size_t entry_count_;
+ std::vector<Thread*> waiters_;
private:
DISALLOW_COPY_AND_ASSIGN(MonitorInfo);
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 8efa072..d89290b 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -81,7 +81,7 @@ void Monitor::LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample
mirror::ArtMethod* m = self->GetCurrentMethod(&pc);
const char* filename;
uint32_t line_number;
- TranslateLocation(m, pc, filename, line_number);
+ TranslateLocation(m, pc, &filename, &line_number);
cp = EventLogWriteString(cp, filename, strlen(filename));
// Emit the source code line number, 5 bytes.
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index eaf67b8..5508270 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -34,7 +34,7 @@ static jobject GetThreadStack(JNIEnv* env, jobject peer) {
}
// Suspend thread to build stack trace.
bool timed_out;
- Thread* thread = Thread::SuspendForDebugger(peer, true, &timed_out);
+ Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
if (thread != NULL) {
jobject trace;
{
@@ -42,7 +42,7 @@ static jobject GetThreadStack(JNIEnv* env, jobject peer) {
trace = thread->CreateInternalStackTrace(soa);
}
// Restart suspended thread.
- Runtime::Current()->GetThreadList()->Resume(thread, true);
+ Runtime::Current()->GetThreadList()->Resume(thread, false);
return trace;
} else {
if (timed_out) {
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index f8eeb29..9b83206 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -26,7 +26,7 @@ static jobject DexCache_getDexNative(JNIEnv* env, jobject javaDexCache) {
ScopedObjectAccess soa(env);
mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
// Should only be called while holding the lock on the dex cache.
- DCHECK_EQ(dex_cache->GetThinLockId(), soa.Self()->GetThinLockId());
+ DCHECK_EQ(dex_cache->GetLockOwnerThreadId(), soa.Self()->GetThreadId());
const DexFile* dex_file = dex_cache->GetDexFile();
if (dex_file == NULL) {
return NULL;
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index e85ef09..a9de086 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -122,13 +122,13 @@ static void Thread_nativeSetName(JNIEnv* env, jobject peer, jstring java_name) {
// thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
// in the DDMS send code.
bool timed_out;
- Thread* thread = Thread::SuspendForDebugger(peer, true, &timed_out);
+ Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
if (thread != NULL) {
{
ScopedObjectAccess soa(env);
thread->SetThreadName(name.c_str());
}
- Runtime::Current()->GetThreadList()->Resume(thread, true);
+ Runtime::Current()->GetThreadList()->Resume(thread, false);
} else if (timed_out) {
LOG(ERROR) << "Trying to set thread name to '" << name.c_str() << "' failed as the thread "
"failed to suspend within a generous timeout.";
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 0676968..4f81a0b 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -44,22 +44,10 @@ static jboolean DdmVmInternal_getRecentAllocationStatus(JNIEnv*, jclass) {
* NULL on failure, e.g. if the threadId couldn't be found.
*/
static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint thin_lock_id) {
- ScopedLocalRef<jobject> peer(env, NULL);
- {
- Thread* t = Runtime::Current()->GetThreadList()->FindThreadByThinLockId(thin_lock_id);
- if (t == NULL) {
- return NULL;
- }
- ScopedObjectAccess soa(env);
- peer.reset(soa.AddLocalReference<jobject>(t->GetPeer()));
- }
- if (peer.get() == NULL) {
- return NULL;
- }
-
// Suspend thread to build stack trace.
+ ThreadList* thread_list = Runtime::Current()->GetThreadList();
bool timed_out;
- Thread* thread = Thread::SuspendForDebugger(peer.get(), true, &timed_out);
+ Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
if (thread != NULL) {
jobject trace;
{
@@ -67,7 +55,7 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
trace = thread->CreateInternalStackTrace(soa);
}
// Restart suspended thread.
- Runtime::Current()->GetThreadList()->Resume(thread, true);
+ thread_list->Resume(thread, false);
return Thread::InternalStackTraceToStackTraceElementArray(env, trace);
} else {
if (timed_out) {
@@ -115,7 +103,7 @@ static void ThreadStatsGetterCallback(Thread* t, void* context) {
GetTaskStats(t->GetTid(), &native_thread_state, &utime, &stime, &task_cpu);
std::vector<uint8_t>& bytes = *reinterpret_cast<std::vector<uint8_t>*>(context);
- JDWP::Append4BE(bytes, t->GetThinLockId());
+ JDWP::Append4BE(bytes, t->GetThreadId());
JDWP::Append1BE(bytes, Dbg::ToJdwpThreadStatus(t->GetState()));
JDWP::Append4BE(bytes, t->GetTid());
JDWP::Append4BE(bytes, utime);
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index f83db90..692cecc 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -36,7 +36,8 @@ namespace art {
class ObjectLock {
public:
- explicit ObjectLock(Thread* self, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+ explicit ObjectLock(Thread* self, mirror::Object* object)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
: self_(self), obj_(object) {
CHECK(object != NULL);
obj_->MonitorEnter(self_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b4ce37f..8a20bbc 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -75,6 +75,7 @@ Runtime::Runtime()
is_explicit_gc_disabled_(false),
default_stack_size_(0),
heap_(NULL),
+ max_spins_before_thin_lock_inflation_(Monitor::kDefaultMaxSpinsBeforeThinLockInflation),
monitor_list_(NULL),
thread_list_(NULL),
intern_table_(NULL),
@@ -350,6 +351,7 @@ Runtime::ParsedOptions* Runtime::ParsedOptions::Create(const Options& options, b
// Only the main GC thread, no workers.
parsed->conc_gc_threads_ = 0;
parsed->stack_size_ = 0; // 0 means default.
+ parsed->max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
parsed->low_memory_mode_ = false;
parsed->is_compiler_ = false;
@@ -510,6 +512,10 @@ Runtime::ParsedOptions* Runtime::ParsedOptions::Create(const Options& options, b
return NULL;
}
parsed->stack_size_ = size;
+ } else if (StartsWith(option, "-XX:MaxSpinsBeforeThinLockInflation=")) {
+ parsed->max_spins_before_thin_lock_inflation_ =
+ strtoul(option.substr(strlen("-XX:MaxSpinsBeforeThinLockInflation=")).c_str(),
+ nullptr, 10);
} else if (option == "-XX:LongPauseLogThreshold") {
parsed->long_pause_log_threshold_ =
ParseMemoryOption(option.substr(strlen("-XX:LongPauseLogThreshold=")).c_str(), 1024);
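
For reference, a hedged sketch of how the new option string handled above is consumed: the flag
name and the strtoul call come from the patch, while the surrounding harness is invented:

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <string>

    int main() {
      std::string option = "-XX:MaxSpinsBeforeThinLockInflation=100";
      const char* prefix = "-XX:MaxSpinsBeforeThinLockInflation=";
      if (option.compare(0, std::strlen(prefix), prefix) == 0) {
        std::size_t max_spins = std::strtoul(option.substr(std::strlen(prefix)).c_str(), nullptr, 10);
        std::printf("max spins before thin lock inflation: %zu\n", max_spins);
      }
      return 0;
    }
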
@@ -866,6 +872,8 @@ bool Runtime::Init(const Options& raw_options, bool ignore_unrecognized) {
default_stack_size_ = options->stack_size_;
stack_trace_file_ = options->stack_trace_file_;
+ max_spins_before_thin_lock_inflation_ = options->max_spins_before_thin_lock_inflation_;
+
monitor_list_ = new MonitorList;
thread_list_ = new ThreadList;
intern_table_ = new InternTable;
@@ -901,7 +909,7 @@ bool Runtime::Init(const Options& raw_options, bool ignore_unrecognized) {
// objects. We can't supply a thread group yet; it will be fixed later. Since we are the main
// thread, we do not get a java peer.
Thread* self = Thread::Attach("main", false, NULL, false);
- CHECK_EQ(self->thin_lock_id_, ThreadList::kMainId);
+ CHECK_EQ(self->thin_lock_thread_id_, ThreadList::kMainThreadId);
CHECK(self != NULL);
// Set us to runnable so tools using a runtime can allocate and GC by default
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 552cfdf..36b0bd6 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -112,6 +112,7 @@ class Runtime {
size_t parallel_gc_threads_;
size_t conc_gc_threads_;
size_t stack_size_;
+ size_t max_spins_before_thin_lock_inflation_;
bool low_memory_mode_;
size_t lock_profiling_threshold_;
std::string stack_trace_file_;
@@ -283,6 +284,10 @@ class Runtime {
return java_vm_;
}
+ size_t GetMaxSpinsBeforeThinLockInflation() const {
+ return max_spins_before_thin_lock_inflation_;
+ }
+
MonitorList* GetMonitorList() const {
return monitor_list_;
}
@@ -455,6 +460,8 @@ class Runtime {
gc::Heap* heap_;
+ // The number of spins that are done before thread suspension is used to forcibly inflate a contended thin lock.
+ size_t max_spins_before_thin_lock_inflation_;
MonitorList* monitor_list_;
ThreadList* thread_list_;
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 15eb27d..fe62e25 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -147,7 +147,6 @@ void SignalCatcher::HandleSigQuit() {
CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
if (self->ReadFlag(kCheckpointRequest)) {
self->RunCheckpointFunction();
- self->AtomicClearFlag(kCheckpointRequest);
}
self->EndAssertNoThreadSuspension(old_cause);
thread_list->ResumeAll();
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 4552062..7d28785 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -80,17 +80,16 @@ inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
union StateAndFlags new_state_and_flags;
do {
old_state_and_flags = state_and_flags_;
+ if (UNLIKELY((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0)) {
+ RunCheckpointFunction();
+ continue;
+ }
// Copy over flags and try to clear the checkpoint bit if it is set.
new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags & ~kCheckpointRequest;
new_state_and_flags.as_struct.state = new_state;
// CAS the value without a memory barrier, that will occur in the unlock below.
} while (UNLIKELY(android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
&state_and_flags_.as_int) != 0));
- // If we toggled the checkpoint flag we must have cleared it.
- uint16_t flag_change = new_state_and_flags.as_struct.flags ^ old_state_and_flags.as_struct.flags;
- if (UNLIKELY((flag_change & kCheckpointRequest) != 0)) {
- RunCheckpointFunction();
- }
// Release share on mutator_lock_.
Locks::mutator_lock_->SharedUnlock(this);
}
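The hunk above relies on the thread state and its flag bits sharing one 32-bit word, so a pending checkpoint can be noticed, run, and the word republished with a single compare-and-swap. Below is a self-contained sketch of that pattern using std::atomic; the 16/16 bit split, the kCheckpointRequest value and the helper names are assumptions made for illustration, not ART's actual StateAndFlags layout.

#include <atomic>
#include <cstdint>

namespace sketch {

constexpr uint16_t kCheckpointRequest = 0x1;  // assumed flag bit

inline uint32_t Pack(uint16_t state, uint16_t flags) {
  return (static_cast<uint32_t>(state) << 16) | flags;
}
inline uint16_t StateOf(uint32_t word) { return static_cast<uint16_t>(word >> 16); }
inline uint16_t FlagsOf(uint32_t word) { return static_cast<uint16_t>(word & 0xFFFFu); }

void TransitionToSuspended(std::atomic<uint32_t>& state_and_flags, uint16_t new_state,
                           void (*RunCheckpointFunction)()) {
  uint32_t old_word = state_and_flags.load(std::memory_order_relaxed);
  while (true) {
    if ((FlagsOf(old_word) & kCheckpointRequest) != 0) {
      RunCheckpointFunction();  // run (and clear) the pending request, then re-read
      old_word = state_and_flags.load(std::memory_order_relaxed);
      continue;
    }
    uint16_t cleared_flags = static_cast<uint16_t>(FlagsOf(old_word) & ~kCheckpointRequest);
    uint32_t new_word = Pack(new_state, cleared_flags);
    // On failure compare_exchange_weak reloads old_word, so the loop simply retries.
    if (state_and_flags.compare_exchange_weak(old_word, new_word,
                                              std::memory_order_release,
                                              std::memory_order_relaxed)) {
      return;
    }
  }
}

}  // namespace sketch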
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7040337..de14dbb 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -311,7 +311,7 @@ void Thread::Init(ThreadList* thread_list, JavaVMExt* java_vm) {
CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
DCHECK_EQ(Thread::Current(), this);
- thin_lock_id_ = thread_list->AllocThreadId(this);
+ thin_lock_thread_id_ = thread_list->AllocThreadId(this);
InitStackHwm();
jni_env_ = new JNIEnvExt(this, java_vm);
@@ -476,9 +476,9 @@ void Thread::InitStackHwm() {
void Thread::ShortDump(std::ostream& os) const {
os << "Thread[";
- if (GetThinLockId() != 0) {
+ if (GetThreadId() != 0) {
// If we're in kStarting, we won't have a thin lock id or tid yet.
- os << GetThinLockId()
+ os << GetThreadId()
<< ",tid=" << GetTid() << ',';
}
os << GetState()
@@ -574,18 +574,32 @@ void Thread::RunCheckpointFunction() {
ATRACE_BEGIN("Checkpoint function");
checkpoint_function_->Run(this);
ATRACE_END();
+ checkpoint_function_ = NULL;
+ AtomicClearFlag(kCheckpointRequest);
}
bool Thread::RequestCheckpoint(Closure* function) {
- CHECK(!ReadFlag(kCheckpointRequest)) << "Already have a pending checkpoint request";
- checkpoint_function_ = function;
union StateAndFlags old_state_and_flags = state_and_flags_;
+ if (old_state_and_flags.as_struct.state != kRunnable) {
+ return false; // Fail, thread is suspended and so can't run a checkpoint.
+ }
+ if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
+ return false; // Fail, already a checkpoint pending.
+ }
+ CHECK(checkpoint_function_ == NULL);
+ checkpoint_function_ = function;
+ // Checkpoint function installed; now install the flag bit.
// We must be runnable to request a checkpoint.
old_state_and_flags.as_struct.state = kRunnable;
union StateAndFlags new_state_and_flags = old_state_and_flags;
new_state_and_flags.as_struct.flags |= kCheckpointRequest;
int succeeded = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
&state_and_flags_.as_int);
+ if (UNLIKELY(succeeded != 0)) {
+ // The state/flags word changed before the request flag could be set; roll back the installed function.
+ CHECK(checkpoint_function_ == function);
+ checkpoint_function_ = NULL;
+ }
return succeeded == 0;
}
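RequestCheckpoint now follows an install-then-publish protocol: write checkpoint_function_, then try to set the kCheckpointRequest bit with a CAS, and undo the installation if the CAS loses a race with a state change. A small stand-alone sketch of that protocol is below; the names and bit encodings are invented for illustration, and in the real code installation is additionally serialized by Locks::thread_suspend_count_lock_.

#include <atomic>
#include <cstdint>

namespace sketch {

class Closure { public: virtual ~Closure() {} virtual void Run() = 0; };

class CheckpointSlot {
 public:
  // Returns true if the request was installed, false if the target is not
  // runnable or already has a pending request (as in RequestCheckpoint above).
  bool Request(Closure* function) {
    uint32_t old_word = word_.load(std::memory_order_relaxed);
    if ((old_word & kRunnableBit) == 0 || (old_word & kCheckpointBit) != 0) {
      return false;
    }
    pending_ = function;  // 1) install the payload first
    uint32_t new_word = old_word | kCheckpointBit;
    // 2) publish it by setting the flag bit against the word we observed.
    if (!word_.compare_exchange_strong(old_word, new_word,
                                       std::memory_order_release,
                                       std::memory_order_relaxed)) {
      pending_ = nullptr;  // 3) the target raced us; undo so nothing runs stale work
      return false;
    }
    return true;
  }

 private:
  static constexpr uint32_t kRunnableBit = 0x1;    // assumed encoding
  static constexpr uint32_t kCheckpointBit = 0x2;  // assumed encoding
  std::atomic<uint32_t> word_{kRunnableBit};
  Closure* pending_ = nullptr;
};

}  // namespace sketch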
@@ -600,88 +614,6 @@ void Thread::FullSuspendCheck() {
VLOG(threads) << this << " self-reviving";
}
-Thread* Thread::SuspendForDebugger(jobject peer, bool request_suspension, bool* timed_out) {
- static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
- useconds_t total_delay_us = 0;
- useconds_t delay_us = 0;
- bool did_suspend_request = false;
- *timed_out = false;
- while (true) {
- Thread* thread;
- {
- ScopedObjectAccess soa(Thread::Current());
- Thread* self = soa.Self();
- MutexLock mu(self, *Locks::thread_list_lock_);
- thread = Thread::FromManagedThread(soa, peer);
- if (thread == NULL) {
- JNIEnv* env = self->GetJniEnv();
- ScopedLocalRef<jstring> scoped_name_string(env,
- (jstring)env->GetObjectField(peer,
- WellKnownClasses::java_lang_Thread_name));
- ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
- if (scoped_name_chars.c_str() == NULL) {
- LOG(WARNING) << "No such thread for suspend: " << peer;
- env->ExceptionClear();
- } else {
- LOG(WARNING) << "No such thread for suspend: " << peer << ":" << scoped_name_chars.c_str();
- }
-
- return NULL;
- }
- {
- MutexLock mu(soa.Self(), *Locks::thread_suspend_count_lock_);
- if (request_suspension) {
- thread->ModifySuspendCount(soa.Self(), +1, true /* for_debugger */);
- request_suspension = false;
- did_suspend_request = true;
- }
- // IsSuspended on the current thread will fail as the current thread is changed into
- // Runnable above. As the suspend count is now raised if this is the current thread
- // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
- // to just explicitly handle the current thread in the callers to this code.
- CHECK_NE(thread, soa.Self()) << "Attempt to suspend the current thread for the debugger";
- // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
- // count, or else we've waited and it has self suspended) or is the current thread, we're
- // done.
- if (thread->IsSuspended()) {
- return thread;
- }
- if (total_delay_us >= kTimeoutUs) {
- LOG(ERROR) << "Thread suspension timed out: " << peer;
- if (did_suspend_request) {
- thread->ModifySuspendCount(soa.Self(), -1, true /* for_debugger */);
- }
- *timed_out = true;
- return NULL;
- }
- }
- // Release locks and come out of runnable state.
- }
- for (int i = kLockLevelCount - 1; i >= 0; --i) {
- BaseMutex* held_mutex = Thread::Current()->GetHeldMutex(static_cast<LockLevel>(i));
- if (held_mutex != NULL) {
- LOG(FATAL) << "Holding " << held_mutex->GetName()
- << " while sleeping for thread suspension";
- }
- }
- {
- useconds_t new_delay_us = delay_us * 2;
- CHECK_GE(new_delay_us, delay_us);
- if (new_delay_us < 500000) { // Don't allow sleeping to be more than 0.5s.
- delay_us = new_delay_us;
- }
- }
- if (delay_us == 0) {
- sched_yield();
- // Default to 1 milliseconds (note that this gets multiplied by 2 before the first sleep).
- delay_us = 500;
- } else {
- usleep(delay_us);
- total_delay_us += delay_us;
- }
- }
-}
-
void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) {
std::string group_name;
int priority;
@@ -718,7 +650,7 @@ void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) {
os << " daemon";
}
os << " prio=" << priority
- << " tid=" << thread->GetThinLockId()
+ << " tid=" << thread->GetThreadId()
<< " " << thread->GetState();
if (thread->IsStillStarting()) {
os << " (still starting up)";
@@ -968,9 +900,9 @@ Thread::Thread(bool daemon)
jpeer_(NULL),
stack_begin_(NULL),
stack_size_(0),
+ thin_lock_thread_id_(0),
stack_trace_sample_(NULL),
trace_clock_base_(0),
- thin_lock_id_(0),
tid_(0),
wait_mutex_(new Mutex("a thread wait mutex")),
wait_cond_(new ConditionVariable("a thread wait condition variable", *wait_mutex_)),
@@ -1718,7 +1650,7 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset, size_t size_of_
DO_THREAD_OFFSET(self_);
DO_THREAD_OFFSET(stack_end_);
DO_THREAD_OFFSET(suspend_count_);
- DO_THREAD_OFFSET(thin_lock_id_);
+ DO_THREAD_OFFSET(thin_lock_thread_id_);
// DO_THREAD_OFFSET(top_of_managed_stack_);
// DO_THREAD_OFFSET(top_of_managed_stack_pc_);
DO_THREAD_OFFSET(top_sirt_);
@@ -2001,7 +1933,7 @@ bool Thread::HoldsLock(mirror::Object* object) {
if (object == NULL) {
return false;
}
- return object->GetThinLockId() == thin_lock_id_;
+ return object->GetLockOwnerThreadId() == thin_lock_thread_id_;
}
// RootVisitor parameters are: (const Object* obj, size_t vreg, const StackVisitor* visitor).
diff --git a/runtime/thread.h b/runtime/thread.h
index 2d9e009..3aa1373 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -154,7 +154,8 @@ class PACKED(4) Thread {
void ModifySuspendCount(Thread* self, int delta, bool for_debugger)
EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
- bool RequestCheckpoint(Closure* function);
+ bool RequestCheckpoint(Closure* function)
+ EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
// Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
// mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
@@ -175,14 +176,6 @@ class PACKED(4) Thread {
UNLOCK_FUNCTION(Locks::mutator_lock_)
ALWAYS_INLINE;
- // Wait for a debugger suspension on the thread associated with the given peer. Returns the
- // thread on success, else NULL. If the thread should be suspended then request_suspension should
- // be true on entry. If the suspension times out then *timeout is set to true.
- static Thread* SuspendForDebugger(jobject peer, bool request_suspension, bool* timed_out)
- LOCKS_EXCLUDED(Locks::mutator_lock_,
- Locks::thread_list_lock_,
- Locks::thread_suspend_count_lock_);
-
// Once called thread suspension will cause an assertion failure.
#ifndef NDEBUG
const char* StartAssertNoThreadSuspension(const char* cause) {
@@ -219,7 +212,7 @@ class PACKED(4) Thread {
return daemon_;
}
- bool HoldsLock(mirror::Object*);
+ bool HoldsLock(mirror::Object*) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
/*
* Changes the priority of this thread to match that of the java.lang.Thread object.
@@ -237,8 +230,8 @@ class PACKED(4) Thread {
*/
static int GetNativePriority();
- uint32_t GetThinLockId() const {
- return thin_lock_id_;
+ uint32_t GetThreadId() const {
+ return thin_lock_thread_id_;
}
pid_t GetTid() const {
@@ -414,7 +407,7 @@ class PACKED(4) Thread {
}
static ThreadOffset ThinLockIdOffset() {
- return ThreadOffset(OFFSETOF_MEMBER(Thread, thin_lock_id_));
+ return ThreadOffset(OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
}
static ThreadOffset CardTableOffset() {
@@ -702,18 +695,18 @@ class PACKED(4) Thread {
// Size of the stack
size_t stack_size_;
- // Pointer to previous stack trace captured by sampling profiler.
- std::vector<mirror::ArtMethod*>* stack_trace_sample_;
-
- // The clock base used for tracing.
- uint64_t trace_clock_base_;
-
// Thin lock thread id. This is a small integer used by the thin lock implementation.
// This is not to be confused with the native thread's tid, nor is it the value returned
// by java.lang.Thread.getId --- this is a distinct value, used only for locking. One
// important difference between this id and the ids visible to managed code is that these
// ones get reused (to ensure that they fit in the number of bits available).
- uint32_t thin_lock_id_;
+ uint32_t thin_lock_thread_id_;
+
+ // Pointer to previous stack trace captured by sampling profiler.
+ std::vector<mirror::ArtMethod*>* stack_trace_sample_;
+
+ // The clock base used for tracing.
+ uint64_t trace_clock_base_;
// System thread id.
pid_t tid_;
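The comment above stresses that thin lock thread ids are small and get reused. A minimal sketch of how such ids can be handed out and recycled with a bitset, in the spirit of AllocThreadId/ReleaseThreadId (illustrative only; 0 is treated as the invalid id as in ThreadList):

#include <bitset>
#include <cstdint>

namespace sketch {

constexpr uint32_t kMaxThreadId = 0xFFFF;  // matches ThreadList::kMaxThreadId

class ThreadIdTable {
 public:
  // Hand out the lowest free id (1-based) so ids stay small enough to fit in
  // the lock word's thread id bits.
  uint32_t Alloc() {
    for (uint32_t i = 0; i < kMaxThreadId; ++i) {
      if (!in_use_[i]) {
        in_use_[i] = true;
        return i + 1;
      }
    }
    return 0;  // 0 doubles as the invalid id; the table is exhausted
  }

  // Once released, the id may be handed to a brand new thread, which is why a
  // thin lock can end up naming a thread that has since exited.
  void Release(uint32_t id) { in_use_[id - 1] = false; }

 private:
  std::bitset<kMaxThreadId> in_use_;
};

}  // namespace sketch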
@@ -722,13 +715,16 @@ class PACKED(4) Thread {
// Guards the 'interrupted_' and 'wait_monitor_' members.
mutable Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ // Condition variable waited upon during a wait.
ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
- // Pointer to the monitor lock we're currently waiting on (or NULL).
+ // Pointer to the monitor lock we're currently waiting on or NULL if not waiting.
Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
// Thread "interrupted" status; stays raised until queried or thrown.
bool32_t interrupted_ GUARDED_BY(wait_mutex_);
- // The next thread in the wait set this thread is part of.
+ // The next thread in the wait set this thread is part of, or NULL if not waiting.
Thread* wait_next_;
+
// If we're blocked in MonitorEnter, this is the object we're trying to lock.
mirror::Object* monitor_enter_object_;
@@ -785,7 +781,8 @@ class PACKED(4) Thread {
// Cause for last suspension.
const char* last_no_thread_suspension_cause_;
- // Pending checkpoint functions.
+ // Pending checkpoint function or NULL if there is no pending checkpoint. Installation is
+ // guarded by Locks::thread_suspend_count_lock_.
Closure* checkpoint_function_;
public:
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 44cf810..ff1ed2a 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -17,6 +17,8 @@
#include "thread_list.h"
#include <dirent.h>
+#include <ScopedLocalRef.h>
+#include <ScopedUtfChars.h>
#include <sys/types.h>
#include <unistd.h>
@@ -24,8 +26,13 @@
#include "base/mutex-inl.h"
#include "base/timing_logger.h"
#include "debugger.h"
+#include "jni_internal.h"
+#include "lock_word.h"
+#include "monitor.h"
+#include "scoped_thread_state_change.h"
#include "thread.h"
#include "utils.h"
+#include "well_known_classes.h"
namespace art {
@@ -33,6 +40,7 @@ ThreadList::ThreadList()
: allocated_ids_lock_("allocated thread ids lock"),
suspend_all_count_(0), debug_suspend_all_count_(0),
thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
+ CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
}
ThreadList::~ThreadList() {
@@ -160,18 +168,19 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) {
// Call a checkpoint function for each thread; threads which are suspended get their checkpoint
// manually called.
MutexLock mu(self, *Locks::thread_list_lock_);
+ MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (const auto& thread : list_) {
if (thread != self) {
- for (;;) {
+ while (true) {
if (thread->RequestCheckpoint(checkpoint_function)) {
// This thread will run its checkpoint some time in the near future.
count++;
break;
} else {
// We are probably suspended, try to make sure that we stay suspended.
- MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
// The thread switched back to runnable.
if (thread->GetState() == kRunnable) {
+ // Spurious fail, try again.
continue;
}
thread->ModifySuspendCount(self, +1, false);
@@ -204,7 +213,7 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) {
}
}
// We know for sure that the thread is suspended at this point.
- thread->RunCheckpointFunction();
+ checkpoint_function->Run(thread);
{
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
thread->ModifySuspendCount(self, -1, false);
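A hypothetical caller of RunCheckpoint, to show how the two paths in the hunk above meet: runnable threads execute the closure at their next suspend check, while already-suspended threads have it run on their behalf by the requester. DumpCheckpoint and DumpAllThreads are invented for this example and assume the runtime's thread.h/thread_list.h headers; only Closure::Run(Thread*) and ThreadList::RunCheckpoint(Closure*) come from the surrounding code.

#include <sstream>

class DumpCheckpoint : public Closure {
 public:
  virtual void Run(Thread* thread) {
    // May run on |thread| itself (if it was runnable) or on the requesting
    // thread while |thread| is held suspended.
    std::ostringstream os;
    thread->ShortDump(os);
    LOG(INFO) << os.str();
  }
};

void DumpAllThreads(ThreadList* thread_list) {
  DumpCheckpoint checkpoint;
  size_t expected = thread_list->RunCheckpoint(&checkpoint);
  LOG(INFO) << "Requested checkpoints on " << expected << " threads";
}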
@@ -322,6 +331,178 @@ void ThreadList::Resume(Thread* thread, bool for_debugger) {
VLOG(threads) << "Resume(" << *thread << ") complete";
}
+static void ThreadSuspendByPeerWarning(Thread* self, int level, const char* message, jobject peer) {
+ JNIEnvExt* env = self->GetJniEnv();
+ ScopedLocalRef<jstring>
+ scoped_name_string(env, (jstring)env->GetObjectField(peer,
+ WellKnownClasses::java_lang_Thread_name));
+ ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
+ if (scoped_name_chars.c_str() == NULL) {
+ LOG(level) << message << ": " << peer;
+ env->ExceptionClear();
+ } else {
+ LOG(level) << message << ": " << peer << ":" << scoped_name_chars.c_str();
+ }
+}
+
+// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
+// individual thread requires polling. delay_us is the requested sleep and total_delay_us
+// accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
+// subsequent sleeps increase delay_us by doubling, capped below 0.5s.
+static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
+ for (int i = kLockLevelCount - 1; i >= 0; --i) {
+ BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
+ if (held_mutex != NULL) {
+ LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
+ }
+ }
+ {
+ useconds_t new_delay_us = (*delay_us) * 2;
+ CHECK_GE(new_delay_us, *delay_us);
+ if (new_delay_us < 500000) { // Don't allow sleeping to be more than 0.5s.
+ *delay_us = new_delay_us;
+ }
+ }
+ if ((*delay_us) == 0) {
+ sched_yield();
+ // Default to 1 millisecond (note that this gets multiplied by 2 before the first sleep).
+ (*delay_us) = 500;
+ } else {
+ usleep(*delay_us);
+ (*total_delay_us) += (*delay_us);
+ }
+}
+
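For reference, a stand-alone program that reproduces the sleep schedule ThreadSuspendSleep generates when the suspend loop keeps retrying: one sched_yield(), then sleeps of 1ms, 2ms, 4ms, ... with the doubling stopped once the next step would reach 0.5s (so it levels off at 256ms), bounded overall by the callers' 30s timeout.

#include <cstdio>

int main() {
  unsigned delay_us = 0;
  unsigned total_delay_us = 0;
  const unsigned kTimeoutUs = 30 * 1000000;  // same 30s bound as the callers below
  for (int iteration = 0; total_delay_us < kTimeoutUs && iteration < 12; ++iteration) {
    unsigned new_delay_us = delay_us * 2;
    if (new_delay_us < 500000) {  // never sleep more than 0.5s at a time
      delay_us = new_delay_us;
    }
    if (delay_us == 0) {
      std::printf("iteration %d: sched_yield()\n", iteration);
      delay_us = 500;  // doubled to 1ms before the first real sleep
    } else {
      std::printf("iteration %d: usleep(%u us)\n", iteration, delay_us);
      total_delay_us += delay_us;
    }
  }
  return 0;
}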
+Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
+ bool debug_suspension, bool* timed_out) {
+ static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
+ useconds_t total_delay_us = 0;
+ useconds_t delay_us = 0;
+ bool did_suspend_request = false;
+ *timed_out = false;
+ Thread* self = Thread::Current();
+ while (true) {
+ Thread* thread;
+ {
+ ScopedObjectAccess soa(self);
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ thread = Thread::FromManagedThread(soa, peer);
+ if (thread == NULL) {
+ ThreadSuspendByPeerWarning(self, WARNING, "No such thread for suspend", peer);
+ return NULL;
+ }
+ {
+ MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+ if (request_suspension) {
+ thread->ModifySuspendCount(self, +1, debug_suspension);
+ request_suspension = false;
+ did_suspend_request = true;
+ } else {
+ // If the caller isn't requesting suspension, a suspension should have already occurred.
+ CHECK_GT(thread->GetSuspendCount(), 0);
+ }
+ // IsSuspended on the current thread will fail as the current thread is changed into
+ // Runnable above. As the suspend count is now raised if this is the current thread
+ // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+ // to just explicitly handle the current thread in the callers to this code.
+ CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+ // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+ // count, or else we've waited and it has self suspended) or is the current thread, we're
+ // done.
+ if (thread->IsSuspended()) {
+ return thread;
+ }
+ if (total_delay_us >= kTimeoutUs) {
+ ThreadSuspendByPeerWarning(self, ERROR, "Thread suspension timed out", peer);
+ if (did_suspend_request) {
+ thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
+ }
+ *timed_out = true;
+ return NULL;
+ }
+ }
+ // Release locks and come out of runnable state.
+ }
+ ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+ }
+}
+
+static void ThreadSuspendByThreadIdWarning(int level, const char* message, uint32_t thread_id) {
+ LOG(level) << StringPrintf("%s: %u", message, thread_id);
+}
+
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension,
+ bool* timed_out) {
+ static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
+ useconds_t total_delay_us = 0;
+ useconds_t delay_us = 0;
+ bool did_suspend_request = false;
+ *timed_out = false;
+ Thread* self = Thread::Current();
+ CHECK_NE(thread_id, kInvalidThreadId);
+ while (true) {
+ Thread* thread = NULL;
+ {
+ ScopedObjectAccess soa(self);
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ for (const auto& it : list_) {
+ if (it->GetThreadId() == thread_id) {
+ thread = it;
+ break;
+ }
+ }
+ if (thread == NULL) {
+ // There's a race in inflating a lock and the owner giving up ownership and then dying.
+ ThreadSuspendByThreadIdWarning(WARNING, "No such thread id for suspend", thread_id);
+ return NULL;
+ }
+ {
+ MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+ if (!did_suspend_request) {
+ thread->ModifySuspendCount(self, +1, debug_suspension);
+ did_suspend_request = true;
+ } else {
+ // A suspend request was already made on an earlier iteration, so the count should still be raised.
+ CHECK_GT(thread->GetSuspendCount(), 0);
+ }
+ // IsSuspended on the current thread will fail as the current thread is changed into
+ // Runnable above. As the suspend count is now raised if this is the current thread
+ // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+ // to just explicitly handle the current thread in the callers to this code.
+ CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+ // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+ // count, or else we've waited and it has self suspended) or is the current thread, we're
+ // done.
+ if (thread->IsSuspended()) {
+ return thread;
+ }
+ if (total_delay_us >= kTimeoutUs) {
+ ThreadSuspendByThreadIdWarning(ERROR, "Thread suspension timed out", thread_id);
+ if (did_suspend_request) {
+ thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
+ }
+ *timed_out = true;
+ return NULL;
+ }
+ }
+ // Release locks and come out of runnable state.
+ }
+ ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+ }
+}
+
+Thread* ThreadList::FindThreadByThreadId(uint32_t thin_lock_id) {
+ Thread* self = Thread::Current();
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ for (const auto& thread : list_) {
+ if (thread->GetThreadId() == thin_lock_id) {
+ CHECK(thread == self || thread->IsSuspended());
+ return thread;
+ }
+ }
+ return NULL;
+}
+
void ThreadList::SuspendAllForDebugger() {
Thread* self = Thread::Current();
Thread* debug_thread = Dbg::GetDebugThread();
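SuspendThreadByThreadId exists mainly for the contended-lock path in monitor.cc, the subject of this change. A simplified, illustrative caller is sketched below; Runtime::Current()->GetThreadList() and the inflation step are assumptions named only to show where the real work would happen, and error handling is reduced to retrying the thin lock path.

void InflateContendedLock(mirror::Object* obj, uint32_t owner_thread_id) {
  ThreadList* thread_list = Runtime::Current()->GetThreadList();  // assumed accessor
  bool timed_out;
  Thread* owner = thread_list->SuspendThreadByThreadId(owner_thread_id,
                                                       false /* debug_suspension */,
                                                       &timed_out);
  if (owner == NULL) {
    // The owner exited, its id was recycled, or the suspension timed out; the
    // caller falls back to spinning on the ordinary thin lock CAS path.
    return;
  }
  // With the owner suspended its lock word cannot change underneath us, so the
  // thin lock can safely be replaced by a fat Monitor* (monitor.cc does this work).
  (void)obj;
  thread_list->Resume(owner, false /* for_debugger */);
}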
@@ -528,8 +709,8 @@ void ThreadList::Unregister(Thread* self) {
// suspend and so on, must happen at this point, and not in ~Thread.
self->Destroy();
- uint32_t thin_lock_id = self->thin_lock_id_;
- self->thin_lock_id_ = 0;
+ uint32_t thin_lock_id = self->thin_lock_thread_id_;
+ self->thin_lock_thread_id_ = 0;
ReleaseThreadId(self, thin_lock_id);
while (self != NULL) {
// Remove and delete the Thread* while holding the thread_list_lock_ and
@@ -609,14 +790,4 @@ void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
allocated_ids_.reset(id);
}
-Thread* ThreadList::FindThreadByThinLockId(uint32_t thin_lock_id) {
- MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
- for (const auto& thread : list_) {
- if (thread->GetThinLockId() == thin_lock_id) {
- return thread;
- }
- }
- return NULL;
-}
-
} // namespace art
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 3df3e2c..b1b3e88 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -18,6 +18,7 @@
#define ART_RUNTIME_THREAD_LIST_H_
#include "base/mutex.h"
+#include "jni.h"
#include "root_visitor.h"
#include <bitset>
@@ -31,8 +32,8 @@ class TimingLogger;
class ThreadList {
public:
static const uint32_t kMaxThreadId = 0xFFFF;
- static const uint32_t kInvalidId = 0;
- static const uint32_t kMainId = 1;
+ static const uint32_t kInvalidThreadId = 0;
+ static const uint32_t kMainThreadId = 1;
explicit ThreadList();
~ThreadList();
@@ -59,6 +60,30 @@ class ThreadList {
LOCKS_EXCLUDED(Locks::thread_list_lock_,
Locks::thread_suspend_count_lock_);
+
+ // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
+ // else NULL. The peer is used to identify the thread to avoid races with the thread terminating.
+ // If the thread should be suspended then request_suspension should be true; otherwise the
+ // routine waits for a previously made suspend request to take effect. If the suspension times
+ // out then *timed_out is set to true.
+ static Thread* SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension,
+ bool* timed_out)
+ LOCKS_EXCLUDED(Locks::mutator_lock_,
+ Locks::thread_list_lock_,
+ Locks::thread_suspend_count_lock_);
+
+ // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
+ // thread on success else NULL. The thread id is used to identify the thread to avoid races with
+ // the thread terminating. Note that as thread ids are recycled, this may not suspend the
+ // expected thread if that thread has terminated and its id has been reused. If the suspension
+ // times out then *timed_out is set to true.
+ Thread* SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out)
+ LOCKS_EXCLUDED(Locks::mutator_lock_,
+ Locks::thread_list_lock_,
+ Locks::thread_suspend_count_lock_);
+
+ // Find an already suspended thread (or self) by its id.
+ Thread* FindThreadByThreadId(uint32_t thin_lock_id);
+
// Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
// of the suspend check. Returns how many checkpoints we should expect to run.
size_t RunCheckpoint(Closure* checkpoint_function);
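A hypothetical debugger-side use of SuspendThreadByPeer, to make the request_suspension / timed_out contract above concrete; the surrounding debugger plumbing and the Runtime::Current()->GetThreadList() accessor are assumptions of this example.

bool SuspendPeerForInspection(jobject peer) {
  bool timed_out;
  Thread* thread = ThreadList::SuspendThreadByPeer(peer,
                                                   true /* request_suspension */,
                                                   true /* debug_suspension */,
                                                   &timed_out);
  if (thread == NULL) {
    // Either the peer no longer names a live thread, or timed_out reports that
    // the 30s suspension timeout expired.
    return false;
  }
  // ... inspect the suspended thread here ...
  Runtime::Current()->GetThreadList()->Resume(thread, true /* for_debugger */);
  return true;
}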
@@ -99,8 +124,6 @@ class ThreadList {
return list_;
}
- Thread* FindThreadByThinLockId(uint32_t thin_lock_id);
-
private:
uint32_t AllocThreadId(Thread* self);
void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);