summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRazvan A Lupusoru <razvan.a.lupusoru@intel.com>2014-02-25 17:41:08 -0800
committerIan Rogers <irogers@google.com>2014-03-26 16:20:09 -0700
commit99ad7230ccaace93bf323dea9790f35fe991a4a2 (patch)
tree095705c674703953bf4c50f6a30a105420b770b5
parenta9e3d2ccfdbf7f4c7b1508bcb2b774037399b1d4 (diff)
downloadart-99ad7230ccaace93bf323dea9790f35fe991a4a2.zip
art-99ad7230ccaace93bf323dea9790f35fe991a4a2.tar.gz
art-99ad7230ccaace93bf323dea9790f35fe991a4a2.tar.bz2
Relaxed memory barriers for x86
X86 provides stronger memory guarantees and thus the memory barriers can be optimized. This patch ensures that all memory barriers for x86 are treated as scheduling barriers. And in cases where a barrier is needed (StoreLoad case), an mfence is used. Change-Id: I13d02bf3f152083ba9f358052aedb583b0d48640 Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
-rw-r--r--compiler/dex/compiler_enums.h12
-rw-r--r--compiler/dex/quick/arm/int_arm.cc15
-rw-r--r--compiler/dex/quick/gen_common.cc31
-rw-r--r--compiler/dex/quick/gen_invoke.cc20
-rw-r--r--compiler/dex/quick/local_optimizations.cc8
-rw-r--r--compiler/dex/quick/mir_to_lir-inl.h4
-rw-r--r--compiler/dex/quick/mir_to_lir.cc7
-rw-r--r--compiler/dex/quick/mir_to_lir.h10
-rw-r--r--compiler/dex/quick/x86/assemble_x86.cc1
-rw-r--r--compiler/dex/quick/x86/codegen_x86.h2
-rw-r--r--compiler/dex/quick/x86/int_x86.cc12
-rw-r--r--compiler/dex/quick/x86/target_x86.cc48
-rw-r--r--compiler/dex/quick/x86/x86_lir.h1
-rw-r--r--disassembler/disassembler_x86.cc6
14 files changed, 157 insertions, 20 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 718468f..eb4a336 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -339,7 +339,16 @@ enum DividePattern {
std::ostream& operator<<(std::ostream& os, const DividePattern& pattern);
-// Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+/**
+ * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+ * @details Without context sensitive analysis, the most conservative set of barriers
+ * must be issued to ensure the Java Memory Model. Thus the recipe is as follows:
+ * -# Use StoreStore barrier before volatile store.
+ * -# Use StoreLoad barrier after volatile store.
+ * -# Use LoadLoad and LoadStore barrier after each volatile load.
+ * -# Use StoreStore barrier after all stores but before return from any constructor whose
+ * class has final fields.
+ */
enum MemBarrierKind {
kLoadStore,
kLoadLoad,
@@ -364,6 +373,7 @@ enum OpFeatureFlags {
kPCRelFixup, // x86 FIXME: add NEEDS_FIXUP to instruction attributes.
kRegDef0,
kRegDef1,
+ kRegDef2,
kRegDefA,
kRegDefD,
kRegDefFPCSList0,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 1d959fa..93d48d6 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -777,6 +777,9 @@ LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) {
void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
+ // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
+ LIR* barrier = last_lir_insn_;
+
int dmb_flavor;
// TODO: revisit Arm barrier kinds
switch (barrier_kind) {
@@ -789,8 +792,16 @@ void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
dmb_flavor = kSY; // quiet gcc.
break;
}
- LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor);
- dmb->u.m.def_mask = ENCODE_ALL;
+
+ // If the same barrier already exists, don't generate another.
+ if (barrier == nullptr
+ || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
+ barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+ }
+
+ // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
+ DCHECK(!barrier->flags.use_def_invalid);
+ barrier->u.m.def_mask = ENCODE_ALL;
#endif
}
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 8c3a11f..3235977 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -479,6 +479,7 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double,
rl_src = LoadValue(rl_src, kAnyReg);
}
if (field_info.IsVolatile()) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
if (is_long_or_double) {
@@ -488,6 +489,7 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double,
StoreWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg.GetReg());
}
if (field_info.IsVolatile()) {
+ // A load might follow the volatile store so insert a StoreLoad barrier.
GenMemBarrier(kStoreLoad);
}
if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
@@ -559,9 +561,7 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest,
}
// r_base now holds static storage base
RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
- if (field_info.IsVolatile()) {
- GenMemBarrier(kLoadLoad);
- }
+
if (is_long_or_double) {
LoadBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg.GetReg(),
rl_result.reg.GetHighReg(), INVALID_SREG);
@@ -569,6 +569,14 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest,
LoadWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg.GetReg());
}
FreeTemp(r_base);
+
+ if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
+ GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
+ }
+
if (is_long_or_double) {
StoreValueWide(rl_dest, rl_result);
} else {
@@ -716,7 +724,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
rl_result.reg.GetHighReg(), rl_obj.s_reg_low);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
} else {
int reg_ptr = AllocTemp();
@@ -725,7 +736,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
LoadBaseDispWide(reg_ptr, 0, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(),
INVALID_SREG);
if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
FreeTemp(reg_ptr);
}
@@ -737,7 +751,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
rl_result.reg.GetReg(), kWord, rl_obj.s_reg_low);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
StoreValue(rl_dest, rl_result);
}
@@ -773,25 +790,29 @@ void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size,
reg_ptr = AllocTemp();
OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg.GetReg(), field_info.FieldOffset().Int32Value());
if (field_info.IsVolatile()) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
StoreBaseDispWide(reg_ptr, 0, rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
- GenMemBarrier(kLoadLoad);
+ // A load might follow the volatile store so insert a StoreLoad barrier.
+ GenMemBarrier(kStoreLoad);
}
FreeTemp(reg_ptr);
} else {
rl_src = LoadValue(rl_src, reg_class);
GenNullCheck(rl_obj.reg.GetReg(), opt_flags);
if (field_info.IsVolatile()) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
StoreBaseDisp(rl_obj.reg.GetReg(), field_info.FieldOffset().Int32Value(),
rl_src.reg.GetReg(), kWord);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
- GenMemBarrier(kLoadLoad);
+ // A load might follow the volatile store so insert a StoreLoad barrier.
+ GenMemBarrier(kStoreLoad);
}
if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
MarkGCCard(rl_src.reg.GetReg(), rl_obj.reg.GetReg());
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index f3c5a34..9e50749 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1356,18 +1356,27 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info,
RegLocation rl_src_offset = info->args[2]; // long low
rl_src_offset.wide = 0; // ignore high half in info->args[3]
RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info); // result reg
- if (is_volatile) {
- GenMemBarrier(kLoadLoad);
- }
+
RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
if (is_long) {
OpRegReg(kOpAdd, rl_object.reg.GetReg(), rl_offset.reg.GetReg());
LoadBaseDispWide(rl_object.reg.GetReg(), 0, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(), INVALID_SREG);
- StoreValueWide(rl_dest, rl_result);
} else {
LoadBaseIndexed(rl_object.reg.GetReg(), rl_offset.reg.GetReg(), rl_result.reg.GetReg(), 0, kWord);
+ }
+
+ if (is_volatile) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
+ GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
+ }
+
+ if (is_long) {
+ StoreValueWide(rl_dest, rl_result);
+ } else {
StoreValue(rl_dest, rl_result);
}
return true;
@@ -1385,6 +1394,7 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long,
rl_src_offset.wide = 0; // ignore high half in info->args[3]
RegLocation rl_src_value = info->args[4]; // value to store
if (is_volatile || is_ordered) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
@@ -1401,7 +1411,9 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long,
// Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
FreeTemp(rl_offset.reg.GetReg());
+
if (is_volatile) {
+ // A load might follow the volatile store so insert a StoreLoad barrier.
GenMemBarrier(kStoreLoad);
}
if (is_object) {
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 6df91e6..dd4af9c 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -92,7 +92,10 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) {
((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || // Skip wide loads.
((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
(REG_USE0 | REG_USE1 | REG_USE2)) || // Skip wide stores.
- !(target_flags & (IS_LOAD | IS_STORE))) {
+ // Skip instructions that are neither loads or stores.
+ !(target_flags & (IS_LOAD | IS_STORE)) ||
+ // Skip instructions that do both load and store.
+ ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
continue;
}
@@ -293,7 +296,8 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) {
/* Skip non-interesting instructions */
if (!(target_flags & IS_LOAD) ||
(this_lir->flags.is_nop == true) ||
- ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1))) {
+ ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
+ ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
continue;
}
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 8b1f81d..b2362fc 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -192,6 +192,10 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
SetupRegMask(&lir->u.m.def_mask, lir->operands[1]);
}
+ if (flags & REG_DEF2) {
+ SetupRegMask(&lir->u.m.def_mask, lir->operands[2]);
+ }
+
if (flags & REG_USE0) {
SetupRegMask(&lir->u.m.use_mask, lir->operands[0]);
}
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 82664e2..657c3d9 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -138,7 +138,10 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
LoadBaseDisp(reg_obj, data.field_offset, rl_dest.reg.GetReg(), kWord, INVALID_SREG);
}
if (data.is_volatile) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
return true;
}
@@ -160,6 +163,7 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
int reg_obj = LoadArg(data.object_arg);
int reg_src = LoadArg(data.src_arg, wide);
if (data.is_volatile) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
if (wide) {
@@ -170,7 +174,8 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
StoreBaseDisp(reg_obj, data.field_offset, reg_src, kWord);
}
if (data.is_volatile) {
- GenMemBarrier(kLoadLoad);
+ // A load might follow the volatile store so insert a StoreLoad barrier.
+ GenMemBarrier(kStoreLoad);
}
if (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT)) {
MarkGCCard(reg_src, reg_obj);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 5a1f6cd..51fe8f1 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -56,6 +56,7 @@ typedef uint32_t CodeOffset; // Native code offset in bytes.
#define NO_OPERAND (1ULL << kNoOperand)
#define REG_DEF0 (1ULL << kRegDef0)
#define REG_DEF1 (1ULL << kRegDef1)
+#define REG_DEF2 (1ULL << kRegDef2)
#define REG_DEFA (1ULL << kRegDefA)
#define REG_DEFD (1ULL << kRegDefD)
#define REG_DEF_FPCS_LIST0 (1ULL << kRegDefFPCSList0)
@@ -953,7 +954,16 @@ class Mir2Lir : public Backend {
*/
virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
+ /**
+ * @brief Used to generate a memory barrier in an architecture specific way.
+ * @details The last generated LIR will be considered for use as barrier. Namely,
+ * if the last LIR can be updated in a way where it will serve the semantics of
+ * barrier, then it will be used as such. Otherwise, a new LIR will be generated
+ * that can keep the semantics.
+ * @param barrier_kind The kind of memory barrier to generate.
+ */
virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
+
virtual void GenMoveException(RegLocation rl_dest) = 0;
virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
RegLocation rl_result, int lit, int first_bit,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 9cafcee..e7a1a69 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -344,6 +344,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
{ kX86LockCmpxchg8bM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1d]" },
{ kX86LockCmpxchg8bA, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
+ { kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0 }, "Xchg", "[!0r+!1d],!2r" },
EXT_0F_ENCODING_MAP(Movzx8, 0x00, 0xB6, REG_DEF0),
EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 275a2d9..f2654b9 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -372,6 +372,8 @@ class X86Mir2Lir : public Mir2Lir {
void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
void GenConstWide(RegLocation rl_dest, int64_t value);
+ static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
+
/*
* @brief generate inline code for fast case of Strng.indexOf.
* @param info Call parameters
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index c929265..14278a4 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -772,6 +772,11 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
: (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
LoadWordDisp(TargetReg(kSp), srcOffsetSp, rSI);
NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0);
+
+ // After a store we need to insert barrier in case of potential load. Since the
+ // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
+ GenMemBarrier(kStoreLoad);
+
FreeTemp(rSI);
UnmarkTemp(rSI);
NewLIR1(kX86Pop32R, rSI);
@@ -784,9 +789,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
FlushReg(r0);
LockTemp(r0);
- // Release store semantics, get the barrier out of the way. TODO: revisit
- GenMemBarrier(kStoreLoad);
-
RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
@@ -801,6 +803,10 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
LoadValueDirect(rl_src_expected, r0);
NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
+ // After a store we need to insert barrier in case of potential load. Since the
+ // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
+ GenMemBarrier(kStoreLoad);
+
FreeTemp(r0);
}
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 9994927..9588b17 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -425,10 +425,54 @@ void X86Mir2Lir::FreeCallTemps() {
FreeTemp(rX86_ARG3);
}
+bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
+ switch (opcode) {
+ case kX86LockCmpxchgMR:
+ case kX86LockCmpxchgAR:
+ case kX86LockCmpxchg8bM:
+ case kX86LockCmpxchg8bA:
+ case kX86XchgMR:
+ case kX86Mfence:
+ // Atomic memory instructions provide full barrier.
+ return true;
+ default:
+ break;
+ }
+
+ // Conservative if cannot prove it provides full barrier.
+ return false;
+}
+
void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
- // TODO: optimize fences
- NewLIR0(kX86Mfence);
+ // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
+ LIR* mem_barrier = last_lir_insn_;
+
+ /*
+ * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
+ * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
+ * to ensure is that there is a scheduling barrier in place.
+ */
+ if (barrier_kind == kStoreLoad) {
+ // If no LIR exists already that can be used a barrier, then generate an mfence.
+ if (mem_barrier == nullptr) {
+ mem_barrier = NewLIR0(kX86Mfence);
+ }
+
+ // If last instruction does not provide full barrier, then insert an mfence.
+ if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
+ mem_barrier = NewLIR0(kX86Mfence);
+ }
+ }
+
+ // Now ensure that a scheduling barrier is in place.
+ if (mem_barrier == nullptr) {
+ GenBarrier();
+ } else {
+ // Mark as a scheduling barrier.
+ DCHECK(!mem_barrier->flags.use_def_invalid);
+ mem_barrier->u.m.def_mask = ENCODE_ALL;
+ }
#endif
}
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index abe1b3d..b72e60d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -388,6 +388,7 @@ enum X86OpCode {
kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange
kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange
kX86LockCmpxchg8bM, kX86LockCmpxchg8bA, // locked compare and exchange
+ kX86XchgMR, // exchange memory with register (automatically locked)
Binary0fOpCode(kX86Movzx8), // zero-extend 8-bit value
Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value
Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index ab0ee52..4a03ebe 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -226,6 +226,12 @@ DISASSEMBLER_ENTRY(cmp,
opcode << "j" << condition_codes[*instr & 0xF];
branch_bytes = 1;
break;
+ case 0x86: case 0x87:
+ opcode << "xchg";
+ store = true;
+ has_modrm = true;
+ byte_operand = (*instr == 0x86);
+ break;
case 0x88: opcode << "mov"; store = true; has_modrm = true; byte_operand = true; break;
case 0x89: opcode << "mov"; store = true; has_modrm = true; break;
case 0x8A: opcode << "mov"; load = true; has_modrm = true; byte_operand = true; break;