diff options
author | Mark Mendell <mark.p.mendell@intel.com> | 2015-04-03 14:52:31 -0400 |
---|---|---|
committer | Mark Mendell <mark.p.mendell@intel.com> | 2015-04-06 23:05:13 -0400 |
commit | 58d25fd052e999a24734b0cf856a1563e3d1b2d0 (patch) | |
tree | b1dbeae13a24e3f7ec325698a3724b5d90df2ef9 /compiler | |
parent | 1b8e8cac2c96f6d2af8e7217f997a30e11c098b5 (diff) | |
download | art-58d25fd052e999a24734b0cf856a1563e3d1b2d0.zip art-58d25fd052e999a24734b0cf856a1563e3d1b2d0.tar.gz art-58d25fd052e999a24734b0cf856a1563e3d1b2d0.tar.bz2 |
[optimizing] Implement more x86/x86_64 intrinsics
Implement the CAS, bit-reverse, and byte-reverse intrinsics that were
missing from the x86 and x86_64 implementations.
Add assembler tests and a compareAndSwapLong test.
Change-Id: Iabb2ff46036645df0a91f640288ef06090a64ee3
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 202 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 174 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 9 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.h | 5 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86_test.cc | 45 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 11 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 5 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 50 |
8 files changed, 490 insertions, 11 deletions
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index b6e4510..aec2d19 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -320,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); } +void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { + CreateLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + X86Assembler* assembler = GetAssembler(); + // Assign the inputs to the outputs, mixing low/high. + __ movl(output_lo, input_hi); + __ movl(output_hi, input_lo); + __ bswapl(output_lo); + __ bswapl(output_hi); +} + void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -1330,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + // Offset is a long, but in 32 bit mode, we only need the low word. + // Can we update the invoke here to remove a TypeConvert to Long? 
+ locations->SetInAt(2, Location::RequiresRegister()); + // Expected value must be in EAX or EDX:EAX. + // For long, new value must be in ECX:EBX. + if (type == Primitive::kPrimLong) { + locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); + locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); + } else { + locations->SetInAt(3, Location::RegisterLocation(EAX)); + locations->SetInAt(4, Location::RequiresRegister()); + } + + // Force a byte register for the output. + locations->SetOut(Location::RegisterLocation(EAX)); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + // Need a byte register for marking. + locations->AddTemp(Location::RegisterLocation(ECX)); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location out = locations->Out(); + DCHECK_EQ(out.AsRegister<Register>(), EAX); + + if (type == Primitive::kPrimLong) { + DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); + DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); + __ 
LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + } else { + // Integer or object. + DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); + Register value = locations->InAt(4).AsRegister<Register>(); + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. + __ setb(kZero, out.AsRegister<Register>()); + __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, + X86Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + 
LocationSummary* locations = invoke->GetLocations(); + + Register reg = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // We want to swap high/low, then bswap each one, and then do the same + // as a 32 bit reverse. + // Exchange high and low. 
+ __ movl(temp, reg_low); + __ movl(reg_low, reg_high); + __ movl(reg_high, temp); + + // bit-reverse low + __ bswapl(reg_low); + SwapBits(reg_low, temp, 1, 0x55555555, assembler); + SwapBits(reg_low, temp, 2, 0x33333333, assembler); + SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); + + // bit-reverse high + __ bswapl(reg_high); + SwapBits(reg_high, temp, 1, 0x55555555, assembler); + SwapBits(reg_high, temp, 2, 0x33333333, assembler); + SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1338,16 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86 diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index f6fa013..5122a00 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1202,6 +1202,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
+ locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // expected value must be in EAX/RAX. + locations->SetInAt(3, Location::RegisterLocation(RAX)); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); + DCHECK_EQ(expected.AsRegister(), RAX); + CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); + } else { + // Integer or object. + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. 
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. + __ setcc(kZero, out); + __ movzxb(out, out); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, + X86_64Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. 
Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, + int32_t shift, int64_t mask, X86_64Assembler* assembler) { + Immediate imm_shift(shift); + __ movq(temp_mask, Immediate(mask)); + __ movq(temp, reg); + __ shrq(reg, imm_shift); + __ andq(temp, temp_mask); + __ andq(reg, temp_mask); + __ shlq(temp, imm_shift); + __ orq(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a long number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. 
+ * x = bswap x + * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; + * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; + * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; + */ + __ bswapq(reg); + SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); + SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); + SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1210,14 +1379,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86_64 diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index b3a1376..4ecb1d8 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1467,6 +1467,15 @@ void X86Assembler::cmpxchgl(const Address& address, Register reg) { EmitOperand(reg, address); } + +void X86Assembler::cmpxchg8b(const Address& address) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC7); + EmitOperand(1, address); +} + + void X86Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index bdf8843..46630e3 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -457,6 +457,7 @@ class X86Assembler FINAL : public 
Assembler { X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); + void cmpxchg8b(const Address& address); void mfence(); @@ -476,6 +477,10 @@ class X86Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchg8b(const Address& address) { + lock()->cmpxchg8b(address); + } + // // Misc. functionality // diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index fccb510..dba3b6b 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -127,4 +127,49 @@ TEST_F(AssemblerX86Test, LoadLongConstant) { DriverStr(expected, "LoadLongConstant"); } +TEST_F(AssemblerX86Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::EDI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), 0), x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0), + x86::Register(x86::ESI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %ESI, (%EBP)\n" + "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86Test, LockCmpxchg8b) { + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + 
GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0)); + const char* expected = + "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b (%EBP)\n" + "lock cmpxchg8b (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchg8b"); +} + } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index e82d90c..627d94e 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1854,11 +1854,22 @@ X86_64Assembler* X86_64Assembler::lock() { void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(reg, address); + EmitUint8(0x0F); + EmitUint8(0xB1); + EmitOperand(reg.LowBits(), address); +} + + +void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg, address); EmitUint8(0x0F); EmitUint8(0xB1); EmitOperand(reg.LowBits(), address); } + void X86_64Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 39f781c..3b4fbb5 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -517,6 +517,7 @@ class X86_64Assembler FINAL : public Assembler { X86_64Assembler* lock(); void cmpxchgl(const Address& address, CpuRegister reg); + void cmpxchgq(const Address& address, CpuRegister reg); void mfence(); @@ -539,6 +540,10 @@ class X86_64Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void 
LockCmpxchgq(const Address& address, CpuRegister reg) { + lock()->cmpxchgq(address, reg); + } + // // Misc. functionality // diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 4402dfc..a79bd09 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -539,6 +539,56 @@ TEST_F(AssemblerX86_64Test, Xchgl) { // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl"); } +TEST_F(AssemblerX86_64Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %ESI, (%R13)\n" + "lock cmpxchgl %ESI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86_64Test, LockCmpxchgq) { + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), 
x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %RSI, (%R13)\n" + "lock cmpxchg %RSI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchg"); +} + TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); |