diff options
author | Calin Juravle <calin@google.com> | 2014-12-16 17:02:57 +0000 |
---|---|---|
committer | Calin Juravle <calin@google.com> | 2014-12-19 09:58:27 +0000 |
commit | 52c489645b6e9ae33623f1ec24143cde5444906e (patch) | |
tree | a39667aa354645bd42a7a061d08ca82df3004143 | |
parent | 193c7a94822f765b0b6b0cecd54c9f08dfd26425 (diff) | |
download | art-52c489645b6e9ae33623f1ec24143cde5444906e.zip art-52c489645b6e9ae33623f1ec24143cde5444906e.tar.gz art-52c489645b6e9ae33623f1ec24143cde5444906e.tar.bz2 |
[optimizing compiler] Add support for volatile
- for backends: arm, x86, x86_64
- added necessary instructions to assemblies
- clean up code gen for field set/get
- fixed InstructionDataEquals for some instructions
- fixed comments in compiler_enums
* 003-opcode test verifies basic volatile functionality
Change-Id: I144393efa312dfb2c332cb84056b00edffee338a
21 files changed, 806 insertions, 675 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 7ff06a0..7edb490 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -555,7 +555,7 @@ std::ostream& operator<<(std::ostream& os, const DividePattern& pattern); * The current recipe is as follows: * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store. * -# Use AnyAny barrier after volatile store. (StoreLoad is as expensive.) - * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrierafter each volatile load. + * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. * -# Use StoreStore barrier after all stores but before return from any constructor whose * class has final fields. * -# Use NTStoreStore to order non-temporal stores with respect to all later diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f9054e0..dde0dfe 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -670,10 +670,13 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField); return false; } + +#if defined(__aarch64__) if (resolved_field->IsVolatile()) { MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); return false; } +#endif Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); @@ -689,12 +692,14 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, null_check, value, field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction(new (arena_) HInstanceFieldGet( current_block_->GetLastInstruction(), field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } @@ -723,10 +728,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, return false; } +#if defined(__aarch64__) if (resolved_field->IsVolatile()) { MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); return false; } +#endif Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass( soa, dex_cache, class_loader, outer_compilation_unit_))); @@ -763,10 +770,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, HInstruction* value = LoadLocal(source_or_dest_reg, field_type); DCHECK_EQ(value->GetType(), field_type); current_block_->AddInstruction( - new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction( - new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } return true; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 002d9d4..063dc7c 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -2556,68 +2556,170 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit Arm barrier kinds + DmbOptions flavour = DmbOptions::ISH; // quiet c++ warnings + switch (kind) { + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kAnyAny: { + flavour = DmbOptions::ISH; + break; + } + case MemBarrierKind::kStoreStore: { + flavour = DmbOptions::ISHST; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ dmb(flavour); +} + +void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, + uint32_t offset, + Register out_lo, + Register out_hi) { + if (offset != 0) { + __ LoadImmediate(out_lo, offset); + __ add(addr, addr, ShifterOperand(out_lo)); + } + __ ldrexd(out_lo, out_hi, addr); +} + +void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, + uint32_t offset, + Register value_lo, + Register value_hi, + Register temp1, + Register temp2) { + Label fail; + if (offset != 0) { + __ LoadImmediate(temp1, offset); + __ add(addr, addr, ShifterOperand(temp1)); + } + __ Bind(&fail); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) + __ ldrexd(temp1, temp2, addr); + __ strexd(temp1, value_lo, value_hi, addr); + __ cmp(temp1, ShifterOperand(0)); + __ b(&fail, NE); +} + +void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); + + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; + // Temporary registers for the write barrier. - if (needs_write_barrier) { + // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else if (is_volatile && is_wide) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. + DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + if (field_type == Primitive::kPrimDouble) { + // For doubles we need two more registers to copy the value. + locations->AddTemp(Location::RegisterLocation(R2)); + locations->AddTemp(Location::RegisterLocation(R3)); + } } } -void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, obj, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { + Register value_reg = value.AsRegister<Register>(); + __ StoreToOffset(kStoreWord, value_reg, base, offset); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { Register temp = locations->GetTemp(0).AsRegister<Register>(); Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); + codegen_->MarkGCCard(temp, card, base, value_reg); } break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile) { + // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension + // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should + // pass it around to be able to optimize. + GenerateWideAtomicStore(base, offset, + value.AsRegisterPairLow<Register>(), + value.AsRegisterPairHigh<Register>(), + locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>()); + } else { + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), base, offset); + } break; } case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, obj, offset); + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, obj, offset); + DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile) { + Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>(); + Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>(); + + __ vmovrrd(value_reg_lo, value_reg_hi, value_reg); + + GenerateWideAtomicStore(base, offset, + value_reg_lo, + value_reg_hi, + locations->GetTemp(2).AsRegister<Register>(), + locations->GetTemp(3).AsRegister<Register>()); + } else { + __ StoreDToOffset(value_reg, base, offset); + } break; } @@ -2625,75 +2727,138 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } -void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. + DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { + switch (field_type) { case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, obj, offset); + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimLong: { - // TODO: support volatile. - Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile) { + GenerateWideAtomicLoad(base, offset, + out.AsRegisterPairLow<Register>(), + out.AsRegisterPairHigh<Register>()); + } else { + __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); + } break; } case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, obj, offset); + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, obj, offset); + DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile) { + Register lo = locations->GetTemp(0).AsRegister<Register>(); + Register hi = locations->GetTemp(1).AsRegister<Register>(); + GenerateWideAtomicLoad(base, offset, lo, hi); + __ vmovdrr(out_reg, lo, hi); + } else { + __ LoadDFromOffset(out_reg, base, offset); + } break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } +} + +void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { @@ -3206,146 +3371,6 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, cls, offset); - break; - } - - case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, cls, offset); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, cls, offset); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, cls, offset); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, cls, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 226e635..b86670d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm/assembler_thumb2.h" @@ -110,6 +111,8 @@ class LocationsBuilderARM : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -138,6 +141,15 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateWideAtomicStore(Register addr, uint32_t offset, + Register value_lo, Register value_hi, + Register temp1, Register temp2); + void GenerateWideAtomicLoad(Register addr, uint32_t offset, + Register out_lo, Register out_hi); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e7edd8a..ddb0e82 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -2656,82 +2656,115 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. + */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } +} + + +void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { + Label is_null; + __ testl(value, value); + __ j(kEqual, &is_null); + __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); + __ movl(temp, object); + __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + __ movb(Address(temp, card, TIMES_1, 0), + X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); + __ Bind(&is_null); +} + +void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { + // Long values can be loaded atomically into an XMM using movsd. + // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM + // and then copy the XMM into the output 32bits at a time). + locations->AddTemp(Location::RequiresFpuRegister()); } } -void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(obj, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + if (is_volatile) { + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(temp, Address(base, offset)); + __ movd(out.AsRegisterPairLow<Register>(), temp); + __ psrlq(temp, Immediate(32)); + __ movd(out.AsRegisterPairHigh<Register>(), temp); + } else { + __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); + __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); + } break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2739,87 +2772,152 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } -} -void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { - Label is_null; - __ testl(value, value); - __ j(kEqual, &is_null); - __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); - __ movl(temp, object); - __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); - __ movb(Address(temp, card, TIMES_1, 0), - X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); - __ Bind(&is_null); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool is_byte_type = (field_type == Primitive::kPrimBoolean) + || (field_type == Primitive::kPrimByte); + + // The register allocator does not support multiple + // inputs that die at entry with one in a specific register. + if (is_byte_type) { + // Ensure the value is in a byte register. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // Temporary registers for the write barrier. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (is_volatile && (field_type == Primitive::kPrimLong)) { + // 64bits value can be atomically written to an address with movsd and an XMM register. + // We need two XMM registers because there's no easier way to (bit) copy a register pair + // into a single XMM register (we copy each pair part into the XMMs and then interleave them). + // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the + // isolated cases when we need this it isn't worth adding the extra complexity. + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } } -void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<ByteRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<Register>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<Register>()); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); + } break; } case Primitive::kPrimLong: { - // TODO: support volatile. - __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(obj, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(obj, kX86WordSize + offset)); + if (is_volatile) { + XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + __ movd(temp1, value.AsRegisterPairLow<Register>()); + __ movd(temp2, value.AsRegisterPairHigh<Register>()); + __ punpckldq(temp1, temp2); + __ movsd(Address(base, offset), temp1); + } else { + __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); + __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + } break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { @@ -3383,159 +3481,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. } -void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(cls, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(cls, kX86WordSize + offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); - } -} - -void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(cls, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(cls, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(cls, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index aed06c0..636f884 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" @@ -105,6 +106,8 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); void HandleShift(HBinaryOperation* instruction); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -137,6 +140,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ff7fcdc..1bc3092 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -2389,69 +2389,87 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } -void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. + */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barier " << kind; + } +} + +void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(obj, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(obj, offset), value); + __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2459,74 +2477,124 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_info.GetFieldType(), instruction->InputAt(1)); + locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetInAt(1, Location::RequiresRegister()); + if (needs_write_barrier) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(obj, offset)); + __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { @@ -3222,146 +3290,6 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); } -void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(cls, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(cls, offset), value); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 794b81f..0708864 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" @@ -109,6 +110,8 @@ class LocationsBuilderX86_64 : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction); CodeGeneratorX86_64* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -138,6 +141,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 94ff192..48f1ea9 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -40,18 +40,22 @@ TEST(GVNTest, LocalFieldElimination) { entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* to_remove = block->GetLastInstruction(); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(43))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(43), false)); HInstruction* different_offset = block->GetLastInstruction(); // Kill the value. block->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* use_after_kill = block->GetLastInstruction(); block->AddInstruction(new (&allocator) HExit()); @@ -82,7 +86,8 @@ TEST(GVNTest, GlobalFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); HBasicBlock* then = new (&allocator) HBasicBlock(graph); @@ -98,13 +103,16 @@ TEST(GVNTest, GlobalFieldElimination) { else_->AddSuccessor(join); then->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); then->AddInstruction(new (&allocator) HGoto()); else_->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); else_->AddInstruction(new (&allocator) HGoto()); join->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); join->AddInstruction(new (&allocator) HExit()); graph->TryBuildingSsa(); @@ -132,7 +140,8 @@ TEST(GVNTest, LoopFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HGoto()); HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph); @@ -148,22 +157,25 @@ TEST(GVNTest, LoopFieldElimination) { loop_body->AddSuccessor(loop_header); loop_header->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction(); loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); // Kill inside the loop body to prevent field gets inside the loop header // and the body to be GVN'ed. loop_body->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); HInstruction* field_set = loop_body->GetLastInstruction(); loop_body->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction(); loop_body->AddInstruction(new (&allocator) HGoto()); exit->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_exit = exit->GetLastInstruction(); exit->AddInstruction(new (&allocator) HExit()); @@ -242,7 +254,7 @@ TEST(GVNTest, LoopSideEffects) { { // Make one block with a side effect. entry->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); GlobalValueNumberer gvn(&allocator, graph); gvn.Run(); @@ -256,7 +268,7 @@ TEST(GVNTest, LoopSideEffects) { { outer_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), outer_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); @@ -273,7 +285,7 @@ TEST(GVNTest, LoopSideEffects) { outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction()); inner_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), inner_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index c963b70..b9b2304 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2128,39 +2128,45 @@ class HNullCheck : public HExpression<1> { class FieldInfo : public ValueObject { public: - FieldInfo(MemberOffset field_offset, Primitive::Type field_type) - : field_offset_(field_offset), field_type_(field_type) {} + FieldInfo(MemberOffset field_offset, Primitive::Type field_type, bool is_volatile) + : field_offset_(field_offset), field_type_(field_type), is_volatile_(is_volatile) {} MemberOffset GetFieldOffset() const { return field_offset_; } Primitive::Type GetFieldType() const { return field_type_; } + bool IsVolatile() const { return is_volatile_; } private: const MemberOffset field_offset_; const Primitive::Type field_type_; + const bool is_volatile_; }; class HInstanceFieldGet : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, value); } - virtual bool CanBeMoved() const { return true; } - virtual bool InstructionDataEquals(HInstruction* other) const { - size_t other_offset = other->AsInstanceFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + bool CanBeMoved() const OVERRIDE { return IsVolatile(); } + + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + HInstanceFieldGet* other_get = other->AsInstanceFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } virtual size_t ComputeHashCode() const { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(InstanceFieldGet); @@ -2175,15 +2181,18 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { HInstanceFieldSet(HInstruction* object, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, object); SetRawInputAt(1, value); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ -2496,24 +2505,29 @@ class HStaticFieldGet : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); } - bool CanBeMoved() const OVERRIDE { return true; } + + bool CanBeMoved() const OVERRIDE { return IsVolatile(); } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - size_t other_offset = other->AsStaticFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + HStaticFieldGet* other_get = other->AsStaticFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } size_t ComputeHashCode() const OVERRIDE { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(StaticFieldGet); @@ -2528,15 +2542,18 @@ class HStaticFieldSet : public HTemplateInstruction<2> { HStaticFieldSet(HInstruction* cls, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); SetRawInputAt(1, value); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ -2677,7 +2694,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(MonitorOperation); - protected: + private: const OperationKind kind_; const uint32_t dex_pc_; @@ -2685,7 +2702,6 @@ class HMonitorOperation : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HMonitorOperation); }; - class MoveOperands : public ArenaObject<kArenaAllocMisc> { public: MoveOperands(Location source, Location destination, HInstruction* instruction) diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index f677e84..c2ea80e 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -462,7 +462,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, entry->AddSuccessor(block); HInstruction* test = new (allocator) HInstanceFieldGet( - parameter, Primitive::kPrimBoolean, MemberOffset(22)); + parameter, Primitive::kPrimBoolean, MemberOffset(22), false); block->AddInstruction(test); block->AddInstruction(new (allocator) HIf(test)); HBasicBlock* then = new (allocator) HBasicBlock(graph); @@ -481,8 +481,10 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, *phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); join->AddPhi(*phi); - *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); - *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); + *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); + *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); then->AddInstruction(*input1); else_->AddInstruction(*input2); join->AddInstruction(new (allocator) HExit()); @@ -581,7 +583,8 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, graph->AddBlock(block); entry->AddSuccessor(block); - *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); + *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); block->AddInstruction(*field); *ret = new (allocator) HReturn(*field); block->AddInstruction(*ret); diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index c86ec4b..87b3813 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -429,6 +429,8 @@ class ArmAssembler : public Assembler { virtual void ldrex(Register rd, Register rn, Condition cond = AL) = 0; virtual void strex(Register rd, Register rt, Register rn, Condition cond = AL) = 0; + virtual void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) = 0; + virtual void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) = 0; // Miscellaneous instructions. virtual void clrex(Condition cond = AL) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 8f6d45a..8d1fb60 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -778,6 +778,7 @@ void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode, Emit(encoding); } + void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) { CHECK_NE(rn, kNoRegister); CHECK_NE(rt, kNoRegister); @@ -793,6 +794,25 @@ void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) { } +void Arm32Assembler::ldrexd(Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, R14); + CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); + CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); + CHECK_NE(cond, kNoCondition); + + int32_t encoding = + (static_cast<uint32_t>(cond) << kConditionShift) | + B24 | B23 | B21 | B20 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0; + Emit(encoding); +} + + void Arm32Assembler::strex(Register rd, Register rt, Register rn, @@ -811,6 +831,28 @@ void Arm32Assembler::strex(Register rd, Emit(encoding); } +void Arm32Assembler::strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, R14); + CHECK_NE(rd, rt); + CHECK_NE(rd, rt2); + CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); + CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); + CHECK_NE(cond, kNoCondition); + + int32_t encoding = + (static_cast<uint32_t>(cond) << kConditionShift) | + B24 | B23 | B21 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rd) << 12 | + B11 | B10 | B9 | B8 | B7 | B4 | + static_cast<uint32_t>(rt); + Emit(encoding); +} + void Arm32Assembler::clrex(Condition cond) { CHECK_EQ(cond, AL); // This cannot be conditional on ARM. diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 6c8d415..b922d66 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -123,6 +123,8 @@ class Arm32Assembler FINAL : public ArmAssembler { void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE; void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE; + void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; + void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; // Miscellaneous instructions. void clrex(Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc index 951792d..4a0ae0b 100644 --- a/compiler/utils/arm/assembler_arm32_test.cc +++ b/compiler/utils/arm/assembler_arm32_test.cc @@ -697,4 +697,28 @@ TEST_F(AssemblerArm32Test, Vmstat) { DriverStr(expected, "vmrs"); } +TEST_F(AssemblerArm32Test, ldrexd) { + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2); + + const char* expected = + "ldrexd r0, r1, [r0]\n" + "ldrexd r0, r1, [r1]\n" + "ldrexd r0, r1, [r2]\n"; + DriverStr(expected, "ldrexd"); +} + +TEST_F(AssemblerArm32Test, strexd) { + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2); + + const char* expected = + "strexd r9, r0, r1, [r0]\n" + "strexd r9, r0, r1, [r1]\n" + "strexd r9, r0, r1, [r2]\n"; + DriverStr(expected, "strexd"); +} + } // namespace art diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 3eaae56..3eccd3f 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -1662,9 +1662,6 @@ void Thumb2Assembler::ldrex(Register rt, Register rn, uint16_t imm, Condition co CHECK_NE(rn, kNoRegister); CHECK_NE(rt, kNoRegister); CheckCondition(cond); - CHECK_NE(rn, kNoRegister); - CHECK_NE(rt, kNoRegister); - CheckCondition(cond); CHECK_LT(imm, (1u << 10)); int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 | @@ -1701,6 +1698,22 @@ void Thumb2Assembler::strex(Register rd, } +void Thumb2Assembler::ldrexd(Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, rt2); + CheckCondition(cond); + + int32_t encoding = B31 | B30 | B29 | B27 | B23 | B22 | B20 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + static_cast<uint32_t>(rt2) << 8 | + B6 | B5 | B4 | B3 | B2 | B1 | B0; + Emit32(encoding); +} + + void Thumb2Assembler::strex(Register rd, Register rt, Register rn, @@ -1709,6 +1722,26 @@ void Thumb2Assembler::strex(Register rd, } +void Thumb2Assembler::strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, rt2); + CHECK_NE(rd, rt); + CHECK_NE(rd, rt2); + CheckCondition(cond); + + int32_t encoding = B31 | B30 | B29 | B27 | B23 | B22 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + static_cast<uint32_t>(rt2) << 8 | + B6 | B5 | B4 | + static_cast<uint32_t>(rd); + Emit32(encoding); +} + + void Thumb2Assembler::clrex(Condition cond) { CheckCondition(cond); int32_t encoding = B31 | B30 | B29 | B27 | B28 | B25 | B24 | B23 | diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 48a3a7e..81dd138 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -149,6 +149,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL); void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL); + void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; + void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; // Miscellaneous instructions. void clrex(Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 6ae95a4..425ccd7 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -164,4 +164,32 @@ TEST_F(AssemblerThumb2Test, Vmstat) { DriverStr(expected, "vmrs"); } +TEST_F(AssemblerThumb2Test, ldrexd) { + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2); + GetAssembler()->ldrexd(arm::R5, arm::R3, arm::R7); + + const char* expected = + "ldrexd r0, r1, [r0]\n" + "ldrexd r0, r1, [r1]\n" + "ldrexd r0, r1, [r2]\n" + "ldrexd r5, r3, [r7]\n"; + DriverStr(expected, "ldrexd"); +} + +TEST_F(AssemblerThumb2Test, strexd) { + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2); + GetAssembler()->strexd(arm::R9, arm::R5, arm::R3, arm::R7); + + const char* expected = + "strexd r9, r0, r1, [r0]\n" + "strexd r9, r0, r1, [r1]\n" + "strexd r9, r0, r1, [r2]\n" + "strexd r9, r5, r3, [r7]\n"; + DriverStr(expected, "strexd"); +} + } // namespace art diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index f0353f6..f8c0043 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -443,6 +443,27 @@ void X86Assembler::movsd(XmmRegister dst, XmmRegister src) { } +void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x62); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::addsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 9fecf1e..6c3d131 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -274,6 +274,9 @@ class X86Assembler FINAL : public Assembler { void movsd(const Address& dst, XmmRegister src); void movsd(XmmRegister dst, XmmRegister src); + void psrlq(XmmRegister reg, const Immediate& shift_count); + void punpckldq(XmmRegister dst, XmmRegister src); + void addsd(XmmRegister dst, XmmRegister src); void addsd(XmmRegister dst, const Address& src); void subsd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index d901673..fccb510 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -105,6 +105,18 @@ TEST_F(AssemblerX86Test, Movl) { DriverStr(expected, "movl"); } +TEST_F(AssemblerX86Test, psrlq) { + GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32)); + const char* expected = "psrlq $0x20, %xmm0\n"; + DriverStr(expected, "psrlq"); +} + +TEST_F(AssemblerX86Test, punpckldq) { + GetAssembler()->punpckldq(x86::XMM0, x86::XMM1); + const char* expected = "punpckldq %xmm1, %xmm0\n"; + DriverStr(expected, "punpckldq"); +} + TEST_F(AssemblerX86Test, LoadLongConstant) { GetAssembler()->LoadLongConstant(x86::XMM0, 51); const char* expected = |