diff options
author | Manman Ren <mren@apple.com> | 2012-06-18 22:23:48 +0000 |
---|---|---|
committer | Manman Ren <mren@apple.com> | 2012-06-18 22:23:48 +0000 |
commit | eda9fdf979bd1c017304f8e9331f2c7df5df2d1c (patch) | |
tree | 023310a2d90954d98bef98bcced38ade48156395 | |
parent | e877c4f9c7b4e4142f33a29e6cd1a07262525a12 (diff) | |
download | external_llvm-eda9fdf979bd1c017304f8e9331f2c7df5df2d1c.zip external_llvm-eda9fdf979bd1c017304f8e9331f2c7df5df2d1c.tar.gz external_llvm-eda9fdf979bd1c017304f8e9331f2c7df5df2d1c.tar.bz2 |
ARM: use NOEN loads and stores if possible when handling struct byval.
This change is to be enabled in clang.
rdar://9877866
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158684 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 50 | ||||
-rw-r--r-- | test/CodeGen/ARM/struct_byval.ll | 14 |
2 files changed, 56 insertions, 8 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bf17f15..cbe5215 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6260,11 +6260,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { bool isThumb2 = Subtarget->isThumb2(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc, strOpc, UnitSize; + unsigned ldrOpc, strOpc, UnitSize = 0; const TargetRegisterClass *TRC = isThumb2 ? (const TargetRegisterClass*)&ARM::tGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *TRC_Vec = 0; if (Align & 1) { ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; @@ -6275,10 +6276,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; UnitSize = 2; } else { - ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; - UnitSize = 4; + // Check whether we can use NEON instructions. + if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if ((Align % 16 == 0) && SizeVal >= 16) { + ldrOpc = ARM::VLD1q32wb_fixed; + strOpc = ARM::VST1q32wb_fixed; + UnitSize = 16; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; + } + else if ((Align % 8 == 0) && SizeVal >= 8) { + ldrOpc = ARM::VLD1d32wb_fixed; + strOpc = ARM::VST1d32wb_fixed; + UnitSize = 8; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; + } + } + // Can't use NEON instructions. + if (UnitSize == 0) { + ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + UnitSize = 4; + } } + unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6289,10 +6310,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(destIn).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch) .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); @@ -6434,8 +6462,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(TRC); - if (isThumb2) { + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(destPhi).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll index 0c2f739..99ba475 100644 --- a/test/CodeGen/ARM/struct_byval.ll +++ b/test/CodeGen/ARM/struct_byval.ll @@ -28,5 +28,19 @@ entry: ret i32 0 } +; Generate a loop using NEON instructions +define i32 @h() nounwind ssp { +entry: +; CHECK: h: +; CHECK: vld1 +; CHECK: sub +; CHECK: vst1 +; CHECK: bne + %st = alloca %struct.LargeStruct, align 16 + %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st) + ret i32 0 +} + declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind +declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind |