diff options
author | Bob Wilson <bob.wilson@apple.com> | 2010-09-03 18:16:02 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2010-09-03 18:16:02 +0000 |
commit | 856eafb9481b280ea46b940c4e2d3149375f4026 (patch) | |
tree | 04f670fdaa06a53a6852de38f553c14f0f3ff356 | |
parent | 62a71a4118119e042b2f64786d197939c7cbab7e (diff) | |
download | external_llvm-856eafb9481b280ea46b940c4e2d3149375f4026.zip external_llvm-856eafb9481b280ea46b940c4e2d3149375f4026.tar.gz external_llvm-856eafb9481b280ea46b940c4e2d3149375f4026.tar.bz2 |
Finish converting the rest of the NEON VLD instructions to use pseudo-
instructions prior to regalloc. Since it's getting a little close to
the 2.8 branch deadline, I'll have to leave the rest of the instructions
handled by the NEONPreAllocPass for now, but I didn't want to leave half
of the VLD instructions converted and the other half not.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112983 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 74 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 145 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 40 | ||||
-rw-r--r-- | lib/Target/ARM/NEONPreAllocPass.cpp | 44 |
4 files changed, 149 insertions, 154 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 6f634b3..fc2e3c3 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -105,16 +105,17 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, D2 = TRI->getSubReg(DstReg, ARM::dsub_5); D3 = TRI->getSubReg(DstReg, ARM::dsub_7); } - MIB.addReg(D0).addReg(D1); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 2) - MIB.addReg(D2); + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 3) - MIB.addReg(D3); + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); if (hasWriteBack) { bool WBIsDead = MI.getOperand(OpIdx).isDead(); unsigned WBReg = MI.getOperand(OpIdx++).getReg(); - MIB.addReg(WBReg, getDefRegState(true) | getDeadRegState(WBIsDead)); + MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead)); } // Copy the addrmode6 operands. bool AddrIsKill = MI.getOperand(OpIdx).isKill(); @@ -128,9 +129,12 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, MIB = AddDefaultPred(MIB); TransferImpOps(MI, MIB, MIB); - // Add an implicit def for the super-reg. - MIB.addReg(DstReg, (getDefRegState(true) | getDeadRegState(DstIsDead) | - getImplRegState(true))); + // For an instruction writing the odd subregs, add an implicit use of the + // super-register because the even subregs were loaded separately. + if (RegSpc == OddDblSpc) + MIB.addReg(DstReg, RegState::Implicit); + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); MI.eraseFromParent(); } @@ -147,7 +151,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, if (hasWriteBack) { bool DstIsDead = MI.getOperand(OpIdx).isDead(); unsigned DstReg = MI.getOperand(OpIdx++).getReg(); - MIB.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)); + MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)); } // Copy the addrmode6 operands. bool AddrIsKill = MI.getOperand(OpIdx).isKill(); @@ -336,15 +340,63 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VLD2q32Pseudo_UPD: ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break; + case ARM::VLD3d8Pseudo: + ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo: + ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo: + ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break; case ARM::VLD1d64TPseudo: ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break; + case ARM::VLD3d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break; case ARM::VLD1d64TPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1d64T, true, SingleSpc, 3); break; - + ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VLD3q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break; + + case ARM::VLD4d8Pseudo: + ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo: + ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo: + ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break; case ARM::VLD1d64QPseudo: ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break; + case ARM::VLD4d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break; case ARM::VLD1d64QPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1d64Q, true, SingleSpc, 4); break; + ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break; + case ARM::VLD4q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break; case ARM::VST1q8Pseudo: ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index e2517dd..51a30c1 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1111,120 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, break; } + EVT ResTy; + if (NumVecs == 1) + ResTy = VT; + else { + unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; + if (!is64BitVector) + ResTyElts *= 2; + ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); + } + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue SuperReg; if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - SDNode *VLd; - if (NumVecs <= 2) { - EVT ResTy; - if (NumVecs == 1) - ResTy = VT; - else - ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs); - VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); - } else { - std::vector<EVT> ResTys(NumVecs, VT); - ResTys.push_back(MVT::Other); - VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - } + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); if (NumVecs == 1) return VLd; - SDValue SuperReg; - if (NumVecs <= 2) - SuperReg = SDValue(VLd, 0); - else { - SDValue V0 = SDValue(VLd, 0); - SDValue V1 = SDValue(VLd, 1); - // Form a REG_SEQUENCE to force register allocation. - SDValue V2 = SDValue(VLd, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLd, 3); - SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - + SuperReg = SDValue(VLd, 0); assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec, dl, VT, SuperReg); ReplaceUses(SDValue(N, Vec), D); } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs <= 2 ? 1 : NumVecs)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); return NULL; } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { // Quad registers are directly supported for VLD1 and VLD2, // loading pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - - EVT ResTy; - if (NumVecs == 1) - ResTy = VT; - else - ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, 2 * NumVecs); SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); - - // Combine the even and odd subregs to produce the result. if (NumVecs == 1) return VLd; - SDValue QQ = SDValue(VLd, 0); + SuperReg = SDValue(VLd, 0); Chain = SDValue(VLd, 1); - SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); - SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); - ReplaceUses(SDValue(N, 0), Q0); - ReplaceUses(SDValue(N, 1), Q1); } else { // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - - std::vector<EVT> ResTys(NumVecs, RegVT); - ResTys.push_back(MemAddr.getValueType()); - ResTys.push_back(MVT::Other); + EVT AddrTy = MemAddr.getValueType(); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); - Chain = SDValue(VLdA, NumVecs+1); + SDValue ImplDef = + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); + const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; + SDNode *VLdA = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7); + Chain = SDValue(VLdA, 2); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), - Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); - Chain = SDValue(VLdB, NumVecs+1); - - SDValue V0 = SDValue(VLdA, 0); - SDValue V1 = SDValue(VLdB, 0); - SDValue V2 = SDValue(VLdA, 1); - SDValue V3 = SDValue(VLdB, 1); - SDValue V4 = SDValue(VLdA, 2); - SDValue V5 = SDValue(VLdB, 2); - SDValue V6 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdA, 3); - SDValue V7 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdB, 3); - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, - V4, V5, V6, V7), 0); - - // Extract out the 3 / 4 Q registers. - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, RegSeq); - ReplaceUses(SDValue(N, Vec), Q); - } + const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0), + Pred, Reg0, Chain }; + SDNode *VLdB = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7); + SuperReg = SDValue(VLdB, 0); + Chain = SDValue(VLdB, 2); + } + + // Extract out the Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, SuperReg); + ReplaceUses(SDValue(N, Vec), Q); } ReplaceUses(SDValue(N, NumVecs), Chain); return NULL; @@ -2166,26 +2125,26 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case Intrinsic::arm_neon_vld3: { - unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, - ARM::VLD3d32, ARM::VLD1d64T }; - unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, - ARM::VLD3q16_UPD, - ARM::VLD3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, - ARM::VLD3q16odd_UPD, - ARM::VLD3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo, + ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo }; + unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, + ARM::VLD3q16oddPseudo_UPD, + ARM::VLD3q32oddPseudo_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { - unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, - ARM::VLD4d32, ARM::VLD1d64Q }; - unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, - ARM::VLD4q16_UPD, - ARM::VLD4q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, - ARM::VLD4q16odd_UPD, - ARM::VLD4q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo, + ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, + ARM::VLD4q16oddPseudo_UPD, + ARM::VLD4q32oddPseudo_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8c8d1d7..4f7db71 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -181,6 +181,10 @@ class VLDQQWBPseudo : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), IIC_VST, "$addr.addr = $wb">; +class VLDQQQQWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + "$addr.addr = $wb, $src = $dst">; // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> @@ -356,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; +def VLD3d8Pseudo : VLDQQPseudo; +def VLD3d16Pseudo : VLDQQPseudo; +def VLD3d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -368,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; @@ -376,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; -def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; -def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -392,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; +def VLD4d8Pseudo : VLDQQPseudo; +def VLD4d16Pseudo : VLDQQPseudo; +def VLD4d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -404,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; @@ -412,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">; -def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; -def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 0466794..3407ac6 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -74,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD3d8: - case ARM::VLD3d16: - case ARM::VLD3d32: - case ARM::VLD1d64T: case ARM::VLD3LNd8: case ARM::VLD3LNd16: case ARM::VLD3LNd32: @@ -85,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8_UPD: - case ARM::VLD3q16_UPD: - case ARM::VLD3q32_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD3q8odd_UPD: - case ARM::VLD3q16odd_UPD: - case ARM::VLD3q32odd_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD3LNq16: case ARM::VLD3LNq32: FirstOpnd = 0; @@ -119,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD4d8: - case ARM::VLD4d16: - case ARM::VLD4d32: - case ARM::VLD1d64Q: case ARM::VLD4LNd8: case ARM::VLD4LNd16: case ARM::VLD4LNd32: @@ -130,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8_UPD: - case ARM::VLD4q16_UPD: - case ARM::VLD4q32_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD4q8odd_UPD: - case ARM::VLD4q16odd_UPD: - case ARM::VLD4q32odd_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD4LNq16: case ARM::VLD4LNq32: FirstOpnd = 0; |