summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBob Wilson <bob.wilson@apple.com>2010-08-28 05:12:57 +0000
committerBob Wilson <bob.wilson@apple.com>2010-08-28 05:12:57 +0000
commit5f5dbccb8f5dc31e95d3705b4aab25ca20ade911 (patch)
treee06959b9291265027331a52d6574473fed411268
parent32497e1a4b44bbad671605bf3d382c6dc4b92c23 (diff)
downloadexternal_llvm-5f5dbccb8f5dc31e95d3705b4aab25ca20ade911.zip
external_llvm-5f5dbccb8f5dc31e95d3705b4aab25ca20ade911.tar.gz
external_llvm-5f5dbccb8f5dc31e95d3705b4aab25ca20ade911.tar.bz2
Use pseudo instructions for VST1 and VST2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112357 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp47
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp130
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td32
-rw-r--r--lib/Target/ARM/NEONPreAllocPass.cpp14
4 files changed, 110 insertions, 113 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9b0a91b..f39e00e 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -119,8 +119,9 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
}
MIB.addReg(D0, getKillRegState(SrcIsKill))
- .addReg(D1, getKillRegState(SrcIsKill))
- .addReg(D2, getKillRegState(SrcIsKill));
+ .addReg(D1, getKillRegState(SrcIsKill));
+ if (NumRegs > 2)
+ MIB.addReg(D2, getKillRegState(SrcIsKill));
if (NumRegs > 3)
MIB.addReg(D3, getKillRegState(SrcIsKill));
MIB = AddDefaultPred(MIB);
@@ -224,6 +225,48 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MI.eraseFromParent();
}
+ case ARM::VST1q8Pseudo:
+ ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo:
+ ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo:
+ ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo:
+ ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
+ case ARM::VST1q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
+
+ case ARM::VST2d8Pseudo:
+ ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo:
+ ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo:
+ ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo:
+ ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo:
+ ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo:
+ ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
+ case ARM::VST2d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
+
case ARM::VST3d8Pseudo:
ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
case ARM::VST3d16Pseudo:
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c25966a..4f4f4d0 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1256,16 +1256,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
- // FIXME: This is a temporary flag to distinguish VSTs that have been
- // converted to pseudo instructions.
- bool usePseudoInstrs = (NumVecs >= 3);
-
if (is64BitVector) {
- if (NumVecs >= 2) {
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1282,124 +1280,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
- if (usePseudoInstrs)
- Ops.push_back(RegSeq);
- else {
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
- }
- } else {
- Ops.push_back(N->getOperand(3));
+ Ops.push_back(RegSeq);
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- usePseudoInstrs ? 6 : NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2,
- // storing pairs of D regs.
+ // Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (NumVecs == 2) {
- // First extract the pair of Q registers.
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ // Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
-
- // Form a QQ register.
- SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
- QQ));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3)));
- }
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
+ Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(3+3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
Ops.push_back(Reg0); // post-access address offset
- if (usePseudoInstrs)
- Ops.push_back(RegSeq);
- else
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
+ Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(),
- usePseudoInstrs ? 7 : NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
- if (usePseudoInstrs)
- Ops[6] = Chain;
- else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
- }
+ Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(),
- usePseudoInstrs ? 7 : NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -2267,15 +2202,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index bfd98a8..88d606c 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -490,6 +490,12 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
+class VSTQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
+class VSTQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
class VSTQQPseudo
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
class VSTQQWBPseudo
@@ -520,6 +526,11 @@ def VST1q16 : VST1Q<0b0100, "16">;
def VST1q32 : VST1Q<0b1000, "32">;
def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8Pseudo : VSTQPseudo;
+def VST1q16Pseudo : VSTQPseudo;
+def VST1q32Pseudo : VSTQPseudo;
+def VST1q64Pseudo : VSTQPseudo;
+
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
@@ -540,6 +551,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8Pseudo_UPD : VSTQWBPseudo;
+def VST1q16Pseudo_UPD : VSTQWBPseudo;
+def VST1q32Pseudo_UPD : VSTQWBPseudo;
+def VST1q64Pseudo_UPD : VSTQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
@@ -610,6 +626,14 @@ def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
def VST2q32 : VST2Q<0b1000, "32">;
+def VST2d8Pseudo : VSTQPseudo;
+def VST2d16Pseudo : VSTQPseudo;
+def VST2d32Pseudo : VSTQPseudo;
+
+def VST2q8Pseudo : VSTQQPseudo;
+def VST2q16Pseudo : VSTQQPseudo;
+def VST2q32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -631,6 +655,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2d8Pseudo_UPD : VSTQWBPseudo;
+def VST2d16Pseudo_UPD : VSTQWBPseudo;
+def VST2d32Pseudo_UPD : VSTQWBPseudo;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
def VST2b16 : VST2D<0b1001, 0b0100, "16">;
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 254ac23..5142ebd 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -178,13 +178,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@@ -192,13 +185,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
-
case ARM::VST2LNq16:
case ARM::VST2LNq32:
FirstOpnd = 2;