diff options
author | Nate Begeman <natebegeman@mac.com> | 2009-04-24 03:42:54 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2009-04-24 03:42:54 +0000 |
commit | b706d29f9c5ed3ed9acc82f7ab46205ba56b92dc (patch) | |
tree | 105e75ce0dc135a208ef085ba4f70fe162031ff1 /lib/Target | |
parent | 98d07102d67971118c73e7db84d8a05d58dcf3df (diff) | |
download | external_llvm-b706d29f9c5ed3ed9acc82f7ab46205ba56b92dc.zip external_llvm-b706d29f9c5ed3ed9acc82f7ab46205ba56b92dc.tar.gz external_llvm-b706d29f9c5ed3ed9acc82f7ab46205ba56b92dc.tar.bz2 |
PR2957
ISD::VECTOR_SHUFFLE now stores an array of integers representing the shuffle
mask internal to the node, rather than taking a BUILD_VECTOR of ConstantSDNodes
as the shuffle mask. A value of -1 represents UNDEF.
In addition to eliminating the creation of illegal BUILD_VECTORS just to
represent shuffle masks, we are better about canonicalizing the shuffle mask,
resulting in substantially better code for some classes of shuffles.
A cleanup of the x86 shuffle code and some canonicalization in DAGCombiner are next.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69952 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp | 61 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 214 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 12 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrAltivec.td | 179 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 1651 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 52 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrMMX.td | 114 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 544 |
9 files changed, 1195 insertions, 1633 deletions
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index c07e6d5..87de2c7 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1672,7 +1672,7 @@ SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); + const int *PermMask = cast<ShuffleVectorSDNode>(Op)->getMask(); DebugLoc dl = Op.getDebugLoc(); if (V2.getOpcode() == ISD::UNDEF) V2 = V1; @@ -1703,39 +1703,40 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } else assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); - for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) { - if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) { - unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue(); + for (unsigned i = 0; i != MaxElts; ++i) { + if (PermMask[i] < 0) + continue; + + unsigned SrcElt = PermMask[i]; - if (monotonic) { - if (SrcElt >= V2EltIdx0) { - if (1 >= (++EltsFromV2)) { - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } - } else if (CurrElt != SrcElt) { - monotonic = false; + if (monotonic) { + if (SrcElt >= V2EltIdx0) { + if (1 >= (++EltsFromV2)) { + V2Elt = (V2EltIdx0 - SrcElt) << 2; } - - ++CurrElt; + } else if (CurrElt != SrcElt) { + monotonic = false; } - if (rotate) { - if (PrevElt > 0 && SrcElt < MaxElts) { - if ((PrevElt == SrcElt - 1) - || (PrevElt == MaxElts - 1 && SrcElt == 0)) { - PrevElt = SrcElt; - if (SrcElt == 0) - V0Elt = i; - } else { - rotate = false; - } - } else if (PrevElt == 0) { - // First time through, need to keep track of previous element + ++CurrElt; + } + + if (rotate) { + if (PrevElt > 0 && SrcElt < MaxElts) { + if ((PrevElt == SrcElt - 1) + || (PrevElt == MaxElts - 1 && SrcElt == 0)) { PrevElt = SrcElt; + if (SrcElt == 0) + V0Elt = i; } else { - // This isn't a 
rotation, takes elements from vector 2 rotate = false; } + } else if (PrevElt == 0) { + // First time through, need to keep track of previous element + PrevElt = SrcElt; + } else { + // This isn't a rotation, takes elements from vector 2 + rotate = false; } } } @@ -1768,12 +1769,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector<SDValue, 16> ResultMask; - for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue(); + for (unsigned i = 0, e = MaxElts; i != e; ++i) { + unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j < BytesPerElement; ++j) { ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 32ff8f4..cb36b05 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -456,22 +456,22 @@ static bool isFloatingPointZero(SDValue Op) { /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// true if Op is undef or if it matches the specified value. -static bool isConstantOrUndef(SDValue Op, unsigned Val) { - return Op.getOpcode() == ISD::UNDEF || - cast<ConstantSDNode>(Op)->getZExtValue() == Val; +static bool isConstantOrUndef(int Op, int Val) { + return Op < 0 || Op == Val; } /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. 
-bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { + const int *Mask = N->getMask(); if (!isUnary) { for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), i*2+1)) + if (!isConstantOrUndef(Mask[i], i*2+1)) return false; } else { for (unsigned i = 0; i != 8; ++i) - if (!isConstantOrUndef(N->getOperand(i), i*2+1) || - !isConstantOrUndef(N->getOperand(i+8), i*2+1)) + if (!isConstantOrUndef(Mask[i], i*2+1) || + !isConstantOrUndef(Mask[i+8], i*2+1)) return false; } return true; @@ -479,18 +479,19 @@ bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. -bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { +bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { + const int *Mask = N->getMask(); if (!isUnary) { for (unsigned i = 0; i != 16; i += 2) - if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || - !isConstantOrUndef(N->getOperand(i+1), i*2+3)) + if (!isConstantOrUndef(Mask[i ], i*2+2) || + !isConstantOrUndef(Mask[i+1], i*2+3)) return false; } else { for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || - !isConstantOrUndef(N->getOperand(i+1), i*2+3) || - !isConstantOrUndef(N->getOperand(i+8), i*2+2) || - !isConstantOrUndef(N->getOperand(i+9), i*2+3)) + if (!isConstantOrUndef(Mask[i ], i*2+2) || + !isConstantOrUndef(Mask[i+1], i*2+3) || + !isConstantOrUndef(Mask[i+8], i*2+2) || + !isConstantOrUndef(Mask[i+9], i*2+3)) return false; } return true; @@ -498,27 +499,29 @@ bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { /// isVMerge - Common function, used to match vmrg* shuffles. 
/// -static bool isVMerge(SDNode *N, unsigned UnitSize, +static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); + const int *Mask = N->getMask(); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit - if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), + if (!isConstantOrUndef(Mask[i*UnitSize*2+j], LHSStart+j+i*UnitSize) || - !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), + !isConstantOrUndef(Mask[i*UnitSize*2+UnitSize+j], RHSStart+j+i*UnitSize)) return false; } - return true; + return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { +bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 8, 24); return isVMerge(N, UnitSize, 8, 8); @@ -526,7 +529,8 @@ bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 
-bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { +bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 0, 16); return isVMerge(N, UnitSize, 0, 0); @@ -536,91 +540,92 @@ bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); + + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + // Find the first non-undef value in the shuffle mask. + const int *Mask = SVOp->getMask(); unsigned i; - for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) + for (i = 0; i != 16 && Mask[i] < 0; ++i) /*search*/; if (i == 16) return -1; // all undef. - // Otherwise, check to see if the rest of the elements are consequtively + // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. - unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getZExtValue(); + unsigned ShiftAmt = Mask[i]; if (ShiftAmt < i) return -1; ShiftAmt -= i; if (!isUnary) { - // Check the rest of the elements to see if they are consequtive. + // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) + if (!isConstantOrUndef(Mask[i], ShiftAmt+i)) return -1; } else { - // Check the rest of the elements to see if they are consequtive. + // Check the rest of the elements to see if they are consecutive. 
for (++i; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) + if (!isConstantOrUndef(Mask[i], (ShiftAmt+i) & 15)) return -1; } - return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. -bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && +bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { + assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. - unsigned ElementBase = 0; - SDValue Elt = N->getOperand(0); - if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) - ElementBase = EltV->getZExtValue(); - else - return false; // FIXME: Handle UNDEF elements too! - - if (cast<ConstantSDNode>(Elt)->getZExtValue() >= 16) + const int *Mask = N->getMask(); + unsigned ElementBase = Mask[0]; + + // FIXME: Handle UNDEF elements too! + if (ElementBase >= 16) return false; - // Check that they are consequtive. - for (unsigned i = 1; i != EltSize; ++i) { - if (!isa<ConstantSDNode>(N->getOperand(i)) || - cast<ConstantSDNode>(N->getOperand(i))->getZExtValue() != i+ElementBase) + // Check that the indices are consecutive, in the case of a multi-byte element + // splatted with a v16i8 mask. 
+ for (unsigned i = 1; i != EltSize; ++i) + if (Mask[i] < 0 || Mask[i] != (int)(i+ElementBase)) return false; - } - assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(N->getOperand(i)) && - "Invalid VECTOR_SHUFFLE mask!"); + if (Mask[i] < 0) continue; for (unsigned j = 0; j != EltSize; ++j) - if (N->getOperand(i+j) != N->getOperand(j)) + if (Mask[i+j] != Mask[j]) return false; } - return true; } /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. bool PPC::isAllNegativeZeroVector(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - if (PPC::isSplatShuffleMask(N, N->getNumOperands())) - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N)) + BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); + + APInt APVal, APUndef; + unsigned BitSize; + bool HasAnyUndefs; + + if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32)) + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) return CFP->getValueAPF().isNegZero(); + return false; } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { - assert(isSplatShuffleMask(N, EltSize)); - return cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() / EltSize; + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + assert(isSplatShuffleMask(SVOp, EltSize)); + return SVOp->getMask()[0] / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed @@ -3149,11 +3154,10 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS); - SDValue Ops[16]; + int Ops[16]; for (unsigned i = 0; i != 16; ++i) - Ops[i] = DAG.getConstant(i+Amt, MVT::i8); - SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops,16)); + Ops[i] = i + Amt; + SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); } @@ -3354,7 +3358,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); - unsigned ShufIdxs[16]; + int ShufIdxs[16]; switch (OpNum) { default: assert(0 && "Unknown i32 permute!"); case OP_VMRGHW: @@ -3392,13 +3396,11 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, case OP_VSLDOI12: return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } - SDValue Ops[16]; - for (unsigned i = 0; i != 16; ++i) - Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8); - - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(), - OpLHS, OpRHS, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16)); + MVT VT = OpLHS.getValueType(); + OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS); + OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS); + SDValue T = 
DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this @@ -3406,28 +3408,30 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + const int *PermMask = SVOp->getMask(); + MVT VT = Op.getValueType(); // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. if (V2.getOpcode() == ISD::UNDEF) { - if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) || - PPC::isSplatShuffleMask(PermMask.getNode(), 2) || - PPC::isSplatShuffleMask(PermMask.getNode(), 4) || - PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) || - PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) || - PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) { + if (PPC::isSplatShuffleMask(SVOp, 1) || + PPC::isSplatShuffleMask(SVOp, 2) || + PPC::isSplatShuffleMask(SVOp, 4) || + PPC::isVPKUWUMShuffleMask(SVOp, true) || + PPC::isVPKUHUMShuffleMask(SVOp, true) || + PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, true) || + PPC::isVMRGLShuffleMask(SVOp, 2, true) 
|| + PPC::isVMRGLShuffleMask(SVOp, 4, true) || + PPC::isVMRGHShuffleMask(SVOp, 1, true) || + PPC::isVMRGHShuffleMask(SVOp, 2, true) || + PPC::isVMRGHShuffleMask(SVOp, 4, true)) { return Op; } } @@ -3435,15 +3439,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. - if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) || - PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) || - PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false)) + if (PPC::isVPKUWUMShuffleMask(SVOp, false) || + PPC::isVPKUHUMShuffleMask(SVOp, false) || + PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, false) || + PPC::isVMRGLShuffleMask(SVOp, 2, false) || + PPC::isVMRGLShuffleMask(SVOp, 4, false) || + PPC::isVMRGHShuffleMask(SVOp, 1, false) || + PPC::isVMRGHShuffleMask(SVOp, 2, false) || + PPC::isVMRGHShuffleMask(SVOp, 4, false)) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our @@ -3453,11 +3457,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number unsigned EltNo = 8; // Start out undef. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. - if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) + if (PermMask[i*4+j] < 0) continue; // Undef, ignore it. 
- unsigned ByteSource = - cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getZExtValue(); + unsigned ByteSource = PermMask[i*4+j]; if ((ByteSource & 3) != j) { isFourElementShuffle = false; break; @@ -3509,12 +3512,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector<SDValue, 16> ResultMask; - for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue(); + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j != BytesPerElement; ++j) ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, @@ -3704,13 +3703,12 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts); // Merge the results together. - SDValue Ops[16]; + int Ops[16]; for (unsigned i = 0; i != 8; ++i) { - Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); - Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); + Ops[i*2 ] = 2*i+1; + Ops[i*2+1] = 2*i+1+16; } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16)); + return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { assert(0 && "Unknown mul to lower!"); abort(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 01111cf..7946474 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -175,19 +175,21 @@ namespace llvm { namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. 
- bool isVPKUHUMShuffleMask(SDNode *N, bool isUnary); + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. - bool isVPKUWUMShuffleMask(SDNode *N, bool isUnary); + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); + bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); + bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. @@ -196,7 +198,7 @@ namespace llvm { /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. - bool isSplatShuffleMask(SDNode *N, unsigned EltSize); + bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index c90fbc9..9a5be79 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -15,96 +15,118 @@ // Altivec transformation functions and pattern fragments. // -/// VPKUHUM_shuffle_mask/VPKUWUM_shuffle_mask - Return true if this is a valid -/// shuffle mask for the VPKUHUM or VPKUWUM instructions. 
-def VPKUHUM_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUHUMShuffleMask(N, false); + +def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); }]>; -def VPKUWUM_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUWUMShuffleMask(N, false); +def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); }]>; - -def VPKUHUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUHUMShuffleMask(N, true); +def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); }]>; -def VPKUWUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUWUMShuffleMask(N, true); +def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); }]>; -def VMRGLB_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 1, false); +def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); }]>; -def VMRGLH_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 2, false); +def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); }]>; -def VMRGLW_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 4, false); +def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); }]>; 
-def VMRGHB_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 1, false); +def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); }]>; -def VMRGHH_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 2, false); +def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); }]>; -def VMRGHW_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 4, false); +def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); }]>; -def VMRGLB_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 1, true); + +def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); }]>; -def VMRGLH_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 2, true); +def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); }]>; -def VMRGLW_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 4, true); +def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); }]>; -def VMRGHB_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 1, true); +def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return 
PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); }]>; -def VMRGHH_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 2, true); +def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); }]>; -def VMRGHW_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 4, true); +def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); }]>; -def VSLDOI_get_imm : SDNodeXForm<build_vector, [{ + +def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false)); }]>; -def VSLDOI_shuffle_mask : PatLeaf<(build_vector), [{ +def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVSLDOIShuffleMask(N, false) != -1; }], VSLDOI_get_imm>; + /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into /// vector_shuffle(X,undef,mask) by the dag combiner. -def VSLDOI_unary_get_imm : SDNodeXForm<build_vector, [{ +def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true)); }]>; -def VSLDOI_unary_shuffle_mask : PatLeaf<(build_vector), [{ +def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVSLDOIShuffleMask(N, true) != -1; }], VSLDOI_unary_get_imm>; // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. 
-def VSPLTB_get_imm : SDNodeXForm<build_vector, [{ +def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{ return getI32Imm(PPC::getVSPLTImmediate(N, 1)); }]>; -def VSPLTB_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isSplatShuffleMask(N, 1); +def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1); }], VSPLTB_get_imm>; -def VSPLTH_get_imm : SDNodeXForm<build_vector, [{ +def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{ return getI32Imm(PPC::getVSPLTImmediate(N, 2)); }]>; -def VSPLTH_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isSplatShuffleMask(N, 2); +def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2); }], VSPLTH_get_imm>; -def VSPLTW_get_imm : SDNodeXForm<build_vector, [{ +def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{ return getI32Imm(PPC::getVSPLTImmediate(N, 4)); }]>; -def VSPLTW_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isSplatShuffleMask(N, 4); +def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4); }], VSPLTW_get_imm>; @@ -268,8 +290,7 @@ def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>; def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), "vsldoi $vD, $vA, $vB, $SH", VecFP, [(set VRRC:$vD, - (vector_shuffle (v16i8 VRRC:$vA), VRRC:$vB, - VSLDOI_shuffle_mask:$SH))]>; + (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. 
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), @@ -345,28 +366,22 @@ def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>; def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGHB_shuffle_mask))]>; + [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGHH_shuffle_mask))]>; + [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGHW_shuffle_mask))]>; + [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGLB_shuffle_mask))]>; + [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGLH_shuffle_mask))]>; + [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGLW_shuffle_mask))]>; + [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>; def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; @@ -440,16 +455,16 @@ def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>; def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, (vector_shuffle 
(v16i8 VRRC:$vB), (undef), - VSPLTB_shuffle_mask:$UIMM))]>; + [(set VRRC:$vD, + (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vsplth $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), - VSPLTH_shuffle_mask:$UIMM))]>; + [(set VRRC:$vD, + (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltw $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), - VSPLTW_shuffle_mask:$UIMM))]>; + [(set VRRC:$vD, + (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; @@ -479,13 +494,13 @@ def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>; def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuhum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VPKUHUM_shuffle_mask))]>; + [(set VRRC:$vD, + (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuwum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VPKUWUM_shuffle_mask))]>; + [(set VRRC:$vD, + (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; // Vector Unpack. @@ -603,25 +618,25 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; // Shuffles. 
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VSLDOI_unary_shuffle_mask:$in), - (VSLDOI VRRC:$vA, VRRC:$vA, VSLDOI_unary_shuffle_mask:$in)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUWUM_unary_shuffle_mask:$in), +def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef), + (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>; +def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef), (VPKUWUM VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUHUM_unary_shuffle_mask:$in), +def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef), (VPKUHUM VRRC:$vA, VRRC:$vA)>; // Match vmrg*(x,x) -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLB_unary_shuffle_mask:$in), +def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGLB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLH_unary_shuffle_mask:$in), +def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGLH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLW_unary_shuffle_mask:$in), +def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGLW VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHB_unary_shuffle_mask:$in), +def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGHB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHH_unary_shuffle_mask:$in), +def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGHH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHW_unary_shuffle_mask:$in), +def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGHW VRRC:$vA, VRRC:$vA)>; // Logical Operations diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d51435c..0236602 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,7 +45,8 @@ static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, 
cl::desc("Disable use of MMX")); // Forward declarations. -static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2); X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM) { @@ -1667,9 +1668,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); - Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getUNDEF(MVT::v2i64), Arg, - getMOVLMask(2, DAG, dl)); + Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); break; } } @@ -2138,186 +2137,156 @@ static bool hasFPCMov(unsigned X86CC) { } } -/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value falls within the specified range (L, H]. -static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) { - if (Op.getOpcode() == ISD::UNDEF) - return true; - - unsigned Val = cast<ConstantSDNode>(Op)->getZExtValue(); - return (Val >= Low && Val < Hi); +/// isUndefOrInRange - Return true if Val is undef or if its value falls within +/// the specified range (L, H]. +static bool isUndefOrInRange(int Val, int Low, int Hi) { + return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value equal to the specified value. -static bool isUndefOrEqual(SDValue Op, unsigned Val) { - if (Op.getOpcode() == ISD::UNDEF) +/// isUndefOrEqual - Val is either less than zero (undef) or equal to the +/// specified value. 
+static bool isUndefOrEqual(int Val, int CmpVal) { + if (Val < 0 || Val == CmpVal) return true; - return cast<ConstantSDNode>(Op)->getZExtValue() == Val; + return false; } -/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFD. -bool X86::isPSHUFDMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 2 && N->getNumOperands() != 4) - return false; - - // Check if the value doesn't reference the second vector. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast<ConstantSDNode>(Arg)->getZExtValue() >= e) - return false; - } - - return true; +/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference +/// the second operand. +static bool isPSHUFDMask(const int *Mask, MVT VT) { + if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) + return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return (Mask[0] < 2 && Mask[1] < 2); + return false; } -/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFHW. -bool X86::isPSHUFHWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) { + return ::isPSHUFDMask(N->getMask(), N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFHW. +static bool isPSHUFHWMask(const int *Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - - // Lower quadword copied in order. 
- for (unsigned i = 0; i != 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast<ConstantSDNode>(Arg)->getZExtValue() != i) + + // Lower quadword copied in order or undef. + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - } - + // Upper quadword shuffled. - for (unsigned i = 4; i != 8; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < 4 || Val > 7) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) return false; - } - + return true; } -/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFLW. -bool X86::isPSHUFLWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { + return ::isPSHUFHWMask(N->getMask(), N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFLW. +static bool isPSHUFLWMask(const int *Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - + // Upper quadword copied in order. - for (unsigned i = 4; i != 8; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - + // Lower quadword shuffled. 
- for (unsigned i = 0; i != 4; ++i) - if (!isUndefOrInRange(N->getOperand(i), 0, 4)) + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 4) return false; - + return true; } +bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { + return ::isPSHUFLWMask(N->getMask(), N->getValueType(0)); +} + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. -template<class SDOperand> -static bool isSHUFPMask(SDOperand *Elems, unsigned NumElems) { - if (NumElems != 2 && NumElems != 4) return false; - - unsigned Half = NumElems / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Elems[i], 0, NumElems)) +static bool isSHUFPMask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; - for (unsigned i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - + return true; } -bool X86::isSHUFPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); +bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { + return ::isSHUFPMask(N->getMask(), N->getValueType(0)); } /// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. 
-template<class SDOperand> -static bool isCommutedSHUFP(SDOperand *Ops, unsigned NumOps) { - if (NumOps != 2 && NumOps != 4) return false; - - unsigned Half = NumOps / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) +static bool isCommutedSHUFPMask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - for (unsigned i = Half; i < NumOps; ++i) - if (!isUndefOrInRange(Ops[i], 0, NumOps)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; return true; } -static bool isCommutedSHUFP(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); +static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { + return isCommutedSHUFPMask(N->getMask(), N->getValueType(0)); } /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. -bool X86::isMOVHLPSMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 6) && - isUndefOrEqual(N->getOperand(1), 7) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); -} - -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) - return false; - - // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 2) && - isUndefOrEqual(N->getOperand(1), 3) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); + const int *Mask = N->getMask(); + return isUndefOrEqual(Mask[0], 6) && + isUndefOrEqual(Mask[1], 7) && + isUndefOrEqual(Mask[2], 2) && + isUndefOrEqual(Mask[3], 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. -bool X86::isMOVLPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; + const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) + if (!isUndefOrEqual(Mask[i], i + NumElems)) return false; for (unsigned i = NumElems/2; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(Mask[i], i)) return false; return true; @@ -2326,37 +2295,49 @@ bool X86::isMOVLPMask(SDNode *N) { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// and MOVLHPS. 
-bool X86::isMOVHPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; + const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = 0; i < NumElems/2; ++i) { - SDValue Arg = N->getOperand(i + NumElems/2); - if (!isUndefOrEqual(Arg, i + NumElems)) + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems)) return false; - } return true; } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 + const int *Mask = N->getMask(); + return isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3) && + isUndefOrEqual(Mask[2], 2) && isUndefOrEqual(Mask[3], 3); +} + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. 
-template<class SDOperand> -bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, - bool V2IsSplat = false) { +static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (V2IsSplat) { @@ -2367,26 +2348,23 @@ bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, return false; } } - return true; } -bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + return ::isUNPCKLMask(N->getMask(), N->getValueType(0), V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. 
-template<class SDOperand> -bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, - bool V2IsSplat = false) { +static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j + NumElts/2)) return false; if (V2IsSplat) { @@ -2397,270 +2375,166 @@ bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, return false; } } - return true; } -bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + return ::isUNPCKHMask(N->getMask(), N->getValueType(0), V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, /// <0, 0, 1, 1> -bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKL_v_undef_Mask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i+1); - + + for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { + return ::isUNPCKL_v_undef_Mask(N->getMask(), N->getValueType(0)); +} + /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> -bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKH_v_undef_Mask(const int *Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i + 1); - + + for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { + return ::isUNPCKH_v_undef_Mask(N->getMask(), N->getValueType(0)); +} + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// 
MOVSD, and MOVD, i.e. setting the lowest element. -template<class SDOperand> -static bool isMOVLMask(SDOperand *Elts, unsigned NumElts) { +static bool isMOVLMask(const int *Mask, MVT VT) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4) return false; - - if (!isUndefOrEqual(Elts[0], NumElts)) + + if (!isUndefOrEqual(Mask[0], NumElts)) return false; - - for (unsigned i = 1; i < NumElts; ++i) { - if (!isUndefOrEqual(Elts[i], i)) + + for (int i = 1; i < NumElts; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - } - + return true; } -bool X86::isMOVLMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isMOVLMask(N->op_begin(), N->getNumOperands()); +bool X86::isMOVLMask(ShuffleVectorSDNode *N) { + return ::isMOVLMask(N->getMask(), N->getValueType(0)); } /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. 
-template<class SDOperand> -static bool isCommutedMOVL(SDOperand *Ops, unsigned NumOps, - bool V2IsSplat = false, - bool V2IsUndef = false) { +static bool isCommutedMOVLMask(const int *Mask, MVT VT, bool V2IsSplat = false, + bool V2IsUndef = false) { + int NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; - - if (!isUndefOrEqual(Ops[0], 0)) + + if (!isUndefOrEqual(Mask[0], 0)) return false; - - for (unsigned i = 1; i < NumOps; ++i) { - SDValue Arg = Ops[i]; - if (!(isUndefOrEqual(Arg, i+NumOps) || - (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || - (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) + + for (int i = 1; i < NumOps; ++i) + if (!(isUndefOrEqual(Mask[i], i+NumOps) || + (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || + (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) return false; - } - + return true; } -static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, +static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, bool V2IsUndef = false) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedMOVL(N->op_begin(), N->getNumOperands(), - V2IsSplat, V2IsUndef); + return isCommutedMOVLMask(N->getMask(), N->getValueType(0), V2IsSplat, + V2IsUndef); } /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 
-bool X86::isMOVSHDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 1, 1, 3, 3 - for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 1) return false; - } + const int *Mask = N->getMask(); + for (unsigned i = 0; i < 2; ++i) + if (Mask[i] >=0 && Mask[i] != 1) + return false; bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 3) return false; - HasHi = true; + if (Mask[i] >= 0 && Mask[i] != 3) + return false; + if (Mask[i] == 3) + HasHi = true; } - // Don't use movshdup if it can be done with a shufps. + // FIXME: verify that matching u, u, 3, 3 is what we want. return HasHi; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
-bool X86::isMOVSLDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 0, 0, 2, 2 - for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 0) return false; - } + const int *Mask = N->getMask(); + for (unsigned i = 0; i < 2; ++i) + if (Mask[i] > 0) + return false; bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val != 2) return false; - HasHi = true; - } - - // Don't use movshdup if it can be done with a shufps. - return HasHi; -} - -/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a identity operation on the LHS or RHS. -static bool isIdentityMask(SDNode *N, bool RHS = false) { - unsigned NumElems = N->getNumOperands(); - for (unsigned i = 0; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + if (Mask[i] >= 0 && Mask[i] != 2) return false; - return true; -} - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element. -static bool isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // This is a splat operation if each element of the permute is the same, and - // if the value doesn't reference the second vector. 
- unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa<ConstantSDNode>(Elt)) { - ElementBase = Elt; - break; - } + if (Mask[i] == 2) + HasHi = true; } - - if (!ElementBase.getNode()) - return false; - - for (; i != NumElems; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (Arg != ElementBase) return false; - } - - // Make sure it is a splat of the first vector operand. - return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems; -} - -/// getSplatMaskEltNo - Given a splat mask, return the index to the element -/// we want to splat. -static SDValue getSplatMaskEltNo(SDNode *N) { - assert(isSplatMask(N) && "Not a splat mask"); - unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa<ConstantSDNode>(Elt)) - return Elt; - } - assert(0 && " No splat value found!"); - return SDValue(); -} - - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element and it's a 2 or 4 element mask. -bool X86::isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // We can only splat 64-bit, and 32-bit quantities with a single instruction. - if (N->getNumOperands() != 4 && N->getNumOperands() != 2) - return false; - return ::isSplatMask(N); -} - -/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a splat of zero element. -bool X86::isSplatLoMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), 0)) - return false; - return true; + // Don't use movsldup if it can be done with a shufps. 
+ return HasHi; } /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. -bool X86::isMOVDDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned e = N->getNumOperands() / 2; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) +bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { + int e = N->getValueType(0).getVectorNumElements() / 2; + const int *Mask = N->getMask(); + + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(e+i), i)) + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(Mask[e+i], i)) return false; return true; } @@ -2669,20 +2543,20 @@ bool X86::isMOVDDUPMask(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { - unsigned NumOperands = N->getNumOperands(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + int NumOperands = SVOp->getValueType(0).getVectorNumElements(); + const int *MaskP = SVOp->getMask(); + unsigned Shift = (NumOperands == 4) ? 2 : 1; unsigned Mask = 0; - for (unsigned i = 0; i < NumOperands; ++i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(NumOperands-i-1); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + for (int i = 0; i < NumOperands; ++i) { + int Val = MaskP[NumOperands-i-1]; + if (Val < 0) Val = 0; if (Val >= NumOperands) Val -= NumOperands; Mask |= Val; if (i != NumOperands - 1) Mask <<= Shift; } - return Mask; } @@ -2690,19 +2564,16 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// instructions. 
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { + const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the last 4. for (unsigned i = 7; i >= 4; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + int Val = MaskP[i]; + if (Val >= 0) Mask |= (Val - 4); - } if (i != 4) Mask <<= 2; } - return Mask; } @@ -2710,90 +2581,71 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { + const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the first 4. for (int i = 3; i >= 0; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - Mask |= Val; + int Val = MaskP[i]; + if (Val >= 0) + Mask |= Val; if (i != 0) Mask <<= 2; } - return Mask; } -/// CommuteVectorShuffle - Swap vector_shuffle operands as well as -/// values in ther permute mask. 
-static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, - SDValue &V2, SDValue &Mask, - SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector<SDValue, 8> MaskVec; - DebugLoc dl = Op.getDebugLoc(); - - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); - continue; - } - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); +/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in +/// their permute mask. +static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + const int *Mask = SVOp->getMask(); + SmallVector<int, 8> MaskVec; + + for (int i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx < 0) + MaskVec.push_back(idx); + else if (idx < NumElems) + MaskVec.push_back(idx + NumElems); else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + MaskVec.push_back(idx - NumElems); } - - std::swap(V1, V2); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), + SVOp->getOperand(0), &MaskVec[0]); } /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. 
-static -SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector<SDValue, 8> MaskVec; - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); +static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + for (int i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx < 0) continue; - } - assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); + else if (idx < NumElems) + Mask[i] = idx + NumElems; else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + Mask[i] = idx - NumElems; } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); } - /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). 
-static bool ShouldXformToMOVHLPS(SDNode *Mask) { - unsigned NumElems = Mask->getNumOperands(); +static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { + int NumElems = Op->getValueType(0).getVectorNumElements(); + const int *Mask = Op->getMask(); + if (NumElems != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+2)) + if (!isUndefOrEqual(Mask[i], i+2)) return false; for (unsigned i = 2; i != 4; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+4)) + if (!isUndefOrEqual(Mask[i], i+4)) return false; return true; } @@ -2817,7 +2669,8 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). And since V1 will become the source of the /// MOVLP, it must be either a vector load or a scalar load to vector. -static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { +static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, + ShuffleVectorSDNode *Op) { if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. 
We will try to use @@ -2825,14 +2678,16 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { if (ISD::isNON_EXTLoad(V2)) return false; - unsigned NumElems = Mask->getNumOperands(); + int NumElems = Op->getValueType(0).getVectorNumElements(); + const int *Mask = Op->getMask(); + if (NumElems != 2 && NumElems != 4) return false; - for (unsigned i = 0, e = NumElems/2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i)) + for (int i = 0, e = NumElems/2; i != e; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = NumElems/2; i != NumElems; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) + for (int i = NumElems/2; i != NumElems; ++i) + if (!isUndefOrEqual(Mask[i], i+NumElems)) return false; return true; } @@ -2850,29 +2705,6 @@ static bool isSplatVector(SDNode *N) { return true; } -/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an undef. -static bool isUndefShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - - SDValue V1 = N->getOperand(0); - SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) - return false; - else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) - return false; - } - } - return true; -} - /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. static inline bool isZeroNode(SDValue Elt) { @@ -2883,34 +2715,26 @@ static inline bool isZeroNode(SDValue Elt) { } /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an zero vector. -static bool isZeroShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - +/// to an zero vector. 
+/// FIXME: move to dag combiner? +static bool isZeroShuffle(ShuffleVectorSDNode *N) { SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) - continue; - - unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Idx < NumElems) { - unsigned Opc = V1.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) + const int *Mask = N->getMask(); + int NumElems = N->getValueType(0).getVectorNumElements(); + for (int i = 0; i != NumElems; ++i) { + int Idx = Mask[i]; + if (Idx >= NumElems) { + unsigned Opc = V2.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V1.getNode()->getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) return false; - } else if (Idx >= NumElems) { - unsigned Opc = V2.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) + } else if (Idx >= 0) { + unsigned Opc = V1.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) return false; } } @@ -2958,127 +2782,94 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. 
-static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { - assert(Mask.getOpcode() == ISD::BUILD_VECTOR); - +static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + const int *Mask = SVOp->getMask(); + bool Changed = false; - SmallVector<SDValue, 8> MaskVec; - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); - if (Val > NumElems) { - Arg = DAG.getConstant(NumElems, Arg.getValueType()); - Changed = true; - } + SmallVector<int, 8> MaskVec; + + for (int i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx > NumElems) { + idx = NumElems; + Changed = true; } - MaskVec.push_back(Arg); + MaskVec.push_back(idx); } - if (Changed) - Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), - Mask.getValueType(), - &MaskVec[0], MaskVec.size()); - return Mask; + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), + SVOp->getOperand(1), &MaskVec[0]); + return SDValue(SVOp, 0); } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. 
-static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - - SmallVector<SDValue, 8> MaskVec; - MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<int, 8> Mask; + Mask.push_back(NumElems); for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); -} - -/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation -/// of specified width. -static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; + Mask.push_back(i); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + +/// getUnpackl - Returns a vector_shuffle node for an unpackl operation. +static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<int, 8> Mask; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); + Mask.push_back(i); + Mask.push_back(i + NumElems); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation -/// of specified width. 
-static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); +/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. +static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); unsigned Half = NumElems/2; - SmallVector<SDValue, 8> MaskVec; + SmallVector<int, 8> Mask; for (unsigned i = 0; i != Half; ++i) { - MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); - } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); -} - -/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps -/// element #0 of a vector with the specified index, leaving the rest of the -/// elements in place. -static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, - SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; - // Element #0 of the result gets the elt we are replacing. - MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); - for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + Mask.push_back(i + Half); + Mask.push_back(i + NumElems + Half); + } + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { - MVT PVT = HasSSE2 ? 
MVT::v4i32 : MVT::v4f32; - MVT VT = Op.getValueType(); - if (PVT == VT) - return Op; - SDValue V1 = Op.getOperand(0); - SDValue Mask = Op.getOperand(2); - unsigned MaskNumElems = Mask.getNumOperands(); - unsigned NumElems = MaskNumElems; - DebugLoc dl = Op.getDebugLoc(); - // Special handling of v4f32 -> v4i32. - if (VT != MVT::v4f32) { - // Find which element we want to splat. - SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode(); - unsigned EltNo = cast<ConstantSDNode>(EltNoNode)->getZExtValue(); - // unpack elements to the correct location - while (NumElems > 4) { - if (EltNo < NumElems/2) { - Mask = getUnpacklMask(MaskNumElems, DAG, dl); - } else { - Mask = getUnpackhMask(MaskNumElems, DAG, dl); - EltNo -= NumElems/2; - } - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, Mask); - NumElems >>= 1; +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE2) { + if (SV->getValueType(0).getVectorNumElements() <= 4) + return SDValue(SV, 0); + + MVT PVT = MVT::v4f32; + MVT VT = SV->getValueType(0); + DebugLoc dl = SV->getDebugLoc(); + SDValue V1 = SV->getOperand(0); + int NumElems = VT.getVectorNumElements(); + int EltNo = SV->getSplatIndex(); + + // unpack elements to the correct location + while (NumElems > 4) { + if (EltNo < NumElems/2) { + V1 = getUnpackl(DAG, dl, VT, V1, V1); + } else { + V1 = getUnpackh(DAG, dl, VT, V1, V1); + EltNo -= NumElems/2; } - SDValue Cst = DAG.getConstant(EltNo, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + NumElems >>= 1; } - + + // Perform the splat. 
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); } /// isVectorLoad - Returns true if the node is a vector load, a scalar @@ -3095,32 +2886,28 @@ static bool isVectorLoad(SDValue Op) { /// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. /// -static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, - SelectionDAG &DAG, bool HasSSE3) { +static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE3) { // If we have sse3 and shuffle has more than one use or input is a load, then // use movddup. Otherwise, use movlhps. - bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); + SDValue V1 = SV->getOperand(0); + + bool UseMovddup = HasSSE3 && (!SV->hasOneUse() || isVectorLoad(V1)); MVT PVT = UseMovddup ? 
MVT::v2f64 : MVT::v4f32; - MVT VT = Op.getValueType(); + MVT VT = SV->getValueType(0); if (VT == PVT) - return Op; - DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PVT.getVectorNumElements(); - if (NumElems == 2) { - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + return SDValue(SV, 0); + + DebugLoc dl = SV->getDebugLoc(); + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + if (PVT.getVectorNumElements() == 2) { + int Mask[2] = { 0, 0 }; + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); } else { - assert(NumElems == 4); - SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); - SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Cst0, Cst1, Cst0, Cst1); + int Mask[4] = { 0, 1, 0, 1 }; + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); } - - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -3130,39 +2917,31 @@ static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { - DebugLoc dl = V2.getDebugLoc(); MVT VT = V2.getValueType(); SDValue V1 = isZero - ? getZeroVector(VT, HasSSE2, DAG, dl) : DAG.getUNDEF(VT); - unsigned NumElems = V2.getValueType().getVectorNumElements(); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 16> MaskVec; + ? 
getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<int, 16> MaskVec; for (unsigned i = 0; i != NumElems; ++i) - if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. - MaskVec.push_back(DAG.getConstant(NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + // If this is the insertion idx, put the low elt of V2 here. + MaskVec.push_back(i == Idx ? NumElems : i); + return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } /// getNumOfConsecutiveZeros - Return the number of elements in a result of /// a shuffle that is zero. static -unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, - unsigned NumElems, bool Low, - SelectionDAG &DAG) { +unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask, + int NumElems, bool Low, SelectionDAG &DAG) { unsigned NumZeros = 0; - for (unsigned i = 0; i < NumElems; ++i) { + for (int i = 0; i < NumElems; ++i) { unsigned Index = Low ? i : NumElems-i-1; - SDValue Idx = Mask.getOperand(Index); - if (Idx.getOpcode() == ISD::UNDEF) { + int Idx = Mask[Index]; + if (Idx < 0) { ++NumZeros; continue; } - SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); + SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); if (Elt.getNode() && isZeroNode(Elt)) ++NumZeros; else @@ -3173,40 +2952,40 @@ unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, /// isVectorShift - Returns true if the shuffle can be implemented as a /// logical left or right shift of a vector. -static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, +/// FIXME: split into pslldqi, psrldqi, palignr variants. 
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - unsigned NumElems = Mask.getNumOperands(); + const int *Mask = SVOp->getMask(); + int NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; - unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, true, DAG); if (!NumZeros) { isLeft = false; - NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); + NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, false, DAG); if (!NumZeros) return false; } - bool SeenV1 = false; bool SeenV2 = false; - for (unsigned i = NumZeros; i < NumElems; ++i) { - unsigned Val = isLeft ? (i - NumZeros) : i; - SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros)); - if (Idx.getOpcode() == ISD::UNDEF) + for (int i = NumZeros; i < NumElems; ++i) { + int Val = isLeft ? (i - NumZeros) : i; + int Idx = Mask[isLeft ? i : (i - NumZeros)]; + if (Idx < 0) continue; - unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue(); - if (Index < NumElems) + if (Idx < NumElems) SeenV1 = true; else { - Index -= NumElems; + Idx -= NumElems; SeenV2 = true; } - if (Index != Val) + if (Idx != Val) return false; } if (SeenV1 && SeenV2) return false; - ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); + ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); ShAmt = NumZeros; return true; } @@ -3291,8 +3070,8 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// getVShift - Return a vector logical shift node. /// static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, - unsigned NumBits, SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + unsigned NumBits, SelectionDAG &DAG, + const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? 
X86ISD::VSHL : X86ISD::VSRL; @@ -3377,11 +3156,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. if (Idx != 0) { - SDValue Ops[] = { - Item, DAG.getUNDEF(Item.getValueType()), - getSwapEltZeroMask(VecElts, Idx, DAG, dl) - }; - Item = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VecVT, Ops, 3); + SmallVector<int, 4> Mask; + Mask.push_back(Idx); + for (unsigned i = 1; i != VecElts; ++i) + Mask.push_back(i); + Item = DAG.getVectorShuffle(VecVT, dl, Item, + DAG.getUNDEF(Item.getValueType()), + &Mask[0]); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); } @@ -3425,15 +3206,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget->hasSSE2(), DAG); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT MaskEVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; + SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i++) - MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Item, - DAG.getUNDEF(VT), Mask); + MaskVec.push_back(i == Idx ? 0 : 1); + return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); } } @@ -3491,54 +3267,48 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { V[i] = V[i*2]; // Must be a zero vector. 
break; case 1: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2+1], V[i*2], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); break; case 2: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); break; case 3: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getUnpacklMask(NumElems, DAG, dl)); + V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); break; } } - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector<SDValue, 8> MaskVec; + SmallVector<int, 8> MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); + MaskVec.push_back(Reverse ? 1-i : i); Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[0], V[1], ShufMask); + MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems); + return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } if (Values.size() > 2) { + // If we have SSE 4.1, Expand into a number of inserts. + if (getSubtarget()->hasSSE41()) { + V[0] = DAG.getUNDEF(VT); + for (unsigned i = 0; i < NumElems; ++i) + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], + Op.getOperand(i), DAG.getIntPtrConstant(i)); + return V[0]; + } // Expand into a number of unpckl*. // e.g. 
for v4f32 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - SDValue UnpckMask = getUnpacklMask(NumElems, DAG, dl); for (unsigned i = 0; i < NumElems; ++i) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); NumElems >>= 1; while (NumElems != 0) { for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i], V[i + NumElems], - UnpckMask); + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); NumElems >>= 1; } return V[0]; @@ -3553,11 +3323,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static -SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + const int *Mask = SVOp->getMask(); SmallVector<int, 8> MaskVals; // Determine if more than 1 of the words in each of the low and high quadwords @@ -3568,9 +3339,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, BitVector InputQuads(4); for (unsigned i = 0; i < 8; ++i) { SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad; - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast<ConstantSDNode>(Elt)->getZExtValue(); + int EltIdx = Mask[i]; MaskVals.push_back(EltIdx); if (EltIdx < 0) { ++Quad[0]; @@ -3623,14 +3392,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // words from all 4 input quadwords. 
SDValue NewV; if (BestLoQuad >= 0 || BestHiQuad >= 0) { - SmallVector<SDValue,8> MaskV; - MaskV.push_back(DAG.getConstant(BestLoQuad < 0 ? 0 : BestLoQuad, MVT::i64)); - MaskV.push_back(DAG.getConstant(BestHiQuad < 0 ? 1 : BestHiQuad, MVT::i64)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, &MaskV[0], 2); - - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), Mask); + SmallVector<int, 8> MaskV; + MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); + MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); + NewV = DAG.getVectorShuffle(MVT::v2i64, dl, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the @@ -3668,15 +3435,8 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - MaskV.clear(); - for (unsigned i = 0; i != 8; ++i) - MaskV.push_back((MaskVals[i] < 0) ? DAG.getUNDEF(MVT::i16) - : DAG.getConstant(MaskVals[i], - MVT::i16)); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, - &MaskV[0], 8)); + return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); } } @@ -3733,49 +3493,45 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // and update MaskVals with new element order. 
BitVector InOrder(8); if (BestLoQuad >= 0) { - SmallVector<SDValue, 8> MaskV; + SmallVector<int, 8> MaskV; for (int i = 0; i != 4; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestLoQuad) { - MaskV.push_back(DAG.getConstant(idx & 3, MVT::i16)); + MaskV.push_back(idx & 3); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } for (unsigned i = 4; i != 8; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + MaskV.push_back(i); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. if (BestHiQuad >= 0) { - SmallVector<SDValue, 8> MaskV; + SmallVector<int, 8> MaskV; for (unsigned i = 0; i != 4; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); + MaskV.push_back(i); for (unsigned i = 4; i != 8; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestHiQuad) { - MaskV.push_back(DAG.getConstant((idx & 3) + 4, MVT::i16)); + MaskV.push_back((idx & 3) + 4); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -3811,11 +3567,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, // 2. [ssse3] 2 x pshufb + 1 x por // 3. 
[all] v8i16 shuffle + N x pextrw + rotate + pinsrw static -SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector<SDValue, 16> MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + const int *Mask = SVOp->getMask(); SmallVector<int, 16> MaskVals; // If we have SSSE3, case 1 is generated when all result bytes come from @@ -3825,9 +3582,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, bool V1Only = true; bool V2Only = true; for (unsigned i = 0; i < 16; ++i) { - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast<ConstantSDNode>(Elt)->getZExtValue(); + int EltIdx = Mask[i]; MaskVals.push_back(EltIdx); if (EltIdx < 0) continue; @@ -3958,11 +3713,14 @@ SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> static -SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, - MVT VT, - SDValue PermMask, SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { - unsigned NumElems = PermMask.getNumOperands(); +SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, + TargetLowering &TLI, DebugLoc dl) { + MVT VT = SVOp->getValueType(0); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + const int *PermMask = SVOp->getMask(); + unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 
2 : 4; MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); MVT MaskEltVT = MaskVT.getVectorElementType(); @@ -3981,38 +3739,35 @@ SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, else NewVT = MVT::v2f64; } - unsigned Scale = NumElems / NewWidth; - SmallVector<SDValue, 8> MaskVec; + int Scale = NumElems / NewWidth; + SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i += Scale) { - unsigned StartIdx = ~0U; - for (unsigned j = 0; j < Scale; ++j) { - SDValue Elt = PermMask.getOperand(i+j); - if (Elt.getOpcode() == ISD::UNDEF) + int StartIdx = -1; + for (int j = 0; j < Scale; ++j) { + int EltIdx = PermMask[i+j]; + if (EltIdx < 0) continue; - unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); - if (StartIdx == ~0U) + if (StartIdx == -1) StartIdx = EltIdx - (EltIdx % Scale); if (EltIdx != StartIdx + j) return SDValue(); } - if (StartIdx == ~0U) - MaskVec.push_back(DAG.getUNDEF(MaskEltVT)); + if (StartIdx == -1) + MaskVec.push_back(-1); else - MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); + MaskVec.push_back(StartIdx / Scale); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size())); + return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); } /// getVZextMovL - Return a zero-extending vector move low node. /// static SDValue getVZextMovL(MVT VT, MVT OpVT, - SDValue SrcOp, SelectionDAG &DAG, - const X86Subtarget *Subtarget, DebugLoc dl) { + SDValue SrcOp, SelectionDAG &DAG, + const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { LoadSDNode *LD = NULL; if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) @@ -4046,31 +3801,37 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, /// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of /// shuffles. 
static SDValue -LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, - SDValue PermMask, MVT VT, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = PermMask.getValueType(); - MVT MaskEVT = MaskVT.getVectorElementType(); +LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + MVT VT = SVOp->getValueType(0); + const int *PermMaskPtr = SVOp->getMask(); + SmallVector<std::pair<int, int>, 8> Locs; Locs.resize(4); - SmallVector<SDValue, 8> Mask1(4, DAG.getUNDEF(MaskEVT)); + SmallVector<int, 8> Mask1(4U, -1); + SmallVector<int, 8> PermMask; + + for (unsigned i = 0; i != 8; ++i) + PermMask.push_back(PermMaskPtr[i]); + unsigned NumHi = 0; unsigned NumLo = 0; for (unsigned i = 0; i != 4; ++i) { - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); } else { - unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); - assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); - if (Val < 4) { + assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!"); + if (Idx < 4) { Locs[i] = std::make_pair(0, NumLo); - Mask1[NumLo] = Elt; + Mask1[NumLo] = Idx; NumLo++; } else { Locs[i] = std::make_pair(1, NumHi); if (2+NumHi < 4) - Mask1[2+NumHi] = Elt; + Mask1[2+NumHi] = Idx; NumHi++; } } @@ -4081,24 +3842,21 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, // implemented with two shuffles. First shuffle gather the elements. // The second shuffle, which takes the first shuffle as both of its // vector operands, put the elements into the right order. 
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask1[0], Mask1.size())); + V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); - SmallVector<SDValue, 8> Mask2(4, DAG.getUNDEF(MaskEVT)); + SmallVector<int, 8> Mask2(4U, -1); + for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; else { unsigned Idx = (i < 2) ? 0 : 4; Idx += Locs[i].first * 2 + Locs[i].second; - Mask2[i] = DAG.getConstant(Idx, MaskEVT); + Mask2[i] = Idx; } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask2[0], Mask2.size())); + return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); } else if (NumLo == 3 || NumHi == 3) { // Otherwise, we must have three elements from one vector, call it X, and // one element from the other, call it Y. First, use a shufps to build an @@ -4109,60 +3867,51 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, // from X. if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl); + CommuteVectorShuffleMask(PermMask, VT); std::swap(V1, V2); } // Find the element from V2. 
unsigned HiIndex; for (HiIndex = 0; HiIndex < 3; ++HiIndex) { - SDValue Elt = PermMask.getOperand(HiIndex); - if (Elt.getOpcode() == ISD::UNDEF) + int Val = PermMask[HiIndex]; + if (Val < 0) continue; - unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); if (Val >= 4) break; } - Mask1[0] = PermMask.getOperand(HiIndex); - Mask1[1] = DAG.getUNDEF(MaskEVT); - Mask1[2] = PermMask.getOperand(HiIndex^1); - Mask1[3] = DAG.getUNDEF(MaskEVT); - V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[HiIndex]; + Mask1[1] = -1; + Mask1[2] = PermMask[HiIndex^1]; + Mask1[3] = -1; + V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); if (HiIndex >= 2) { - Mask1[0] = PermMask.getOperand(0); - Mask1[1] = PermMask.getOperand(1); - Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); - Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[0]; + Mask1[1] = PermMask[1]; + Mask1[2] = HiIndex & 1 ? 6 : 4; + Mask1[3] = HiIndex & 1 ? 4 : 6; + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); } else { - Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); - Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); - Mask1[2] = PermMask.getOperand(2); - Mask1[3] = PermMask.getOperand(3); - if (Mask1[2].getOpcode() != ISD::UNDEF) - Mask1[2] = - DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4, - MaskEVT); - if (Mask1[3].getOpcode() != ISD::UNDEF) - Mask1[3] = - DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4, - MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = HiIndex & 1 ? 2 : 0; + Mask1[1] = HiIndex & 1 ? 
0 : 2; + Mask1[2] = PermMask[2]; + Mask1[3] = PermMask[3]; + if (Mask1[2] >= 0) + Mask1[2] += 4; + if (Mask1[3] >= 0) + Mask1[3] += 4; + return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); } } // Break it into (shuffle shuffle_hi, shuffle_lo). Locs.clear(); - SmallVector<SDValue,8> LoMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector<SDValue,8> HiMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector<SDValue,8> *MaskPtr = &LoMask; + SmallVector<int,8> LoMask(4U, -1); + SmallVector<int,8> HiMask(4U, -1); + + SmallVector<int,8> *MaskPtr = &LoMask; unsigned MaskIdx = 0; unsigned LoIdx = 0; unsigned HiIdx = 2; @@ -4173,84 +3922,68 @@ LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, LoIdx = 0; HiIdx = 2; } - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); - } else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) { + } else if (Idx < 4) { Locs[i] = std::make_pair(MaskIdx, LoIdx); - (*MaskPtr)[LoIdx] = Elt; + (*MaskPtr)[LoIdx] = Idx; LoIdx++; } else { Locs[i] = std::make_pair(MaskIdx, HiIdx); - (*MaskPtr)[HiIdx] = Elt; + (*MaskPtr)[HiIdx] = Idx; HiIdx++; } } - SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &LoMask[0], LoMask.size())); - SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &HiMask[0], HiMask.size())); - SmallVector<SDValue, 8> MaskOps; + SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]); + SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]); + SmallVector<int, 8> MaskOps; for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) { - MaskOps.push_back(DAG.getUNDEF(MaskEVT)); + MaskOps.push_back(-1); } else { unsigned Idx = Locs[i].first * 4 + Locs[i].second; - MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); + MaskOps.push_back(Idx); } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, 
LoShuffle, HiShuffle, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskOps[0], MaskOps.size())); + return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); } SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PermMask.getNumOperands(); + const int *PermMask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask(); + unsigned NumElems = VT.getVectorNumElements(); bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - // FIXME: Check for legal shuffle and return? - - if (isUndefShuffle(Op.getNode())) - return DAG.getUNDEF(VT); - - if (isZeroShuffle(Op.getNode())) + if (isZeroShuffle(SVOp)) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); - if (isIdentityMask(PermMask.getNode())) - return V1; - else if (isIdentityMask(PermMask.getNode(), true)) - return V2; - // Canonicalize movddup shuffles. - if (V2IsUndef && Subtarget->hasSSE2() && - VT.getSizeInBits() == 128 && - X86::isMOVDDUPMask(PermMask.getNode())) - return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); + if (V2IsUndef && Subtarget->hasSSE2() && VT.getSizeInBits() == 128 && + X86::isMOVDDUPMask(SVOp)) + return CanonicalizeMovddup(SVOp, DAG, Subtarget->hasSSE3()); - if (isSplatMask(PermMask.getNode())) { - if (isMMX || NumElems < 4) return Op; - // Promote it to a v4{if}32 splat. - return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); + // Promote splats to v4f32. + if (SVOp->isSplat()) { + if (isMMX || NumElems < 4) + return Op; + return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); } // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! 
if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, - *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BIT_CONVERT, dl, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); @@ -4258,32 +3991,29 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) { - SDValue NewV1 = NewOp.getOperand(0); - SDValue NewV2 = NewOp.getOperand(1); - SDValue NewMask = NewOp.getOperand(2); - if (isCommutedMOVL(NewMask.getNode(), true, false)) { - NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); - return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget, - dl); - } + if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false)) + return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), + DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); - if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp))) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); + DAG, Subtarget, dl); } } - + + if (X86::isPSHUFDMask(SVOp)) + return Op; + // Check if this can be converted into a logical shift. 
bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); + bool isShift = getSubtarget()->hasSSE2() && + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4291,8 +4021,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - - if (X86::isMOVLMask(PermMask.getNode())) { + + if (X86::isMOVLMask(SVOp)) { if (V1IsUndef) return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) @@ -4300,17 +4030,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX) return Op; } - - if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || - X86::isMOVSLDUPMask(PermMask.getNode()) || - X86::isMOVHLPSMask(PermMask.getNode()) || - X86::isMOVHPMask(PermMask.getNode()) || - X86::isMOVLPMask(PermMask.getNode()))) + + // FIXME: fold these into legal mask. + if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || + X86::isMOVSLDUPMask(SVOp) || + X86::isMOVHLPSMask(SVOp) || + X86::isMOVHPMask(SVOp) || + X86::isMOVLPMask(SVOp))) return Op; - if (ShouldXformToMOVHLPS(PermMask.getNode()) || - ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) - return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (ShouldXformToMOVHLPS(SVOp) || + ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) + return CommuteVectorShuffle(SVOp, DAG); if (isShift) { // No better options. Use a vshl / vsrl. @@ -4318,7 +4049,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. 
@@ -4327,115 +4058,84 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Canonicalize the splat or undef, if present, to be on the RHS. if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + Op = CommuteVectorShuffle(SVOp, DAG); + SVOp = cast<ShuffleVectorSDNode>(Op); + V1 = SVOp->getOperand(0); + V2 = SVOp->getOperand(1); std::swap(V1IsSplat, V2IsSplat); std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - // FIXME: Figure out a cleaner way to do this. - if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { - if (V2IsUndef) return V1; - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (V2IsSplat) { - // V2 is a splat, so the mask may be malformed. That is, it may point - // to any V2 element. The instruction selectior won't like this. Get - // a corrected mask and commute to form a proper MOVS{S|D}. - SDValue NewMask = getMOVLMask(NumElems, DAG, dl); - if (NewMask.getNode() != PermMask.getNode()) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } - return Op; + if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { + // Shuffling low element of v1 into undef, just return v1. + if (V2IsUndef) + return V1; + // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which + // the instruction selector will not match, so get a canonical MOVL with + // swapped operands to undo the commute. + return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) + if (X86::isUNPCKL_v_undef_Mask(SVOp) || + X86::isUNPCKH_v_undef_Mask(SVOp) || + X86::isUNPCKLMask(SVOp) || + X86::isUNPCKHMask(SVOp)) return Op; if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first // element then try to match unpck{h|l} again. 
If match, return a // new vector_shuffle with the corrected mask. - SDValue NewMask = NormalizeMask(PermMask, DAG); - if (NewMask.getNode() != PermMask.getNode()) { - if (X86::isUNPCKLMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpacklMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } else if (X86::isUNPCKHMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpackhMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + SDValue NewMask = NormalizeMask(SVOp, DAG); + ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask); + if (NSVOp != SVOp) { + if (X86::isUNPCKLMask(NSVOp, true)) { + return NewMask; + } else if (X86::isUNPCKHMask(NSVOp, true)) { + return NewMask; } } } - // Normalize the node to match x86 shuffle ops if needed - if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (Commuted) { // Commute is back and try unpck* again. - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) - return Op; + // FIXME: this seems wrong. + SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); + ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); + if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || + X86::isUNPCKH_v_undef_Mask(NewSVOp) || + X86::isUNPCKLMask(NewSVOp) || + X86::isUNPCKHMask(NewSVOp)) + return NewOp; } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Try PSHUF* first, then SHUFP*. - // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically - // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. 
- if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { - if (V2.getOpcode() != ISD::UNDEF) - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, - DAG.getUNDEF(VT), PermMask); - return Op; - } - if (!isMMX) { - if (Subtarget->hasSSE2() && - (X86::isPSHUFDMask(PermMask.getNode()) || - X86::isPSHUFHWMask(PermMask.getNode()) || - X86::isPSHUFLWMask(PermMask.getNode()))) { - MVT RVT = VT; - if (VT == MVT::v4f32) { - RVT = MVT::v4i32; - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, - DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1), - DAG.getUNDEF(RVT), PermMask); - } else if (V2.getOpcode() != ISD::UNDEF) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1, - DAG.getUNDEF(RVT), PermMask); - if (RVT != VT) - Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); - return Op; - } - - // Binary or unary shufps. - if (X86::isSHUFPMask(PermMask.getNode()) || - (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) - return Op; - } + // Normalize the node to match x86 shuffle ops if needed + if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + return CommuteVectorShuffle(SVOp, DAG); + // Check for legal shuffle and return? + if (isShuffleMaskLegal(PermMask, VT)) + return Op; + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } if (VT == MVT::v16i8) { - SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } // Handle all 4 wide cases with a number of shuffles except for MMX. 
if (NumElems == 4 && !isMMX) - return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl); + return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); } @@ -4529,22 +4229,12 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; + // SHUFPS the element to the lowest double word, then movss. - MVT MaskVT = MVT::getIntVectorWithNumElements(4); - SmallVector<SDValue, 8> IdxVec; - IdxVec. - push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), Mask); + int Mask[4] = { Idx, -1, -1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } else if (VT.getSizeInBits() == 64) { @@ -4558,17 +4248,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - MVT MaskVT = MVT::getIntVectorWithNumElements(2); - SmallVector<SDValue, 8> IdxVec; - IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); - IdxVec. 
- push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), - Mask); + int Mask[2] = { 1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } @@ -5075,19 +4758,6 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); - SmallVector<SDValue, 4> MaskVec; - MaskVec.push_back(DAG.getConstant(0, MVT::i32)); - MaskVec.push_back(DAG.getConstant(4, MVT::i32)); - MaskVec.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec.push_back(DAG.getConstant(5, MVT::i32)); - SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &MaskVec[0], MaskVec.size()); - SmallVector<SDValue, 4> MaskVec2; - MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, - &MaskVec2[0], MaskVec2.size()); - SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), @@ -5096,13 +4766,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), DAG.getIntPtrConstant(0))); - SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, - XR1, XR2, UnpcklMask); + SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, false, 16); - SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, 
dl, MVT::v4i32, - Unpck1, CLod0, UnpcklMask); + SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, @@ -5110,8 +4778,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. - SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2f64, - Sub, Sub, ShufMask); + int ShufMask[2] = { 1, -1 }; + SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, + DAG.getUNDEF(MVT::v2f64), ShufMask); SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add, DAG.getIntPtrConstant(0)); @@ -7263,34 +6932,36 @@ bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(const int *Mask, MVT VT) const { // Only do shuffles on 128-bit vector types for now. - // FIXME: pshufb, blends - if (VT.getSizeInBits() == 64) return false; - return (Mask.getNode()->getNumOperands() <= 4 || - isIdentityMask(Mask.getNode()) || - isIdentityMask(Mask.getNode(), true) || - isSplatMask(Mask.getNode()) || - X86::isPSHUFHWMask(Mask.getNode()) || - X86::isPSHUFLWMask(Mask.getNode()) || - X86::isUNPCKLMask(Mask.getNode()) || - X86::isUNPCKHMask(Mask.getNode()) || - X86::isUNPCKL_v_undef_Mask(Mask.getNode()) || - X86::isUNPCKH_v_undef_Mask(Mask.getNode())); + if (VT.getSizeInBits() == 64) + return false; + + // FIXME: pshufb, blends, palignr, shifts. 
+ return (VT.getVectorNumElements() == 2 || + ShuffleVectorSDNode::isSplatMask(Mask, VT) || + isMOVLMask(Mask, VT) || + isSHUFPMask(Mask, VT) || + isPSHUFDMask(Mask, VT) || + isPSHUFHWMask(Mask, VT) || + isPSHUFLWMask(Mask, VT) || + isUNPCKLMask(Mask, VT) || + isUNPCKHMask(Mask, VT) || + isUNPCKL_v_undef_Mask(Mask, VT) || + isUNPCKH_v_undef_Mask(Mask, VT)); } bool -X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, - MVT EVT, SelectionDAG &DAG) const { - unsigned NumElts = BVOps.size(); - // Only do shuffles on 128-bit vector types for now. - if (EVT.getSizeInBits() * NumElts == 64) return false; - if (NumElts == 2) return true; - if (NumElts == 4) { - return (isMOVLMask(&BVOps[0], 4) || - isCommutedMOVL(&BVOps[0], 4, true) || - isSHUFPMask(&BVOps[0], 4) || - isCommutedSHUFP(&BVOps[0], 4)); +X86TargetLowering::isVectorClearMaskLegal(const int *Mask, MVT VT) const { + unsigned NumElts = VT.getVectorNumElements(); + // FIXME: This collection of masks seems suspect. + if (NumElts == 2) + return true; + if (NumElts == 4 && VT.getSizeInBits() == 128) { + return (isMOVLMask(Mask, VT) || + isCommutedMOVLMask(Mask, VT, true) || + isSHUFPMask(Mask, VT) || + isCommutedSHUFPMask(Mask, VT)); } return false; } @@ -8025,15 +7696,14 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, return false; } -static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, +static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask, unsigned NumElems, MVT EVT, SDNode *&Base, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { Base = NULL; for (unsigned i = 0; i < NumElems; ++i) { - SDValue Idx = PermMask.getOperand(i); - if (Idx.getOpcode() == ISD::UNDEF) { + if (PermMask[i] < 0) { if (!Base) return false; continue; @@ -8066,12 +7736,12 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, /// shuffle to be an appropriate build vector so it can take advantage of // performBuildVectorCombine. 
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - SDValue PermMask = N->getOperand(2); - unsigned NumElems = PermMask.getNumOperands(); + const int *PermMask = cast<ShuffleVectorSDNode>(N)->getMask(); + unsigned NumElems = VT.getVectorNumElements(); // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 // where the upper half is 0, it is advantageous to rewrite it as a build @@ -8080,9 +7750,10 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, SDValue In[2]; In[0] = N->getOperand(0); In[1] = N->getOperand(1); - unsigned Idx0 =cast<ConstantSDNode>(PermMask.getOperand(0))->getZExtValue(); - unsigned Idx1 =cast<ConstantSDNode>(PermMask.getOperand(1))->getZExtValue(); - if (In[0].getValueType().getVectorNumElements() == NumElems && + unsigned Idx0 = PermMask[0]; + unsigned Idx1 = PermMask[1]; + // FIXME: can we take advantage of undef index? + if (PermMask[0] >= 0 && PermMask[1] >= 0 && In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { ConstantSDNode* InsertVecIdx = @@ -8546,9 +8217,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - isSplatMask(ShAmtOp.getOperand(2).getNode())) { - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); + cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); } else return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 050b869..d91951c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -230,7 +230,8 @@ namespace llvm { // VSHL, VSRL - Vector logical left / right shift. 
VSHL, VSRL, - + + // CMPPD, CMPPS - Vector double/float comparison. // CMPPD, CMPPS - Vector double/float comparison. CMPPD, CMPPS, @@ -251,80 +252,72 @@ namespace llvm { namespace X86 { /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFDMask(SDNode *N); + bool isPSHUFDMask(ShuffleVectorSDNode *N); /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFHWMask(SDNode *N); + bool isPSHUFHWMask(ShuffleVectorSDNode *N); /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFLWMask(SDNode *N); + bool isPSHUFLWMask(ShuffleVectorSDNode *N); /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. - bool isSHUFPMask(SDNode *N); + bool isSHUFPMask(ShuffleVectorSDNode *N); /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. - bool isMOVHLPSMask(SDNode *N); + bool isMOVHLPSMask(ShuffleVectorSDNode *N); /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> - bool isMOVHLPS_v_undef_Mask(SDNode *N); + bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N); /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. - bool isMOVLPMask(SDNode *N); + /// specifies a shuffle of elements that is suitable for MOVLP{S|D}. 
+ bool isMOVLPMask(ShuffleVectorSDNode *N); /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} + /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. /// as well as MOVLHPS. - bool isMOVHPMask(SDNode *N); + bool isMOVHPMask(ShuffleVectorSDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. - bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> - bool isUNPCKL_v_undef_Mask(SDNode *N); + bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N); /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> - bool isUNPCKH_v_undef_Mask(SDNode *N); + bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N); /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. - bool isMOVLMask(SDNode *N); + bool isMOVLMask(ShuffleVectorSDNode *N); /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 
- bool isMOVSHDUPMask(SDNode *N); + bool isMOVSHDUPMask(ShuffleVectorSDNode *N); /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. - bool isMOVSLDUPMask(SDNode *N); - - /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a splat of a single element. - bool isSplatMask(SDNode *N); - - /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a splat of zero element. - bool isSplatLoMask(SDNode *N); + bool isMOVSLDUPMask(ShuffleVectorSDNode *N); /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. - bool isMOVDDUPMask(SDNode *N); + bool isMOVDDUPMask(ShuffleVectorSDNode *N); /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* @@ -477,14 +470,13 @@ namespace llvm { /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// values are assumed to be legal. - virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const; + virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const; /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. 
- virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, - MVT EVT, SelectionDAG &DAG) const; + virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const; /// ShouldShrinkFPConstant - If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 462433b..511d42a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -3801,6 +3801,7 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), (implicit EFLAGS)), (DEC32m addr:$dst)>, Requires<[In32BitMode]>; + //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 71f2cb1..338b9e2 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -30,33 +30,37 @@ def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; // MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to // PSHUFW imm. 
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{ +def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShuffleSHUFImmediate(N)); }]>; // Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKHMask(N); +def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); }]>; // Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKLMask(N); +def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); }]>; // Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKH_v_undef_Mask(N); +def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); }]>; // Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKL_v_undef_Mask(N); +def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); }]>; -// Patterns for shuffling. 
-def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFDMask(N); +def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); }], MMX_SHUFFLE_get_shuf_imm>; //===----------------------------------------------------------------------===// @@ -185,9 +189,8 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src), def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2i64 (vector_shuffle immAllZerosV, - (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))), - MOVL_shuffle_mask)))]>; + (movl immAllZerosV, + (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>; let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src), @@ -319,86 +322,74 @@ let isTwoAddress = 1 in { (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKH_shuffle_mask)))]>; + (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)), - MMX_UNPCKH_shuffle_mask)))]>; + (v8i8 (mmx_unpckh VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)))))]>; def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKH_shuffle_mask)))]>; + (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, - 
(bc_v4i16 (load_mmx addr:$src2)), - MMX_UNPCKH_shuffle_mask)))]>; + (v4i16 (mmx_unpckh VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)))))]>; def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKH_shuffle_mask)))]>; + (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)), - MMX_UNPCKH_shuffle_mask)))]>; + (v2i32 (mmx_unpckh VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)))))]>; // Unpack Low Packed Data Instructions def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKL_shuffle_mask)))]>; + (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)), - MMX_UNPCKL_shuffle_mask)))]>; + (v8i8 (mmx_unpckl VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)))))]>; def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKL_shuffle_mask)))]>; + (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)), - MMX_UNPCKL_shuffle_mask)))]>; + (v4i16 (mmx_unpckl VR64:$src1, + (bc_v4i16 (load_mmx 
addr:$src2)))))]>; def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKL_shuffle_mask)))]>; + (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)), - MMX_UNPCKL_shuffle_mask)))]>; + (v2i32 (mmx_unpckl VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)))))]>; } // -- Pack Instructions @@ -411,17 +402,13 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle - VR64:$src1, (undef), - MMX_PSHUFW_shuffle_mask:$src2)))]>; + (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle - (bc_v4i16 (load_mmx addr:$src1)), - (undef), - MMX_PSHUFW_shuffle_mask:$src2)))]>; + (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), + (undef)))]>; // -- Conversion Instructions let neverHasSideEffects = 1 in { @@ -627,34 +614,27 @@ def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))), // Patterns to perform canonical versions of vector shuffling. 
let AddedComplexity = 10 in { - def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKL_v_undef_shuffle_mask)), + def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKL_v_undef_shuffle_mask)), + def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))), (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKL_v_undef_shuffle_mask)), + def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))), (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; } let AddedComplexity = 10 in { - def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKH_v_undef_shuffle_mask)), + def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))), (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKH_v_undef_shuffle_mask)), + def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))), (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKH_v_undef_shuffle_mask)), + def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))), (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } // Patterns to perform vector shuffling with a zeroed out vector. let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))), - MMX_UNPCKL_shuffle_mask)), + def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, + (v2i32 (scalar_to_vector (load_mmx addr:$src))))), (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3ce35bd..a10f443 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -175,102 +175,107 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{ // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, // SHUFP* etc. imm. 
-def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{ +def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShuffleSHUFImmediate(N)); }]>; // SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to // PSHUFHW imm. -def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{ +def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); }]>; // SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to // PSHUFLW imm. -def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{ +def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>; -def SSE_splat_mask : PatLeaf<(build_vector), [{ - return X86::isSplatMask(N); -}], SHUFFLE_get_shuf_imm>; - -def SSE_splat_lo_mask : PatLeaf<(build_vector), [{ - return X86::isSplatLoMask(N); +def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return SVOp->isSplat() && SVOp->getSplatIndex() == 0; }]>; -def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVDDUPMask(N); +def movddup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N)); }]>; -def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVHLPSMask(N); +def movhlps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N)); }]>; -def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVHLPS_v_undef_Mask(N); +def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); }]>; -def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVHPMask(N); +def movhp : PatFrag<(ops 
node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N)); }]>; -def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVLPMask(N); +def movlp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N)); }]>; -def MOVL_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVLMask(N); +def movl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); }]>; -def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVSHDUPMask(N); +def movshdup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); }]>; -def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVSLDUPMask(N); +def movsldup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); }]>; -def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKLMask(N); +def unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); }]>; -def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKHMask(N); +def unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); }]>; -def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKL_v_undef_Mask(N); +def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); }]>; -def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKH_v_undef_Mask(N); +def unpckh_undef : PatFrag<(ops 
node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); }]>; -def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFDMask(N); -}], SHUFFLE_get_shuf_imm>; - -def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFHWMask(N); -}], SHUFFLE_get_pshufhw_imm>; - -def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFLWMask(N); -}], SHUFFLE_get_pshuflw_imm>; - -def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFDMask(N); +def pshufd : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); }], SHUFFLE_get_shuf_imm>; -def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isSHUFPMask(N); +def shufp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N)); }], SHUFFLE_get_shuf_imm>; -def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isSHUFPMask(N); -}], SHUFFLE_get_shuf_imm>; +def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_pshufhw_imm>; +def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_pshuflw_imm>; //===----------------------------------------------------------------------===// // SSE scalar FP Instructions @@ -704,16 +709,14 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), - MOVLP_shuffle_mask)))]>; + (movlp VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 
addr:$src2))))))]>; def MOVHPSrm : PSI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), - MOVHP_shuffle_mask)))]>; + (movhp VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -728,29 +731,25 @@ def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (vector_shuffle - (bc_v2f64 (v4f32 VR128:$src)), (undef), - UNPCKH_shuffle_mask)), (iPTR 0))), - addr:$dst)]>; + (unpckh (bc_v2f64 (v4f32 VR128:$src)), + (undef)), (iPTR 0))), addr:$dst)]>; let Constraints = "$src1 = $dst" in { let AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVHP_shuffle_mask)))]>; + (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVHLPS_shuffle_mask)))]>; + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" let AddedComplexity = 20 in -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)), +def : Pat<(v4f32 (movddup VR128:$src, (undef))), (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; @@ -908,51 +907,41 @@ let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in // Convert to pshufd def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2, i32i8imm:$src3), + VR128:$src2, 
i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, VR128:$src2, - SHUFP_shuffle_mask:$src3)))]>; + (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i32i8imm:$src3), + f128mem:$src2, i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, (memopv4f32 addr:$src2), - SHUFP_shuffle_mask:$src3)))]>; + (v4f32 (shufp:$src3 + VR128:$src1, (memopv4f32 addr:$src2))))]>; let AddedComplexity = 10 in { def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, (memopv4f32 addr:$src2), - UNPCKH_shuffle_mask)))]>; + (v4f32 (unpckh VR128:$src1, + (memopv4f32 addr:$src2))))]>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, (memopv4f32 addr:$src2), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1044,8 +1033,7 @@ let neverHasSideEffects = 1 in (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 
(vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)))]>; + (v4f32 (movl VR128:$src1, VR128:$src2)))]>; } // Move to lower bits of a VR128 and zeroing upper bits. @@ -1451,16 +1439,14 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)), - MOVLP_shuffle_mask)))]>; + (v2f64 (movlp VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))))]>; def MOVHPDrm : PDI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)), - MOVHP_shuffle_mask)))]>; + (v2f64 (movhp VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1474,9 +1460,8 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (vector_shuffle VR128:$src, (undef), - UNPCKH_shuffle_mask)), (iPTR 0))), - addr:$dst)]>; + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>; // SSE2 instructions without OpSize prefix def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -1744,48 +1729,39 @@ let Constraints = "$src1 = $dst" in { def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (v2f64 (vector_shuffle - VR128:$src1, VR128:$src2, - SHUFP_shuffle_mask:$src3)))]>; + [(set VR128:$dst, + (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set 
VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, (memopv2f64 addr:$src2), - SHUFP_shuffle_mask:$src3)))]>; + (v2f64 (shufp:$src3 + VR128:$src1, (memopv2f64 addr:$src2))))]>; let AddedComplexity = 10 in { def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, (memopv2f64 addr:$src2), - UNPCKH_shuffle_mask)))]>; + (v2f64 (unpckh VR128:$src1, + (memopv2f64 addr:$src2))))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, (memopv2f64 addr:$src2), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -2043,49 +2019,43 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; def PSHUFDri : PDIi8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (vector_shuffle - VR128:$src1, (undef), - PSHUFD_shuffle_mask:$src2)))]>; + [(set VR128:$dst, (v4i32 (pshufd:$src2 + VR128:$src1, (undef))))]>; def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 
(vector_shuffle + [(set VR128:$dst, (v4i32 (pshufd:$src2 (bc_v4i32(memopv2i64 addr:$src1)), - (undef), - PSHUFD_shuffle_mask:$src2)))]>; + (undef))))]>; // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - VR128:$src1, (undef), - PSHUFHW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1, + (undef))))]>, XS, Requires<[HasSSE2]>; def PSHUFHWmi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef), - PSHUFHW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshufhw:$src2 + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, XS, Requires<[HasSSE2]>; // SSE2 with ImmT == Imm8 and XD prefix. def PSHUFLWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - VR128:$src1, (undef), - PSHUFLW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1, + (undef))))]>, XD, Requires<[HasSSE2]>; def PSHUFLWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2), + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef), - PSHUFLW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshuflw:$src2 + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, XD, Requires<[HasSSE2]>; @@ -2094,107 +2064,91 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 
(vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (vector_shuffle VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2))))]>; def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2))))]>; def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>; def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 
"punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, - (memopv2i64 addr:$src2), - UNPCKL_shuffle_mask)))]>; + (v2i64 (unpckl VR128:$src1, + (memopv2i64 addr:$src2))))]>; def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v16i8 (vector_shuffle VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)), - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, + (unpckh VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2))))]>; def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)), - UNPCKH_shuffle_mask)))]>; + (unpckh VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2))))]>; def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), - UNPCKH_shuffle_mask)))]>; + (unpckh VR128:$src1, + (bc_v4i32 (memopv2i64 
addr:$src2))))]>; def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, - (memopv2i64 addr:$src2), - UNPCKH_shuffle_mask)))]>; + (v2i64 (unpckh VR128:$src1, + (memopv2i64 addr:$src2))))]>; } // Extract / Insert @@ -2357,8 +2311,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movsd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)))]>; + (v2f64 (movl VR128:$src1, VR128:$src2)))]>; } // Store / copy lower 64-bits of a XMM register. @@ -2449,44 +2402,35 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), // Move Instructions def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - VR128:$src, (undef), - MOVSHDUP_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (movshdup + VR128:$src, (undef))))]>; def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - (memopv4f32 addr:$src), (undef), - MOVSHDUP_shuffle_mask)))]>; + [(set VR128:$dst, (movshdup + (memopv4f32 addr:$src), (undef)))]>; def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - VR128:$src, (undef), - MOVSLDUP_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (movsldup + VR128:$src, (undef))))]>; def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 
"movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - (memopv4f32 addr:$src), (undef), - MOVSLDUP_shuffle_mask)))]>; + [(set VR128:$dst, (movsldup + (memopv4f32 addr:$src), (undef)))]>; def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movddup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src, (undef), - MOVDDUP_shuffle_mask)))]>; + [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "movddup\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (vector_shuffle - (scalar_to_vector (loadf64 addr:$src)), - (undef), MOVDDUP_shuffle_mask)))]>; + (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), + (undef))))]>; -def : Pat<(vector_shuffle - (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef), MOVDDUP_shuffle_mask), +def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(vector_shuffle - (memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask), +def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -2555,22 +2499,18 @@ def MWAIT : I<0xC9, RawFrm, (outs), (ins), "mwait", // vector_shuffle v1, <undef> <1, 1, 3, 3> let AddedComplexity = 15 in -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - MOVSHDUP_shuffle_mask)), +def : Pat<(v4i32 (movshdup VR128:$src, (undef))), (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in -def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), - MOVSHDUP_shuffle_mask)), +def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <0, 0, 2, 2> let AddedComplexity = 15 in - def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - MOVSLDUP_shuffle_mask)), + def : Pat<(v4i32 (movsldup 
VR128:$src, (undef))), (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in - def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), - MOVSLDUP_shuffle_mask)), + def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; //===----------------------------------------------------------------------===// @@ -2911,207 +2851,173 @@ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), // Splat v2f64 / v2i64 let AddedComplexity = 10 in { -def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), +def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), +def : Pat<(unpckh (v2f64 VR128:$src), (undef)), (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), +def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), +def : Pat<(unpckh (v2i64 VR128:$src), (undef)), (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // Special unary SHUFPSrri case. -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), - SHUFP_unary_shuffle_mask:$sm)), - (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, +def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; +let AddedComplexity = 5 in +def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), + (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[HasSSE2]>; +// Special unary SHUFPDrri case. 
+def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE2]>; // Special unary SHUFPDrri case. -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef), - SHUFP_unary_shuffle_mask:$sm)), - (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, +def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Unary v4f32 shuffle with PSHUF* in order to fold a load. -def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef), - SHUFP_unary_shuffle_mask:$sm), - (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>, +def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), + (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[HasSSE2]>; // Special binary v4i32 shuffle cases with SHUFPS. -def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2), - PSHUFD_binary_shuffle_mask:$sm)), - (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>, +def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)), - (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>, +def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), + (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Special binary v2i64 shuffle cases using SHUFPDrri. 
-def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - SHUFP_shuffle_mask:$sm)), - (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>, +def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. -def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef), - SHUFP_unary_shuffle_mask:$sm)), - (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, - Requires<[HasSSE2]>; // vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), 
(PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVHP_shuffle_mask)), +def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVHLPS_shuffle_mask)), +def : Pat<(v4i32 
(movhlps VR128:$src1, VR128:$src2)), (MOVHLPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), - MOVHLPS_v_undef_shuffle_mask)), +def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), (MOVHLPSrr VR128:$src1, VR128:$src1)>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef), - MOVHLPS_v_undef_shuffle_mask)), +def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), (MOVHLPSrr VR128:$src1, VR128:$src1)>; } let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS // vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : 
Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS // (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS -def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (vector_shuffle - (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : 
Pat<(store (v4i32 (vector_shuffle - (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, - MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)), +def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)), +def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) -def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVLP_shuffle_mask)), +def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVLP_shuffle_mask)), +def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; } // Set lowest element and zero upper elements. let AddedComplexity = 15 in -def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src, - MOVL_shuffle_mask)), +def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; |