-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp   4
-rw-r--r--  lib/Target/X86/X86InstrSSE.td        22
2 files changed, 23 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6e6d2f5..b33fd82 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7949,9 +7949,9 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
 bool
 X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
                                       EVT VT) const {
-  // Only do shuffles on 128-bit vector types for now.
+  // Very little shuffling can be done for 64-bit vectors right now.
   if (VT.getSizeInBits() == 64)
-    return false;
+    return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());
 
   // FIXME: pshufb, blends, shifts.
   return (VT.getVectorNumElements() == 2 ||
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 48e4e0b..ffe2a88 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2915,6 +2915,7 @@ defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
                                         int_x86_ssse3_psign_d,
                                         int_x86_ssse3_psign_d_128>;
 
+// palignr patterns.
 let Constraints = "$src1 = $dst" in {
   def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
                           (ins VR64:$src1, VR64:$src2, i8imm:$src3),
@@ -2935,7 +2936,6 @@ let Constraints = "$src1 = $dst" in {
                           []>, OpSize;
 }
 
-// palignr patterns.
 def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)),
           (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>,
           Requires<[HasSSSE3]>;
@@ -2944,6 +2944,26 @@ def : Pat<(int_x86_ssse3_palign_r VR64:$src1,
                                   (i8 imm:$src3)),
           (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
           Requires<[HasSSSE3]>;
+def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
+          (PALIGNR64rr VR64:$src2, VR64:$src1,
+                       (SHUFFLE_get_palign_imm VR64:$src3))>,
+          Requires<[HasSSSE3]>;
+def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
+          (PALIGNR64rr VR64:$src2, VR64:$src1,
+                       (SHUFFLE_get_palign_imm VR64:$src3))>,
+          Requires<[HasSSSE3]>;
+def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)),
+          (PALIGNR64rr VR64:$src2, VR64:$src1,
+                       (SHUFFLE_get_palign_imm VR64:$src3))>,
+          Requires<[HasSSSE3]>;
+def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
+          (PALIGNR64rr VR64:$src2, VR64:$src1,
+                       (SHUFFLE_get_palign_imm VR64:$src3))>,
+          Requires<[HasSSSE3]>;
+def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)),
+          (PALIGNR64rr VR64:$src2, VR64:$src1,
+                       (SHUFFLE_get_palign_imm VR64:$src3))>,
+          Requires<[HasSSSE3]>;
 def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)),
           (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>,
           Requires<[HasSSSE3]>;
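For context (not part of the commit): PALIGNR concatenates its two source operands and extracts a contiguous byte-shifted window, so a shuffle mask is PALIGNR-compatible when every defined mask element selects consecutive positions from the concatenated sources. The sketch below illustrates that shape check on a plain mask; the function name isPalignrShapedMask and the code are illustrative only and are not LLVM's isPALIGNRMask implementation, which the patched isShuffleMaskLegal calls.

// Minimal standalone sketch, assuming a shuffle mask is given as a vector of
// ints where -1 marks an undef element, as in LLVM's shuffle masks.
#include <cstdio>
#include <vector>

// Returns true if all defined elements form one consecutive run
// (Mask[i] == Shift + i) into the concatenation of the two sources,
// i.e. the mask could be implemented with a single palignr.
static bool isPalignrShapedMask(const std::vector<int> &Mask) {
  const int NumElts = static_cast<int>(Mask.size());

  // Find the first defined element; an all-undef mask is not a palignr.
  int First = 0;
  while (First != NumElts && Mask[First] < 0)
    ++First;
  if (First == NumElts)
    return false;

  // The shift amount is implied by the first defined element.
  const int Shift = Mask[First] - First;
  if (Shift < 0 || Shift >= 2 * NumElts)
    return false;

  // Every other defined element must continue the same consecutive run.
  for (int i = First + 1; i != NumElts; ++i)
    if (Mask[i] >= 0 && Mask[i] != Shift + i)
      return false;
  return true;
}

int main() {
  // An <8 x i8> shuffle taking elements 3..10 of the concatenated inputs:
  // a rotation by 3 bytes, so palignr-shaped.
  std::vector<int> Rotate = {3, 4, 5, 6, 7, 8, 9, 10};
  // An arbitrary permutation is not.
  std::vector<int> Arbitrary = {0, 2, 1, 3, 4, 5, 6, 7};
  std::printf("%d %d\n", isPalignrShapedMask(Rotate),
              isPalignrShapedMask(Arbitrary));  // prints "1 0"
  return 0;
}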