author     Benjamin Kramer <benny.kra@googlemail.com>   2013-10-23 21:06:07 +0000
committer  Benjamin Kramer <benny.kra@googlemail.com>   2013-10-23 21:06:07 +0000
commit     bb41c75ab51fcfc3ad36d3f8a438652b141e0fc0 (patch)
tree       1bb8f7c09feb260afbdaedf1b4ce48f42c0d3024
parent     d7da59004fc9262f33c96ad2736b36ff1235ce7b (diff)
X86: Custom lower sext v16i8 to v16i16, and the corresponding truncate.
Also update the cost model.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193270 91177308-0d34-0410-b5e6-96231b3b80d8
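
For reference, a minimal IR sketch of the two operations this change custom-lowers instead of scalarizing (the function names are illustrative and not part of the patch; the instruction sequences and costs in the comments are taken from the tests and cost-model entries added below):

; sext v16i8 -> v16i16: on AVX2 a single vpmovsxbw to a ymm register (cost 1);
; on plain AVX, a vpmovsxbw per half plus a vmovhlps (see avx-sext.ll below).
define <16 x i16> @sext_example(<16 x i8> %x) {
  %y = sext <16 x i8> %x to <16 x i16>
  ret <16 x i16> %y
}

; trunc v16i16 -> v16i8, the corresponding truncate: on AVX2 lowered to
; vpshufb/vpshufb/vpor (cost 2 in the updated cost model).
define <16 x i8> @trunc_example(<16 x i16> %x) {
  %y = trunc <16 x i16> %x to <16 x i8>
  ret <16 x i8> %y
}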
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp         | 14
-rw-r--r--  lib/Target/X86/X86InstrSSE.td              |  3
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp  |  3
-rw-r--r--  test/Analysis/CostModel/X86/cast.ll        |  8
-rw-r--r--  test/CodeGen/X86/avx-sext.ll               | 11
-rw-r--r--  test/CodeGen/X86/avx-trunc.ll              |  7
-rw-r--r--  test/CodeGen/X86/avx2-conversions.ll       | 19
-rw-r--r--  test/CodeGen/X86/pmovsx-inreg.ll           |  3
8 files changed, 58 insertions, 10 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d747647..694623e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1150,9 +1150,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
     setOperationAction(ISD::FABS, MVT::v4f64, Custom);
 
-    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
-    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
-
     setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
     setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1160,8 +1157,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
     setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
 
-    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
-    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
 
     setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
@@ -1194,10 +1189,16 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
     setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
     setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
 
     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
       setOperationAction(ISD::FMA, MVT::v8f32, Legal);
@@ -10391,7 +10392,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
     return LowerSIGN_EXTEND_AVX512(Op, DAG);
 
   if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
-      (VT != MVT::v8i32 || InVT != MVT::v8i16))
+      (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
+      (VT != MVT::v16i16 || InVT != MVT::v16i8))
     return SDValue();
 
   if (Subtarget->hasInt256())
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 004710b..7cae485 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5602,16 +5602,19 @@ let Predicates = [HasAVX2] in {
   def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
   def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+  def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
 }
 
 let Predicates = [HasAVX] in {
   def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
   def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+  def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
 }
 
 let Predicates = [UseSSE41] in {
   def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
   def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+  def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
 }
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 36bfeb1..f88a666 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -401,12 +401,15 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
   static const TypeConversionCostTblEntry<MVT::SimpleValueType>
   AVXConversionTbl[] = {
+    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
+    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
 
     { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
     { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
 
     { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
     { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index b69b3bf..f3c1283 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -38,6 +38,10 @@ define i32 @zext_sext(<8 x i1> %in) {
   ;CHECK: cost of 9 {{.*}} sext
   %S = sext <8 x i1> %in to <8 x i32>
 
+  ;CHECK: cost of 1 {{.*}} zext
+  %A1 = zext <16 x i8> undef to <16 x i16>
+  ;CHECK: cost of 1 {{.*}} sext
+  %A2 = sext <16 x i8> undef to <16 x i16>
   ;CHECK: cost of 1 {{.*}} sext
   %A = sext <8 x i16> undef to <8 x i32>
   ;CHECK: cost of 1 {{.*}} zext
@@ -51,11 +55,13 @@ define i32 @zext_sext(<8 x i1> %in) {
   ;CHECK: cost of 1 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
 
-  ;CHECK: cost of 1 {{.*}} trunc
+  ;CHECK: cost of 1 {{.*}} trunc
   %E = trunc <4 x i64> undef to <4 x i32>
 
   ;CHECK: cost of 1 {{.*}} trunc
   %F = trunc <8 x i32> undef to <8 x i16>
+  ;CHECK: cost of 2 {{.*}} trunc
+  %F1 = trunc <16 x i16> undef to <16 x i8>
 
   ;CHECK: cost of 3 {{.*}} trunc
   %G = trunc <8 x i64> undef to <8 x i32>
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index b9c7000..fb2287f 100644
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -154,6 +154,17 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
   ret <4 x i64> %extmask
 }
 
+; AVX-LABEL: sext_16i8_to_16i16
+; AVX: vpmovsxbw
+; AVX: vmovhlps
+; AVX: vpmovsxbw
+; AVX: ret
+define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
+ %X = load <16 x i8>* %ptr
+ %Y = sext <16 x i8> %X to <16 x i16>
+ ret <16 x i16> %Y
+}
+
 ; AVX: sext_4i8_to_4i64
 ; AVX: vpslld $24
 ; AVX: vpsrad $24
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
index d007736..58d0a35 100644
--- a/test/CodeGen/X86/avx-trunc.ll
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -12,4 +12,9 @@ define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
   %B = trunc <8 x i32> %A to <8 x i16>
   ret <8 x i16>%B
 }
-
+define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{
+; CHECK-LABEL: trunc_16_8
+; CHECK: pshufb
+  %B = trunc <16 x i16> %A to <16 x i8>
+  ret <16 x i8> %B
+}
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index 0143f18..f49718e 100644
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -72,6 +72,25 @@ define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
   ret <16 x i16> %t
 }
 
+; CHECK-LABEL: sext_16i8_16i16:
+; CHECK: vpmovsxbw
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
+  %t = sext <16 x i8> %z to <16 x i16>
+  ret <16 x i16> %t
+}
+
+; CHECK-LABEL: trunc_16i16_16i8:
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpor
+; CHECK: ret
+define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
+  %t = trunc <16 x i16> %z to <16 x i8>
+  ret <16 x i8> %t
+}
+
 ; CHECK: load_sext_test1
 ; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret
diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll
index d30d7d0..07979f6 100644
--- a/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/test/CodeGen/X86/pmovsx-inreg.ll
@@ -86,8 +86,7 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
   ret void
 
 ; AVX2-LABEL: test6:
-; FIXME: v16i8 -> v16i16 is scalarized.
-; AVX2-NOT: pmovsx
+; AVX2: vpmovsxbw
 }
 
 define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {