summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-09-16 07:39:07 +0000
committerNadav Rotem <nrotem@apple.com>2012-09-16 07:39:07 +0000
commit638e4c13cb25e8ad1044c1d581fb387ec8d15033 (patch)
treeb6e61b7c40456f0778feab86ac8529afa3011633
parent3c0e5c9ecedb00d3c36fb2747b642bd3e38d0260 (diff)
downloadexternal_llvm-638e4c13cb25e8ad1044c1d581fb387ec8d15033.zip
external_llvm-638e4c13cb25e8ad1044c1d581fb387ec8d15033.tar.gz
external_llvm-638e4c13cb25e8ad1044c1d581fb387ec8d15033.tar.bz2
The PMOVZXWD family of functions had patterns extends narrow vector types to wide vector types.
It had patterns for zext-loading and extending. This commit adds patterns for loading a wide type, performing a bitcast, and extending. This is an odd pattern, but it is commonly used when writing code with intrinsics. rdar://11897677 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163995 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrSSE.td24
-rw-r--r--test/CodeGen/X86/pmovext.ll22
2 files changed, 46 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 17e91a6..9a37ec5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5576,31 +5576,43 @@ let Predicates = [HasAVX] in {
(VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(VPMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (VPMOVZXDQrm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@@ -5609,31 +5621,43 @@ let Predicates = [UseSSE41] in {
(PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (PMOVZXDQrm addr:$src)>;
}
let Predicates = [HasAVX2] in {
diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll
new file mode 100644
index 0000000..c453046
--- /dev/null
+++ b/test/CodeGen/X86/pmovext.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+; rdar://11897677
+
+;CHECK: intrin_pmov
+;CHECK: pmovzxbw (%rsi), %xmm0
+;CHECK-NEXT: movdqu
+;CHECK-NEXT: ret
+define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
+ %1 = bitcast i8* %src to <2 x i64>*
+ %2 = load <2 x i64>* %1, align 16
+ %3 = bitcast <2 x i64> %2 to <16 x i8>
+ %4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
+ %5 = bitcast i16* %dest to i8*
+ %6 = bitcast <8 x i16> %4 to <16 x i8>
+ tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind