summary | refs | log | tree | commit | diff | stats
path: root/test/CodeGen/X86/vec_shift4.ll
diff options
context:
space:
mode:
author: Nate Begeman <natebegeman@mac.com> 2010-07-28 00:21:48 +0000
committer: Nate Begeman <natebegeman@mac.com> 2010-07-28 00:21:48 +0000
commit: 51409214d7471328df2e92e0a8f9d05221fe0e6f (patch)
tree: 30478f9edf8a371dfbe2beb08a71d415e821e32b /test/CodeGen/X86/vec_shift4.ll
parent: f374ba2bcd3a3b993d6b3fcd9f70a29973f93175 (diff)
download: external_llvm-51409214d7471328df2e92e0a8f9d05221fe0e6f.zip
external_llvm-51409214d7471328df2e92e0a8f9d05221fe0e6f.tar.gz
external_llvm-51409214d7471328df2e92e0a8f9d05221fe0e6f.tar.bz2
Implement a vectorized algorithm for <16 x i8> << <16 x i8>
This is about 4x faster and smaller than the existing scalarization.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109566 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/vec_shift4.ll')
-rw-r--r-- test/CodeGen/X86/vec_shift4.ll 13
1 file changed, 12 insertions, 1 deletion
diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll
index d8f4e4e..9ef7fbd 100644
--- a/test/CodeGen/X86/vec_shift4.ll
+++ b/test/CodeGen/X86/vec_shift4.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
-define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
+define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
; CHECK-NOT: shll
; CHECK: pslld
@@ -12,3 +12,14 @@ entry:
%tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp2
}
+
+define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { ; test for the vectorized <16 x i8> << <16 x i8> lowering under -mattr=+sse41
+entry: ; the FileCheck directives below expect no shlb ahead of the first pblendvb, then three pblendvb matches in order
+; CHECK-NOT: shlb
+; CHECK: pblendvb
+; CHECK: pblendvb
+; CHECK: pblendvb
+ %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1] each i8 lane of %r is shifted left by the corresponding lane of %a
+ %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] same 128 bits, retyped to the declared return type
+ ret <2 x i64> %tmp2
+}