From 51409214d7471328df2e92e0a8f9d05221fe0e6f Mon Sep 17 00:00:00 2001
From: Nate Begeman <natebegeman@mac.com>
Date: Wed, 28 Jul 2010 00:21:48 +0000
Subject: Implement a vectorized algorithm for <16 x i8> << <16 x i8>. This is
 about 4x faster and smaller than the existing scalarization.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109566 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/vec_shift4.ll | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'test/CodeGen/X86/vec_shift4.ll')

diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll
index d8f4e4e..9ef7fbd 100644
--- a/test/CodeGen/X86/vec_shift4.ll
+++ b/test/CodeGen/X86/vec_shift4.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
 
-define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
+define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
 entry:
 ; CHECK-NOT: shll
 ; CHECK: pslld
@@ -12,3 +12,14 @@ entry:
   %tmp2 = bitcast <4 x i32> %shl to <2 x i64>     ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %tmp2
 }
+
+define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
+entry:                                            ; per-byte variable shift: expect a vector blend sequence, no scalar byte shifts
+; CHECK-NOT: shlb
+; CHECK: pblendvb
+; CHECK: pblendvb
+; CHECK: pblendvb
+  %shl = shl <16 x i8> %r, %a                     ; each i8 lane of %r is shifted left by the matching lane of %a
+  %tmp2 = bitcast <16 x i8> %shl to <2 x i64>     ; reinterpret the 128-bit result as <2 x i64> to match the return type
+  ret <2 x i64> %tmp2
+}
-- 
cgit v1.1