diff options
author | Chris Lattner <sabre@nondot.org> | 2011-06-18 06:05:24 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-06-18 06:05:24 +0000 |
commit | b85e4eba85a38698f3b3332f82554bf8442547e2 (patch) | |
tree | ae680321c7e03ee37d612c42282038950d37ea13 /test/Assembler | |
parent | 6be41eb7f00319f5ffa1a5435dcd1e81b3ce932d (diff) | |
download | external_llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.zip external_llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.gz external_llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.bz2 |
Rip out a ton of intrinsic modernization logic from AutoUpgrade.cpp, which is
for pre-2.9 bitcode files. We keep the x86 unaligned loads, movnt, crc32, and the
target-independent prefetch change.
As usual, updating the testsuite is a PITA.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133337 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Assembler')
-rw-r--r-- | test/Assembler/AutoUpgradeIntrinsics.ll | 85 | ||||
-rw-r--r-- | test/Assembler/AutoUpgradeMMXIntrinsics.ll | 223 |
2 files changed, 4 insertions, 304 deletions
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll index 20beb49..eb4ac76 100644 --- a/test/Assembler/AutoUpgradeIntrinsics.ll +++ b/test/Assembler/AutoUpgradeIntrinsics.ll @@ -1,87 +1,6 @@ ; Tests to make sure intrinsics are automatically upgraded. -; RUN: llvm-as < %s | llvm-dis | not grep {i32 @llvm\\.ct} -; RUN: llvm-as < %s | llvm-dis | \ -; RUN: not grep {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*} -; RUN: llvm-as < %s | llvm-dis | \ -; RUN: not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*} -; RUN: llvm-as < %s | llvm-dis | \ -; RUN: not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*} -; RUN: llvm-as < %s | llvm-dis | \ -; RUN: not grep {llvm\\.x86\\.sse2\\.loadu} -; RUN: llvm-as < %s | llvm-dis | \ -; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s -declare i32 @llvm.ctpop.i28(i28 %val) -declare i32 @llvm.cttz.i29(i29 %val) -declare i32 @llvm.ctlz.i30(i30 %val) - -define i32 @test_ct(i32 %A) { - %c1 = call i32 @llvm.ctpop.i28(i28 1234) - %c2 = call i32 @llvm.cttz.i29(i29 2345) - %c3 = call i32 @llvm.ctlz.i30(i30 3456) - %r1 = add i32 %c1, %c2 - %r2 = add i32 %r1, %c3 - ret i32 %r2 -} - -declare i32 @llvm.part.set.i32.i32.i32(i32 %x, i32 %rep, i32 %hi, i32 %lo) -declare i16 @llvm.part.set.i16.i16.i16(i16 %x, i16 %rep, i32 %hi, i32 %lo) -define i32 @test_part_set(i32 %A, i16 %B) { - %a = call i32 @llvm.part.set.i32.i32.i32(i32 %A, i32 27, i32 8, i32 0) - %b = call i16 @llvm.part.set.i16.i16.i16(i16 %B, i16 27, i32 8, i32 0) - %c = zext i16 %b to i32 - %d = add i32 %a, %c - ret i32 %d -} - -declare i32 @llvm.part.select.i32.i32(i32 %x, i32 %hi, i32 %lo) -declare i16 @llvm.part.select.i16.i16(i16 %x, i32 %hi, i32 %lo) -define i32 @test_part_select(i32 %A, i16 %B) { - %a = call i32 @llvm.part.select.i32.i32(i32 %A, i32 8, i32 0) - %b = call i16 @llvm.part.select.i16.i16(i16 %B, i32 8, i32 0) - %c = zext i16 %b to i32 - %d = add i32 %a, %c - ret i32 
%d -} - -declare i32 @llvm.bswap.i32.i32(i32 %x) -declare i16 @llvm.bswap.i16.i16(i16 %x) -define i32 @test_bswap(i32 %A, i16 %B) { - %a = call i32 @llvm.bswap.i32.i32(i32 %A) - %b = call i16 @llvm.bswap.i16.i16(i16 %B) - %c = zext i16 %b to i32 - %d = add i32 %a, %c - ret i32 %d -} - -declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <2 x i32>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <2 x i32>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <2 x i32>) nounwind readnone -define void @sh16(<4 x i16> %A, <2 x i32> %B) { - %r1 = call <4 x i16> @llvm.x86.mmx.psra.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0] - %r2 = call <4 x i16> @llvm.x86.mmx.psll.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0] - %r3 = call <4 x i16> @llvm.x86.mmx.psrl.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0] - ret void -} - -declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <2 x i32>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <2 x i32>) nounwind readnone -define void @sh32(<2 x i32> %A, <2 x i32> %B) { - %r1 = call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0] - %r2 = call <2 x i32> @llvm.x86.mmx.psll.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0] - %r3 = call <2 x i32> @llvm.x86.mmx.psrl.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0] - ret void -} - -declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <2 x i32>) nounwind readnone -define void @sh64(<1 x i64> %A, <2 x i32> %B) { - %r1 = call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0] - %r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0] - ret void -} declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone declare <16 x i8> 
@llvm.x86.sse2.loadu.dq(i8*) nounwind readnone @@ -90,6 +9,10 @@ define void @test_loadu(i8* %a, double* %b) { %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a) %v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a) %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b) + +; CHECK: load i128* {{.*}}, align 1 +; CHECK: load i128* {{.*}}, align 1 +; CHECK: load i128* {{.*}}, align 1 ret void } diff --git a/test/Assembler/AutoUpgradeMMXIntrinsics.ll b/test/Assembler/AutoUpgradeMMXIntrinsics.ll deleted file mode 100644 index 54120ff..0000000 --- a/test/Assembler/AutoUpgradeMMXIntrinsics.ll +++ /dev/null @@ -1,223 +0,0 @@ -; Tests to make sure MMX intrinsics are automatically upgraded. -; RUN: llvm-as < %s | llvm-dis -o %t -; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<1 x i64\\\>} -; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<2 x i32\\\>} -; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<4 x i16\\\>} -; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<8 x i8\\\>} -; RUN: grep {llvm\\.x86\\.sse\\.pshuf\\.w} %t | not grep i32 - -; Addition -declare <8 x i8> @llvm.x86.mmx.padd.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>) nounwind readnone -define void @add(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D, - <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) { - %r1 = call <8 x i8> @llvm.x86.mmx.padd.b(<8 x i8> %A, <8 x i8> %B) - %r2 = call <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16> %C, <4 x i16> %D) - %r3 = call <2 x i32> 
@llvm.x86.mmx.padd.d(<2 x i32> %E, <2 x i32> %F) - %r4 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %G, <1 x i64> %H) - %r5 = call <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8> %A, <8 x i8> %B) - %r6 = call <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16> %C, <4 x i16> %D) - %r7 = call <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8> %A, <8 x i8> %B) - %r8 = call <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16> %C, <4 x i16> %D) - ret void -} - -; Subtraction -declare <8 x i8> @llvm.x86.mmx.psub.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>) nounwind readnone -define void @sub(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D, - <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) { - %r1 = call <8 x i8> @llvm.x86.mmx.psub.b(<8 x i8> %A, <8 x i8> %B) - %r2 = call <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16> %C, <4 x i16> %D) - %r3 = call <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32> %E, <2 x i32> %F) - %r4 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %G, <1 x i64> %H) - %r5 = call <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8> %A, <8 x i8> %B) - %r6 = call <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16> %C, <4 x i16> %D) - %r7 = call <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8> %A, <8 x i8> %B) - %r8 = call <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16> %C, <4 x i16> %D) - ret void -} - -; Multiplication -declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <4 x i16> 
@llvm.x86.mmx.pmulhu.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>) nounwind readnone -define void @mul(<4 x i16> %A, <4 x i16> %B) { - %r1 = call <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16> %A, <4 x i16> %B) - %r2 = call <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16> %A, <4 x i16> %B) - %r3 = call <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16> %A, <4 x i16> %B) - %r4 = call <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16> %A, <4 x i16> %B) - %r5 = call <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16> %A, <4 x i16> %B) - ret void -} - -; Bitwise operations -declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone -define void @bit(<1 x i64> %A, <1 x i64> %B) { - %r1 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %A, <1 x i64> %B) - %r2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %A, <1 x i64> %B) - %r3 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %A, <1 x i64> %B) - %r4 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %A, <1 x i64> %B) - ret void -} - -; Averages -declare <8 x i8> @llvm.x86.mmx.pavg.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16>, <4 x i16>) nounwind readnone -define void @avg(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) { - %r1 = call <8 x i8> @llvm.x86.mmx.pavg.b(<8 x i8> %A, <8 x i8> %B) - %r2 = call <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16> %C, <4 x i16> %D) - ret void -} - -; Maximum -declare <8 x i8> @llvm.x86.mmx.pmaxu.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16>, <4 x i16>) nounwind readnone -define void @max(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) { - %r1 = call <8 x 
i8> @llvm.x86.mmx.pmaxu.b(<8 x i8> %A, <8 x i8> %B) - %r2 = call <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16> %C, <4 x i16> %D) - ret void -} - -; Minimum -declare <8 x i8> @llvm.x86.mmx.pminu.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16>, <4 x i16>) nounwind readnone -define void @min(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) { - %r1 = call <8 x i8> @llvm.x86.mmx.pminu.b(<8 x i8> %A, <8 x i8> %B) - %r2 = call <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16> %C, <4 x i16> %D) - ret void -} - -; Packed sum of absolute differences -declare <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8>, <8 x i8>) nounwind readnone -define void @psad(<8 x i8> %A, <8 x i8> %B) { - %r1 = call <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8> %A, <8 x i8> %B) - ret void -} - -; Shift left -declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <1 x i64>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <1 x i64>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16>, i32) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32>, i32) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone -define void @shl(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) { - %r1 = call <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16> %A, <1 x i64> %C) - %r2 = call <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32> %B, <1 x i64> %C) - %r3 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %C, <1 x i64> %C) - %r4 = call <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16> %A, i32 %D) - %r5 = call <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32> %B, i32 %D) - %r6 = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %C, i32 %D) - ret void -} - -; Shift right logical -declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <1 x i64>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <1 x i64>) nounwind readnone -declare <1 x i64> 
@llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32>, i32) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone -define void @shr(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) { - %r1 = call <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16> %A, <1 x i64> %C) - %r2 = call <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32> %B, <1 x i64> %C) - %r3 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %C, <1 x i64> %C) - %r4 = call <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16> %A, i32 %D) - %r5 = call <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32> %B, i32 %D) - %r6 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %C, i32 %D) - ret void -} - -; Shift right arithmetic -declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <1 x i64>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <1 x i64>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16>, i32) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32>, i32) nounwind readnone -define void @sra(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) { - %r1 = call <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16> %A, <1 x i64> %C) - %r2 = call <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32> %B, <1 x i64> %C) - %r3 = call <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16> %A, i32 %D) - %r4 = call <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32> %B, i32 %D) - ret void -} - -; Pack/Unpack ops -declare <8 x i8> @llvm.x86.mmx.packsswb(<4 x i16>, <4 x i16>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.x86.mmx.packuswb(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i8> @llvm.x86.mmx.punpckhbw(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <8 x i8> @llvm.x86.mmx.punpcklbw(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32>, <2 x i32>) nounwind readnone -define void @pack_unpack(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D, - <2 x i32> %E, <2 x i32> %F) { - %r1 = call <8 x i8> @llvm.x86.mmx.packsswb(<4 x i16> %C, <4 x i16> %D) - %r2 = call <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32> %E, <2 x i32> %F) - %r3 = call <8 x i8> @llvm.x86.mmx.packuswb(<4 x i16> %C, <4 x i16> %D) - %r4 = call <8 x i8> @llvm.x86.mmx.punpckhbw(<8 x i8> %A, <8 x i8> %B) - %r5 = call <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16> %C, <4 x i16> %D) - %r6 = call <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32> %E, <2 x i32> %F) - %r7 = call <8 x i8> @llvm.x86.mmx.punpcklbw(<8 x i8> %A, <8 x i8> %B) - %r8 = call <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16> %C, <4 x i16> %D) - %r9 = call <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32> %E, <2 x i32> %F) - ret void -} - -; Integer comparison ops -declare <8 x i8> @llvm.x86.mmx.pcmpeq.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.x86.mmx.pcmpgt.b(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32>, <2 x i32>) nounwind readnone -define void @cmp(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D, - <2 x i32> %E, <2 x i32> %F) { - %r1 = call <8 x i8> @llvm.x86.mmx.pcmpeq.b(<8 x i8> %A, <8 x i8> %B) - %r2 = call <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16> %C, <4 x i16> %D) - %r3 = call <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32> %E, <2 x i32> %F) - %r4 = call <8 x i8> @llvm.x86.mmx.pcmpgt.b(<8 x i8> %A, <8 x i8> %B) - %r5 = call <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16> 
%C, <4 x i16> %D) - %r6 = call <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32> %E, <2 x i32> %F) - ret void -} - -; Miscellaneous -declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i32*) nounwind readnone -declare i32 @llvm.x86.mmx.pmovmskb(<8 x i8>) nounwind readnone -declare void @llvm.x86.mmx.movnt.dq(i32*, <1 x i64>) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone -declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32) nounwind readnone -declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32) nounwind readnone -declare <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16>, i32) nounwind readnone -define void @misc(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D, - <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H, - i32* %I, i8 %J, i16 %K, i32 %L) { - call void @llvm.x86.mmx.maskmovq(<8 x i8> %A, <8 x i8> %B, i32* %I) - %r1 = call i32 @llvm.x86.mmx.pmovmskb(<8 x i8> %A) - call void @llvm.x86.mmx.movnt.dq(i32* %I, <1 x i64> %G) - %r2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %G, <1 x i64> %H, i8 %J) - %r3 = call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %G, i32 37) - %r4 = call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %G, i32 37, i32 927) - %r5 = call <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16> %C, i32 37) - ret void -} |