diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2012-12-11 21:25:42 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-12-11 21:25:42 +0000 |
commit | f98f2ce29e6e2996fa58f38979143eceaa818335 (patch) | |
tree | 86dffe7414e6657874db8ac36e5ddcf7d41b2d9c /test/CodeGen | |
parent | 57ac1f458a754f30cf500410b438fb260f9b8fe5 (diff) | |
download | external_llvm-f98f2ce29e6e2996fa58f38979143eceaa818335.zip external_llvm-f98f2ce29e6e2996fa58f38979143eceaa818335.tar.gz external_llvm-f98f2ce29e6e2996fa58f38979143eceaa818335.tar.bz2 |
Add R600 backend
A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169915 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
39 files changed, 636 insertions, 0 deletions
diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.v4i32.ll new file mode 100644 index 0000000..ac4a874 --- /dev/null +++ b/test/CodeGen/R600/add.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = add <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.v4i32.ll new file mode 100644 index 0000000..662085e --- /dev/null +++ b/test/CodeGen/R600/and.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = and <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll new file mode 100644 index 0000000..0407533 --- /dev/null +++ b/test/CodeGen/R600/fabs.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MOV T{{[0-9]+\.[XYZW], 
\|T[0-9]+\.[XYZW]\|}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @fabs( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @fabs(float ) readnone diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll new file mode 100644 index 0000000..d7d1b65 --- /dev/null +++ b/test/CodeGen/R600/fadd.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fadd float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll new file mode 100644 index 0000000..85dbfd5 --- /dev/null +++ b/test/CodeGen/R600/fadd.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fadd <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll new file mode 100644 index 0000000..a94cfb5 --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 
-mcpu=redwood | FileCheck %s + +;Not checking arguments 2 and 3 to CNDE, because they may change between +;registers and literal.x depending on what the optimizer does. +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 2, i32 3 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll new file mode 100644 index 0000000..5c981ef --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the +; chance to optimize the fcmp + select instructions to CNDE was missed +; due to the fact that the operands to fcmp and select had different types + +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}} + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 -1, i32 0 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll new file mode 100644 index 0000000..1dcd07c --- /dev/null +++ b/test/CodeGen/R600/fcmp.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: SETE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +;CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1 + %1 = load float addrspace(1)* %arrayidx1 + %cmp = fcmp oeq float %0, %1 + %sext = sext i1 %cmp to i32 + 
store i32 %sext, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll new file mode 100644 index 0000000..b013fd6 --- /dev/null +++ b/test/CodeGen/R600/fdiv.v4f32.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fdiv <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll new file mode 100644 index 0000000..845330f --- /dev/null +++ b/test/CodeGen/R600/floor.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @floor(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @floor(float) readonly diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll new file mode 100644 index 0000000..3708f0b --- /dev/null +++ b/test/CodeGen/R600/fmax.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s 
-march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll new file mode 100644 index 0000000..19d59ab --- /dev/null +++ b/test/CodeGen/R600/fmin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r1, float %r0 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll new file mode 100644 index 0000000..eb1d523 --- /dev/null +++ b/test/CodeGen/R600/fmul.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fmul float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll new file mode 100644 index 0000000..6d44a0c --- /dev/null +++ b/test/CodeGen/R600/fmul.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s 
-march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fmul <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll new file mode 100644 index 0000000..0ec1c37 --- /dev/null +++ b/test/CodeGen/R600/fsub.ll @@ -0,0 +1,17 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fsub float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll new file mode 100644 index 0000000..612a57e --- /dev/null +++ b/test/CodeGen/R600/fsub.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fsub <4 
x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8_to_double_to_float.ll new file mode 100644 index 0000000..39f3322 --- /dev/null +++ b/test/CodeGen/R600/i8_to_double_to_float.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = uitofp i8 %1 to double + %3 = fptrunc double %2 to float + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll new file mode 100644 index 0000000..aad44d9 --- /dev/null +++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll @@ -0,0 +1,18 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;Test that a select with reversed True/False values is correctly lowered +;to a SETNE_INT. There should only be one SETNE_INT instruction. 
+ +;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NOT: SETNE_INT + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1 + %1 = load i32 addrspace(1)* %arrayidx1 + %cmp = icmp eq i32 %0, %1 + %value = select i1 %cmp, i32 0, i32 -1 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg new file mode 100644 index 0000000..36ee493 --- /dev/null +++ b/test/CodeGen/R600/lit.local.cfg @@ -0,0 +1,13 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +targets = set(root.targets_to_build.split()) +if not 'R600' in targets: + config.unsupported = True + diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll new file mode 100644 index 0000000..4c731b2 --- /dev/null +++ b/test/CodeGen/R600/literals.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Test using an integer literal constant. +; Generated ASM should be: +; ADD_INT REG literal.x, 5 +; or +; ADD_INT literal.x REG, 5 + +; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5 +define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = add i32 5, %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; Test using a float literal constant. +; Generated ASM should be: +; ADD REG literal.x, 5.0 +; or +; ADD literal.x REG, 5.0 + +; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. 
]*}} {{[0-9]+}}(5.0 +define void @float_literal(float addrspace(1)* %out, float %in) { +entry: + %0 = fadd float 5.0, %in + store float %0, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll new file mode 100644 index 0000000..693eb27 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -0,0 +1,17 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.mul(float ,float ) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll new file mode 100644 index 0000000..fac957f --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.AMDGPU.trunc( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.trunc(float ) readnone diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll new file mode 100644 index 0000000..dc120bf --- /dev/null +++ b/test/CodeGen/R600/llvm.cos.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call 
float @llvm.cos.f32(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.cos.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll new file mode 100644 index 0000000..0ae9172 --- /dev/null +++ b/test/CodeGen/R600/llvm.pow.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.pow.f32( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.pow.f32(float ,float ) readonly diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll new file mode 100644 index 0000000..5cd6998 --- /dev/null +++ b/test/CodeGen/R600/llvm.sin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.sin.f32( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.sin.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll new file mode 100644 index 0000000..9362728 --- /dev/null +++ b/test/CodeGen/R600/load.constant_addrspace.f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s 
-march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(float addrspace(1)* %out, float addrspace(2)* %in) { + %1 = load float addrspace(2)* %in + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll new file mode 100644 index 0000000..b070dcd --- /dev/null +++ b/test/CodeGen/R600/load.i8.ll @@ -0,0 +1,10 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = zext i8 %1 to i32 + store i32 %2, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll new file mode 100644 index 0000000..6838c1a --- /dev/null +++ b/test/CodeGen/R600/reciprocal.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = fdiv float 1.0, %r0 + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.rcp(float ) readnone diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll new file mode 100644 index 0000000..3556fac --- /dev/null +++ b/test/CodeGen/R600/sdiv.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; The code generated by sdiv is long and complex and may frequently change. +; The goal of this test is to make sure the ISel doesn't fail. 
+; +; This program was previously failing to compile when one of the selectcc +; opcodes generated by the sdiv lowering was being legalized and optimized to: +; selectcc Remainder -1, 0, -1, SETGT +; This was fixed by adding an additional pattern in R600Instructions.td to +; match this pattern with a CNDGE_INT. + +; CHECK: RETURN + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %num = load i32 addrspace(1) * %in + %den = load i32 addrspace(1) * %den_ptr + %result = sdiv i32 %num, %den + store i32 %result, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll new file mode 100644 index 0000000..f65a300 --- /dev/null +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Note additional optimizations may cause this SGT to be replaced with a +; CND* instruction. 
+; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} +; Test a selectcc with i32 LHS/RHS and float True/False + +define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %1 = icmp sge i32 %0, 0 + %2 = select i1 %1, float 1.0, float 0.0 + store float %2, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll new file mode 100644 index 0000000..f0a0f51 --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} +define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { + %1 = load float addrspace(1)* %in + %2 = fcmp oeq float %1, 0.0 + %3 = select i1 %2, float 1.0, float 2.0 + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll new file mode 100644 index 0000000..b38078e --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde_int.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE_INT +;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %1 = load i32 addrspace(1)* %in + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 1, i32 2 + store i32 %3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.v4i32.ll new file mode 100644 index 0000000..0752f2e --- /dev/null +++ b/test/CodeGen/R600/setcc.v4i32.ll @@ -0,0 +1,12 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* 
%in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = icmp eq <4 x i32> %a, %b + %sext = sext <4 x i1> %result to <4 x i32> + store <4 x i32> %sext, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll new file mode 100644 index 0000000..1070250 --- /dev/null +++ b/test/CodeGen/R600/short-args.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll new file mode 100644 index 0000000..8b0d244 --- /dev/null +++ b/test/CodeGen/R600/store.v4f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %1 = load <4 x float> addrspace(1) * %in + store <4 x float> %1, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/store.v4i32.ll 
b/test/CodeGen/R600/store.v4i32.ll new file mode 100644 index 0000000..a659815 --- /dev/null +++ b/test/CodeGen/R600/store.v4i32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %1 = load <4 x i32> addrspace(1) * %in + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.v4i32.ll new file mode 100644 index 0000000..47657a6 --- /dev/null +++ b/test/CodeGen/R600/udiv.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by udiv is long and complex and may frequently change. +;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 udiv +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = udiv <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.v4i32.ll new file mode 100644 index 0000000..2e7388c --- /dev/null +++ b/test/CodeGen/R600/urem.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by urem is long and complex and may frequently change. 
+;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 urem +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = urem <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/SI/sanity.ll b/test/CodeGen/SI/sanity.ll new file mode 100644 index 0000000..62cdcf5 --- /dev/null +++ b/test/CodeGen/SI/sanity.ll @@ -0,0 +1,37 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +; CHECK: S_ENDPGM + +define void @main() { +main_body: + call void @llvm.AMDGPU.shader.type(i32 1) + %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*) + %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0 + %2 = load <4 x i32> addrspace(2)* %1 + %3 = call i32 @llvm.SI.vs.load.buffer.index() + %4 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3) + %5 = extractelement <4 x float> %4, i32 0 + %6 = extractelement <4 x float> %4, i32 1 + %7 = extractelement <4 x float> %4, i32 2 + %8 = extractelement <4 x float> %4, i32 3 + %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*) + %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1 + %11 = load <4 x i32> addrspace(2)* %10 + %12 = call i32 @llvm.SI.vs.load.buffer.index() + %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12) + %14 = extractelement <4 x float> %13, i32 0 + %15 = extractelement <4 x float> %13, i32 1 + %16 = extractelement <4 x float> %13, i32 2 + %17 = extractelement <4 x float> %13, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8) + 
ret void +} + +declare void @llvm.AMDGPU.shader.type(i32) + +declare i32 @llvm.SI.vs.load.buffer.index() readnone + +declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32) + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) |