summaryrefslogtreecommitdiffstats
path: root/test/CodeGen/R600
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll2
-rw-r--r--test/CodeGen/R600/ds_read2.ll24
-rw-r--r--test/CodeGen/R600/ds_read2st64.ll8
-rw-r--r--test/CodeGen/R600/ds_write2.ll30
-rw-r--r--test/CodeGen/R600/ds_write2st64.ll6
-rw-r--r--test/CodeGen/R600/fmaxnum.ll91
-rw-r--r--test/CodeGen/R600/fminnum.ll91
-rw-r--r--test/CodeGen/R600/ftrunc.f64.ll2
-rw-r--r--test/CodeGen/R600/local-memory-two-objects.ll4
-rw-r--r--test/CodeGen/R600/operand-folding.ll2
-rw-r--r--test/CodeGen/R600/si-annotate-cf.ll25
-rw-r--r--test/CodeGen/R600/unaligned-load-store.ll4
12 files changed, 248 insertions, 41 deletions
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
index c381fc4..e7e13d6 100644
--- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.barrier.local() #1
; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
-; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: s_endpgm
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
index 7110a90..5929898 100644
--- a/test/CodeGen/R600/ds_read2.ll
+++ b/test/CodeGen/R600/ds_read2.ll
@@ -7,7 +7,7 @@
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
; SI-LABEL: @simple_read2_f32
-; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
@@ -26,7 +26,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_max_offset
-; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
@@ -63,7 +63,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_x2
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
@@ -94,7 +94,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
; Make sure there is an instruction between the two sets of reads.
; SI-LABEL: @simple_read2_f32_x2_barrier
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
; SI: s_barrier
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
@@ -313,7 +313,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
; SI-LABEL: @simple_read2_f64
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
-; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
+; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
@@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f64_max_offset
-; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
; SI: s_endpgm
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -366,7 +366,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
; Alignment only 4
; SI-LABEL: @misaligned_read2_f64
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
@@ -386,7 +386,7 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
; SI-LABEL: @load_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
%val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -397,7 +397,7 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
; SI-LABEL: @load_constant_disjoint_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
%val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -410,7 +410,7 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
; SI-LABEL: @load_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
%val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
@@ -425,8 +425,8 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
; SI-LABEL: @load_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
-; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
-; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1
; SI: s_endpgm
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
%val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
index 482debb..54b3b45 100644
--- a/test/CodeGen/R600/ds_read2st64.ll
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -5,7 +5,7 @@
; SI-LABEL: @simple_read2st64_f32_0_1
-; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
@@ -117,7 +117,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f64_0_1
-; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
@@ -158,7 +158,7 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
; Alignment only
; SI-LABEL: @misaligned_read2st64_f64
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
@@ -237,7 +237,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: ds_read2st_b64
-; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
index 1d94c57..b553d34 100644
--- a/test/CodeGen/R600/ds_write2.ll
+++ b/test/CodeGen/R600/ds_write2.ll
@@ -7,7 +7,7 @@
; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -144,7 +144,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -179,7 +179,7 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
}
; SI-LABEL: @simple_write2_two_val_f32_x2
-; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
@@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
; SI-LABEL: @simple_write2_one_val_f64
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
+; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -285,7 +285,7 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
@@ -304,7 +304,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -324,7 +324,7 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
; SI-LABEL: @store_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
define void @store_constant_adjacent_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -334,7 +334,7 @@ define void @store_constant_adjacent_offsets() {
; SI-LABEL: @store_constant_disjoint_offsets
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
+; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
define void @store_constant_disjoint_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -345,7 +345,7 @@ define void @store_constant_disjoint_offsets() {
; SI-LABEL: @store_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
@@ -358,8 +358,8 @@ define void @store_misaligned64_constant_offsets() {
; SI-LABEL: @store_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
-; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
+; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
index 2044df2..1d9d881 100644
--- a/test/CodeGen/R600/ds_write2st64.ll
+++ b/test/CodeGen/R600/ds_write2st64.ll
@@ -7,7 +7,7 @@
; SI-LABEL: @simple_write2st64_one_val_f32_0_1
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
; SI: s_endpgm
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -46,7 +46,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -85,7 +85,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
; SI-NOT: ds_write2st64_b64
-; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
+; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll
index c105598..3029bd02 100644
--- a/test/CodeGen/R600/fmaxnum.ll
+++ b/test/CodeGen/R600/fmaxnum.ll
@@ -11,6 +11,9 @@ declare double @llvm.maxnum.f64(double, double)
; FUNC-LABEL: @test_fmax_f32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -20,6 +23,10 @@ define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwin
; FUNC-LABEL: @test_fmax_v2f32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -31,6 +38,12 @@ define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -46,6 +59,17 @@ define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -69,6 +93,27 @@ define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -79,6 +124,10 @@ define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -89,6 +138,11 @@ define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344(nan)
define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -99,6 +153,10 @@ define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -109,6 +167,10 @@ define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -119,6 +181,10 @@ define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -129,6 +195,10 @@ define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -139,6 +209,10 @@ define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -149,6 +223,10 @@ define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -157,6 +235,10 @@ define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
; FUNC-LABEL: @fmax_var_immediate_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -165,6 +247,9 @@ define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmax_immediate_var_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -174,6 +259,9 @@ define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmax_var_literal_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -183,6 +271,9 @@ define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
; FUNC-LABEL: @fmax_literal_var_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll
index 6b93b83..4d7b525 100644
--- a/test/CodeGen/R600/fminnum.ll
+++ b/test/CodeGen/R600/fminnum.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.minnum.f32(float, float) #0
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
@@ -9,6 +10,9 @@ declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
; FUNC-LABEL: @test_fmin_f32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.minnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -18,6 +22,10 @@ define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwin
; FUNC-LABEL: @test_fmin_v2f32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -29,6 +37,12 @@ define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -44,6 +58,17 @@ define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -67,6 +92,27 @@ define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -77,6 +123,10 @@ define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -87,6 +137,11 @@ define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -97,6 +152,10 @@ define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -107,6 +166,10 @@ define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -117,6 +180,10 @@ define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -127,6 +194,10 @@ define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -137,6 +208,10 @@ define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -147,6 +222,10 @@ define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -155,6 +234,9 @@ define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
; FUNC-LABEL: @fmin_var_immediate_f32
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -163,6 +245,9 @@ define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmin_immediate_var_f32
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -172,6 +257,9 @@ define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmin_var_literal_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -181,6 +269,9 @@ define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
; FUNC-LABEL: @fmin_literal_var_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll
index 4ea84a7..6618d8b 100644
--- a/test/CodeGen/R600/ftrunc.f64.ll
+++ b/test/CodeGen/R600/ftrunc.f64.ll
@@ -27,9 +27,9 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
-; SI: cmp_gt_i32
; SI: s_not_b64
; SI: s_and_b64
+; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_lt_i32
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index caa4b19..06a8b12 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -32,8 +32,8 @@
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
index 4bf748e..816755e 100644
--- a/test/CodeGen/R600/operand-folding.ll
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -19,7 +19,7 @@ endif:
}
; CHECK-LABEL: {{^}}fold_imm:
-; CHECK v_or_i32_e32 v{{[0-9]+}}, 5
+; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
entry:
%fold = add i32 3, 2
diff --git a/test/CodeGen/R600/si-annotate-cf.ll b/test/CodeGen/R600/si-annotate-cf.ll
new file mode 100644
index 0000000..3f30988
--- /dev/null
+++ b/test/CodeGen/R600/si-annotate-cf.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:
+
+; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
+; Lowered break instructin:
+; SI: s_or_b64
+; Lowered Loop instruction:
+; SI: s_andn2_b64
+; s_cbranch_execnz [[LOOP_LABEL]]
+; SI: s_endpgm
+define void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+main_body:
+ %0 = and i32 %a, %b
+ %1 = trunc i32 %0 to i1
+ br label %ENDIF
+
+ENDLOOP:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+
+ENDIF:
+ br i1 %1, label %ENDLOOP, label %ENDIF
+}
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index efb1de2..82d88eb 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -195,7 +195,7 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out,
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; SI: s_endpgm
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
@@ -243,7 +243,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*