diff options
Diffstat (limited to 'test/CodeGen/R600/private-memory.ll')
-rw-r--r-- | test/CodeGen/R600/private-memory.ll | 88 |
1 files changed, 57 insertions, 31 deletions
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index 89122be..bfb4a6a 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,19 +1,23 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC declare i32 @llvm.r600.read.tidig.x() nounwind readnone -; FUNC-LABEL: @mova_same_clause +; FUNC-LABEL: {{^}}mova_same_clause: -; R600-CHECK: LDS_WRITE -; R600-CHECK: LDS_WRITE -; R600-CHECK: LDS_READ -; R600-CHECK: LDS_READ +; R600: LDS_WRITE +; R600: LDS_WRITE +; R600: LDS_READ +; R600: LDS_READ -; SI-CHECK: DS_WRITE_B32 -; SI-CHECK: DS_WRITE_B32 -; SI-CHECK: DS_READ_B32 -; SI-CHECK: DS_READ_B32 +; SI-PROMOTE: ds_write_b32 +; SI-PROMOTE: ds_write_b32 +; SI-PROMOTE: ds_read_b32 +; SI-PROMOTE: ds_read_b32 + +; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 +; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -41,9 +45,10 @@ entry: ; XXX: This generated code has unnecessary MOVs, we should be able to optimize ; this. -; FUNC-LABEL: @multiple_structs -; R600-CHECK-NOT: MOVA_INT -; SI-CHECK-NOT: V_MOVREL +; FUNC-LABEL: {{^}}multiple_structs: +; R600-NOT: MOVA_INT +; SI-NOT: v_movrel +; SI-NOT: v_movrel %struct.point = type { i32, i32 } define void @multiple_structs(i32 addrspace(1)* %out) { @@ -71,9 +76,9 @@ entry: ; loads and stores should be lowered to copies, so there shouldn't be any ; MOVA instructions. -; FUNC-LABEL: @direct_loop -; R600-CHECK-NOT: MOVA_INT -; SI-CHECK-NOT: V_MOVREL +; FUNC-LABEL: {{^}}direct_loop: +; R600-NOT: MOVA_INT +; SI-NOT: v_movrel define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: @@ -107,11 +112,13 @@ for.end: ret void } -; FUNC-LABEL: @short_array +; FUNC-LABEL: {{^}}short_array: -; R600-CHECK: MOVA_INT +; R600: MOVA_INT -; SI-CHECK: V_MOVRELS_B32_e32 +; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0 +; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x2 ; encoding: [0x02,0x10,0x68,0xe0 +; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i16] @@ -126,12 +133,12 @@ entry: ret void } -; FUNC-LABEL: @char_array +; FUNC-LABEL: {{^}}char_array: -; R600-CHECK: MOVA_INT +; R600: MOVA_INT -; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100 -; SI-CHECK: V_MOVRELS_B32_e32 +; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0 +; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x1 ; encoding: [0x01,0x10,0x60,0xe0 define void @char_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i8] @@ -149,12 +156,12 @@ entry: ; Make sure we don't overwrite workitem information with private memory -; FUNC-LABEL: @work_item_info -; R600-CHECK-NOT: MOV T0.X +; FUNC-LABEL: {{^}}work_item_info: +; R600-NOT: MOV T0.X ; Additional check in case the move ends up in the last slot -; R600-CHECK-NOT: MOV * TO.X +; R600-NOT: MOV * TO.X -; SI-CHECK-NOT: V_MOV_B32_e{{(32|64)}} v0 +; SI-NOT: v_mov_b32_e{{(32|64)}} v0 define void @work_item_info(i32 addrspace(1)* %out, i32 %in) { entry: %0 = alloca [2 x i32] @@ -172,11 +179,11 @@ entry: ; Test that two stack objects are not stored in the same register ; The second stack object should be in T3.X -; FUNC-LABEL: @no_overlap +; FUNC-LABEL: {{^}}no_overlap: ; R600_CHECK: MOV ; R600_CHECK: [[CHAN:[XYZW]]]+ -; R600-CHECK-NOT: [[CHAN]]+ -; SI-CHECK: V_MOV_B32_e32 v3 +; R600-NOT: [[CHAN]]+ +; SI: v_mov_b32_e32 v3 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) { entry: %0 = alloca [3 x i8], align 1 @@ -283,3 +290,22 @@ entry: ret void } +; AMDGPUPromoteAlloca does not know how to handle ptrtoint. When it +; finds one, it should stop trying to promote. + +; FUNC-LABEL: ptrtoint: +; SI-NOT: ds_write +; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x5 +define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %alloca = alloca [16 x i32] + %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a + store i32 5, i32* %tmp0 + %tmp1 = ptrtoint [16 x i32]* %alloca to i32 + %tmp2 = add i32 %tmp1, 5 + %tmp3 = inttoptr i32 %tmp2 to i32* + %tmp4 = getelementptr i32* %tmp3, i32 %b + %tmp5 = load i32* %tmp4 + store i32 %tmp5, i32 addrspace(1)* %out + ret void +} |