From 311ea66db107917789c00f2ae11ec6b98eb64e59 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Mon, 11 Feb 2013 15:58:21 +0000 Subject: R600/SI: Use V_ADD_F32 instead of V_MOV_B32 for clamp/neg/abs modifiers. The modifiers don't seem to have any effect with V_MOV_B32, supposedly it's meant to just move bits untouched. Fixes 46 piglit tests with radeonsi, though unfortunately 11 of those had just regressed because they started using the clamp modifier. NOTE: This is a candidate for the Mesa stable branch. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174890 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index afafa8c..ceab692 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -74,13 +74,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; case AMDGPU::CLAMP_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) .addImm(0) // ABS .addImm(1) // CLAMP .addImm(0) // OMOD @@ -89,13 +87,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( break; case AMDGPU::FABS_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) .addImm(1) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD @@ -104,13 +100,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( break; case AMDGPU::FNEG_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) .addImm(0) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD -- cgit v1.1