diff options
Diffstat (limited to 'lib/Target/R600/SILowerControlFlow.cpp')
-rw-r--r-- | lib/Target/R600/SILowerControlFlow.cpp | 17 |
1 files changed, 13 insertions, 4 deletions
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index c2e8f02..958763d 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Vec = MI.getOperand(2).getReg(); unsigned Off = MI.getOperand(4).getImm(); + unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0); + if (!SubReg) + SubReg = Vec; - MachineInstr *MovRel = + MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) - .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off) + .addReg(SubReg + Off) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Vec, RegState::Implicit); @@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Off = MI.getOperand(4).getImm(); unsigned Val = MI.getOperand(5).getReg(); + unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0); + if (!SubReg) + SubReg = Dst; MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) - .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define) + .addReg(SubReg + Off, RegState::Define) .addReg(Val) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Dst, RegState::Implicit); @@ -409,6 +415,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); bool HaveKill = false; bool NeedM0 = false; @@ -476,6 +483,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { IndirectSrc(MI); break; + case AMDGPU::SI_INDIRECT_DST_V1: case AMDGPU::SI_INDIRECT_DST_V2: case AMDGPU::SI_INDIRECT_DST_V4: case AMDGPU::SI_INDIRECT_DST_V8: @@ -487,6 +495,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { NeedWQM = true; // Fall through case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_ADD_U32_RTN: NeedM0 = true; break; @@ -508,7 +517,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::M0).addImm(0xffffffff); } - if (NeedWQM) { + if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) { MachineBasicBlock &MBB = MF.front(); BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC).addReg(AMDGPU::EXEC); |