diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-11-13 23:36:50 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-11-13 23:36:50 +0000 |
commit | a2b4eb6d15a13de257319ac6231b5ab622cd02b1 (patch) | |
tree | 3147a7994db9c80cbaa22526fae0dbfdc780c212 | |
parent | b52bf6a3b31596a309f4b12884522e9b4a344654 (diff) | |
download | external_llvm-a2b4eb6d15a13de257319ac6231b5ab622cd02b1.zip external_llvm-a2b4eb6d15a13de257319ac6231b5ab622cd02b1.tar.gz external_llvm-a2b4eb6d15a13de257319ac6231b5ab622cd02b1.tar.bz2 |
R600/SI: Add support for private address space load/store
Private address space is emulated using the register file with
MOVRELS and MOVRELD instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194626 91177308-0d34-0410-b5e6-96231b3b80d8
28 files changed, 429 insertions, 154 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 88b375b..a989135 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -309,6 +309,40 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } + case AMDGPUISD::REGISTER_LOAD: { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) + break; + SDValue Addr, Offset; + + SelectADDRIndirect(N->getOperand(1), Addr, Offset); + const SDValue Ops[] = { + Addr, + Offset, + CurDAG->getTargetConstant(0, MVT::i32), + N->getOperand(0), + }; + return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N), + CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other), + Ops); + } + case AMDGPUISD::REGISTER_STORE: { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) + break; + SDValue Addr, Offset; + SelectADDRIndirect(N->getOperand(2), Addr, Offset); + const SDValue Ops[] = { + N->getOperand(1), + Addr, + Offset, + CurDAG->getTargetConstant(0, MVT::i32), + N->getOperand(0), + }; + return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N), + CurDAG->getVTList(MVT::Other), + Ops); + } } return SelectCode(N); } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 36de482..51ad217 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "AMDGPUISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUFrameLowering.h" #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "AMDILIntrinsicInfo.h" @@ -250,8 +251,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) // AMDGPU DAG lowering case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case 
ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); } @@ -326,6 +327,21 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, &Args[0], Args.size()); } +SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, + SelectionDAG &DAG) const { + + MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPUFrameLowering *TFL = + static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); + + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); + assert(FIN); + + unsigned FrameIndex = FIN->getIndex(); + unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); + return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), + Op.getValueType()); +} SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -563,7 +579,8 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } StoreSDNode *Store = cast<StoreSDNode>(Op); - if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && Store->getValue().getValueType().isVector()) { return SplitVectorStore(Op, DAG); } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index a3554a5..dacb086 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -28,6 +28,7 @@ private: void ExtractVectorElements(SDValue Op, SelectionDAG &DAG, SmallVectorImpl<SDValue> &Args, unsigned Start, unsigned Count) const; + SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue 
LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index 592dcbf..1b2e131 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -120,31 +120,43 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const { MachineBasicBlock *MBB = MI->getParent(); + int OffsetOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr); + // addr is a custom operand with multiple MI operands, and only the + // first MI operand is given a name. + int RegOpIdx = OffsetOpIdx + 1; + int ChanOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan); if (isRegisterLoad(*MI)) { - unsigned RegIndex = MI->getOperand(2).getImm(); - unsigned Channel = MI->getOperand(3).getImm(); + int DstOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + unsigned RegIndex = MI->getOperand(RegOpIdx).getImm(); + unsigned Channel = MI->getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI->getOperand(1).getReg(); + unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg(); if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { - buildMovInstr(MBB, MI, MI->getOperand(0).getReg(), + buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(), getIndirectAddrRegClass()->getRegister(Address)); } else { - buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(), + buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(), Address, OffsetReg); } } else if (isRegisterStore(*MI)) { - unsigned RegIndex = MI->getOperand(2).getImm(); - unsigned Channel = MI->getOperand(3).getImm(); + int ValOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val); + AMDGPU::getNamedOperandIdx(MI->getOpcode(), 
AMDGPU::OpName::dst); + unsigned RegIndex = MI->getOperand(RegOpIdx).getImm(); + unsigned Channel = MI->getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI->getOperand(1).getReg(); + unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg(); if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), - MI->getOperand(0).getReg()); + MI->getOperand(ValOpIdx).getReg()); } else { - buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(), - calculateIndirectAddress(RegIndex, Channel), - OffsetReg); + buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(), + calculateIndirectAddress(RegIndex, Channel), + OffsetReg); } } else { return false; @@ -260,6 +272,57 @@ bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD; } +int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int Offset = -1; + + if (MFI->getNumObjects() == 0) { + return -1; + } + + if (MRI.livein_empty()) { + return 0; + } + + const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + LE = MRI.livein_end(); + LI != LE; ++LI) { + unsigned Reg = LI->first; + if (TargetRegisterInfo::isVirtualRegister(Reg) || + !IndirectRC->contains(Reg)) + continue; + + unsigned RegIndex; + unsigned RegEnd; + for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd; + ++RegIndex) { + if (IndirectRC->getRegister(RegIndex) == Reg) + break; + } + Offset = std::max(Offset, (int)RegIndex); + } + + return Offset + 1; +} + +int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { + int Offset = 0; + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Variable sized objects are not 
supported + assert(!MFI->hasVarSizedObjects()); + + if (MFI->getNumObjects() == 0) { + return -1; + } + + Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1); + + return getIndirectIndexBegin(MF) + Offset; +} + void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const { diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index dc65d4e..6378fdd 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -99,6 +99,14 @@ protected: MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI) const; + /// \returns the smallest register index that will be accessed by an indirect + /// read or write or -1 if indirect addressing is not used by this program. + virtual int getIndirectIndexBegin(const MachineFunction &MF) const; + + /// \returns the largest register index that will be accessed by an indirect + /// read or write or -1 if indirect addressing is not used by this program. + virtual int getIndirectIndexEnd(const MachineFunction &MF) const; + public: bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; @@ -144,14 +152,6 @@ public: virtual unsigned getIEQOpcode() const = 0; virtual bool isMov(unsigned opcode) const = 0; - /// \returns the smallest register index that will be accessed by an indirect - /// read or write or -1 if indirect addressing is not used by this program. - virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0; - - /// \returns the largest register index that will be accessed by an indirect - /// read or write or -1 if indirect addressing is not used by this program. 
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0; - /// \brief Calculate the "Indirect Address" for the given \p RegIndex and /// \p Channel /// diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 0c81a6b..3e1fc27 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -35,6 +35,7 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> } def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; +def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; def COND_EQ : PatLeaf < (cond), @@ -277,6 +278,8 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst < multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, ComplexPattern addrPat> { +let UseNamedOperandTable = 1 in { + def RegisterLoad : AMDGPUShaderInst < (outs dstClass:$dst), (ins addrClass:$addr, i32imm:$chan), @@ -295,6 +298,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, let isRegisterStore = 1; } } +} } // End isCodeGenOnly = 1, isPseudo = 1 diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index 135d3dd..688e1a0 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -50,6 +50,10 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { assert(!"Unimplemented"); return NULL; } + virtual unsigned getHWRegIndex(unsigned Reg) const { + assert(!"Unimplemented"); return 0; + } + /// \returns the sub reg enum value for the given \p Channel /// (e.g. 
getSubRegFromChannel(0) -> AMDGPU::sub0) unsigned getSubRegFromChannel(unsigned Channel) const; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a236a3e..4b6c2eb 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -519,7 +519,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -843,20 +842,6 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, false, false, false, 0); } -SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { - - MachineFunction &MF = DAG.getMachineFunction(); - const AMDGPUFrameLowering *TFL = - static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); - - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); - assert(FIN); - - unsigned FrameIndex = FIN->getIndex(); - unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); - return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32); -} - bool R600TargetLowering::isZero(SDValue Op) const { if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { return Cst->isNullValue(); diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 6457ad4..c10257e 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -59,7 +59,6 @@ private: SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, 
SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index a11d54a..aff11ce 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -1024,67 +1024,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return 2; } -int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - int Offset = 0; - - if (MFI->getNumObjects() == 0) { - return -1; - } - - if (MRI.livein_empty()) { - return 0; - } - - for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - LE = MRI.livein_end(); - LI != LE; ++LI) { - Offset = std::max(Offset, - GET_REG_INDEX(RI.getEncodingValue(LI->first))); - } - - return Offset + 1; -} - -int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { - int Offset = 0; - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Variable sized objects are not supported - assert(!MFI->hasVarSizedObjects()); - - if (MFI->getNumObjects() == 0) { - return -1; - } - - Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1); - - return getIndirectIndexBegin(MF) + Offset; -} - -std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs( +void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const { const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering()); - std::vector<unsigned> Regs; unsigned StackWidth = TFL->getStackWidth(MF); int End = getIndirectIndexEnd(MF); - if (End == -1) { - return Regs; - } + if (End == -1) + return; for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index); - Regs.push_back(SuperReg); + Reserved.set(SuperReg); for (unsigned Chan = 0; Chan < StackWidth; 
++Chan) { unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); - Regs.push_back(Reg); + Reserved.set(Reg); } } - return Regs; } unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index d7438ef..e2996c7 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -193,14 +193,9 @@ namespace llvm { virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const { return 1;} - /// \returns a list of all the registers that may be accesed using indirect - /// addressing. - std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const; - - virtual int getIndirectIndexBegin(const MachineFunction &MF) const; - - virtual int getIndirectIndexEnd(const MachineFunction &MF) const; - + /// \brief Reserve the registers that may be accesed using indirect addressing. + void reserveIndirectRegisters(BitVector &Reserved, + const MachineFunction &MF) const; virtual unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 0b2e6ec..d8e431a 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -75,7 +75,6 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; -def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index dd8f3ef..fbe333d 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ 
b/lib/Target/R600/R600RegisterInfo.cpp @@ -28,6 +28,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm) BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); + Reserved.set(AMDGPU::ZERO); Reserved.set(AMDGPU::HALF); Reserved.set(AMDGPU::ONE); @@ -48,14 +50,8 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*I); } - const R600InstrInfo *RII = - static_cast<const R600InstrInfo*>(TM.getInstrInfo()); - std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF); - for (std::vector<unsigned>::iterator I = IndirectRegs.begin(), - E = IndirectRegs.end(); - I != E; ++I) { - Reserved.set(*I); - } + TII->reserveIndirectRegisters(Reserved, MF); + return Reserved; } @@ -73,6 +69,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { return this->getEncodingValue(reg) >> HW_CHAN_SHIFT; } +unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const { + return GET_REG_INDEX(getEncodingValue(Reg)); +} + const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( MVT VT) const { switch(VT.SimpleTy) { diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h index d458e55..8833ee7 100644 --- a/lib/Target/R600/R600RegisterInfo.h +++ b/lib/Target/R600/R600RegisterInfo.h @@ -39,6 +39,8 @@ struct R600RegisterInfo : public AMDGPURegisterInfo { /// \brief get the HW encoding for a register's channel. 
unsigned getHWRegChan(unsigned reg) const; + virtual unsigned getHWRegIndex(unsigned Reg) const; + /// \brief get the register class of the specified type to use in the /// CFGStructurizer virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 371572e..3519aad 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -75,6 +75,19 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::v8i32, Custom); setOperationAction(ISD::STORE, MVT::v16i32, Custom); + // We need to custom lower loads/stores from private memory + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i128, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v4i32, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -95,6 +108,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand); @@ -106,6 +120,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::FrameIndex, MVT::i64, Custom); setTargetDAGCombine(ISD::SELECT_CC); @@ -122,6 +137,8 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT 
VT, bool *IsFast) const { // XXX: This depends on the address space and also we may want to revist // the alignment values we specify in the DataLayout. + if (!VT.isSimple() || VT == MVT::Other) + return false; return VT.bitsGT(MVT::i32); } @@ -350,6 +367,19 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } + case AMDGPU::SI_RegisterStorePseudo: { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const SIInstrInfo *TII = + static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstrBuilder MIB = + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), + Reg); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + MIB.addOperand(MI->getOperand(i)); + + MI->eraseFromParent(); + } } return BB; } @@ -395,7 +425,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); - if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && Op.getValueType().isVector()) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), @@ -403,20 +434,13 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { }; return DAG.getMergeValues(MergedValues, 2, SDLoc(Op)); } else { - return SDValue(); + return LowerLOAD(Op, DAG); } } - case ISD::STORE: { - StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); - if (Store->getValue().getValueType().isVector() && - Store->getValue().getValueType().getVectorNumElements() >= 8) - return SplitVectorStore(Op, DAG); - else - return AMDGPUTargetLowering::LowerOperation(Op, DAG); - } case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case 
ISD::STORE: return LowerSTORE(Op, DAG); case ISD::ANY_EXTEND: // Fall-through case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); @@ -628,6 +652,30 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, return Chain; } +SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + LoadSDNode *Load = cast<LoadSDNode>(Op); + + if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + return SDValue(); + + SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Load->getBasePtr(), DAG.getConstant(0, MVT::i32)); + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr, + DAG.getConstant(2, MVT::i32)); + + SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), + Load->getChain(), Ptr, + DAG.getTargetConstant(0, MVT::i32), + Op.getOperand(2)); + SDValue MergedValues[2] = { + Ret, + Load->getChain() + }; + return DAG.getMergeValues(MergedValues, 2, DL); + +} + SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const { @@ -685,6 +733,56 @@ SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op, return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi); } +SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + StoreSDNode *Store = cast<StoreSDNode>(Op); + EVT VT = Store->getMemoryVT(); + + SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG); + if (Ret.getNode()) + return Ret; + + if (VT.isVector() && VT.getVectorNumElements() >= 8) + return SplitVectorStore(Op, DAG); + + if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + return SDValue(); + + SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32); + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr, + DAG.getConstant(2, MVT::i32)); + SDValue Chain = Store->getChain(); + SmallVector<SDValue, 8> Values; + + if (VT == MVT::i64) { + for (unsigned i = 0; i < 2; ++i) { + 
Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Store->getValue(), DAG.getConstant(i, MVT::i32))); + } + } else if (VT == MVT::i128) { + for (unsigned i = 0; i < 2; ++i) { + for (unsigned j = 0; j < 2; ++j) { + Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, + Store->getValue(), DAG.getConstant(i, MVT::i32)), + DAG.getConstant(j, MVT::i32))); + } + } + } else { + Values.push_back(Store->getValue()); + } + + for (unsigned i = 0; i < Values.size(); ++i) { + SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, + Ptr, DAG.getConstant(i, MVT::i32)); + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, + Chain, Values[i], PartPtr, + DAG.getTargetConstant(0, MVT::i32)); + } + return Chain; +} + + SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 1ac6e0d..384caf4 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -25,8 +25,10 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue Chain, unsigned Offset) const; SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 7e42fb7..7ef662e 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -186,7 +186,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { - if 
(!Op.isReg()) + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) return std::make_pair(0, 0); unsigned Reg = Op.getReg(); diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index b8e75cb..b4380d07 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -230,7 +230,8 @@ MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned DstReg, unsigned SrcReg) const { - llvm_unreachable("Not Implemented"); + return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32), + DstReg) .addReg(SrcReg); } bool SIInstrInfo::isMov(unsigned Opcode) const { @@ -603,17 +604,8 @@ unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex, return RegIndex; } - -int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { - llvm_unreachable("Unimplemented"); -} - -int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { - llvm_unreachable("Unimplemented"); -} - const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { - llvm_unreachable("Unimplemented"); + return &AMDGPU::VReg_32RegClass; } MachineInstrBuilder SIInstrInfo::buildIndirectWrite( @@ -621,7 +613,17 @@ MachineInstrBuilder SIInstrInfo::buildIndirectWrite( MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - llvm_unreachable("Unimplemented"); + const DebugLoc &DL = MBB->findDebugLoc(I); + unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( + getIndirectIndexBegin(*MBB->getParent())); + + return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1)) + .addReg(IndirectBaseReg, RegState::Define) + .addOperand(I->getOperand(0)) + .addReg(IndirectBaseReg) + .addReg(OffsetReg) + .addImm(0) + .addReg(ValueReg); } MachineInstrBuilder SIInstrInfo::buildIndirectRead( @@ -629,5 +631,43 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead( MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, 
unsigned OffsetReg) const { - llvm_unreachable("Unimplemented"); + const DebugLoc &DL = MBB->findDebugLoc(I); + unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( + getIndirectIndexBegin(*MBB->getParent())); + + return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) + .addOperand(I->getOperand(0)) + .addOperand(I->getOperand(1)) + .addReg(IndirectBaseReg) + .addReg(OffsetReg) + .addImm(0); + +} + +void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved, + const MachineFunction &MF) const { + int End = getIndirectIndexEnd(MF); + int Begin = getIndirectIndexBegin(MF); + + if (End == -1) + return; + + + for (int Index = Begin; Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 1); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 2); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 3); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 7); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 15); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); } diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 1ab3786..84ebc96 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -25,6 +25,14 @@ class SIInstrInfo : public AMDGPUInstrInfo { private: const SIRegisterInfo RI; + MachineInstrBuilder buildIndirectIndexLoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned OffsetVGPR, + unsigned MovRelOp, + unsigned Dst, + unsigned Src0) const; + // If you add or remove instructions from this function, you will + public: explicit SIInstrInfo(AMDGPUTargetMachine &tm); @@ -58,9 +66,6 @@ public: 
virtual bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const; - virtual int getIndirectIndexBegin(const MachineFunction &MF) const; - - virtual int getIndirectIndexEnd(const MachineFunction &MF) const; bool isSALUInstr(const MachineInstr &MI) const; unsigned getVALUOp(const MachineInstr &MI) const; @@ -114,7 +119,12 @@ public: unsigned ValueReg, unsigned Address, unsigned OffsetReg) const; - }; + void reserveIndirectRegisters(BitVector &Reserved, + const MachineFunction &MF) const; + + void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I, + unsigned SavReg, unsigned IndexReg) const; +}; namespace AMDGPU { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index b55f59d..4cd0daa 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -121,6 +121,10 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{ return false; }]>; +def FRAMEri64 : Operand<iPTR> { + let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index); +} + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 1823168..fb9ae45 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1293,6 +1293,36 @@ def SI_KILL : InstSI < let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { +//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>; + +let UseNamedOperandTable = 1 in { + +def SI_RegisterLoad : AMDGPUShaderInst < + (outs VReg_32:$dst, SReg_64:$temp), + (ins FRAMEri64:$addr, i32imm:$chan), + "", [] +> { + let isRegisterLoad = 1; + let mayLoad = 1; +} + +class SIRegStore<dag outs> : AMDGPUShaderInst < + outs, + (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan), + "", [] +> { + let isRegisterStore = 1; + let mayStore = 1; +} + +let usesCustomInserter = 1 in { +def 
SI_RegisterStorePseudo : SIRegStore<(outs)>; +} // End usesCustomInserter = 1 +def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>; + + +} // End UseNamedOperandTable = 1 + def SI_INDIRECT_SRC : InstSI < (outs VReg_32:$dst, SReg_64:$temp), (ins unknown:$src, VSrc_32:$idx, i32imm:$off), @@ -1309,6 +1339,7 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI < let Constraints = "$src = $dst"; } +def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>; def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; @@ -1988,7 +2019,7 @@ def : Pat< (V_CMP_U_F32_e64 $src0, $src1) >; -//============================================================================// +//===----------------------------------------------------------------------===// // Miscellaneous Patterns //===----------------------------------------------------------------------===// @@ -2000,6 +2031,11 @@ def : Pat < >; def : Pat < + (i32 (trunc i64:$a)), + (EXTRACT_SUBREG $a, sub0) +>; + +def : Pat < (or i64:$a, i64:$b), (INSERT_SUBREG (INSERT_SUBREG (IMPLICIT_DEF), diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index a6c43bb..958763d 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Vec = MI.getOperand(2).getReg(); unsigned Off = MI.getOperand(4).getImm(); + unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0); + if (!SubReg) + SubReg = Vec; - MachineInstr *MovRel = + MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) - .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off) + .addReg(SubReg + Off) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Vec, RegState::Implicit); @@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { 
unsigned Dst = MI.getOperand(0).getReg(); unsigned Off = MI.getOperand(4).getImm(); unsigned Val = MI.getOperand(5).getReg(); + unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0); + if (!SubReg) + SubReg = Dst; MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) - .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define) + .addReg(SubReg + Off, RegState::Define) .addReg(Val) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Dst, RegState::Implicit); @@ -477,6 +483,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { IndirectSrc(MI); break; + case AMDGPU::SI_INDIRECT_DST_V1: case AMDGPU::SI_INDIRECT_DST_V2: case AMDGPU::SI_INDIRECT_DST_V4: case AMDGPU::SI_INDIRECT_DST_V8: diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index e06a022..0bbad09 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -15,6 +15,7 @@ #include "SIRegisterInfo.h" #include "AMDGPUTargetMachine.h" +#include "SIInstrInfo.h" using namespace llvm; @@ -26,6 +27,9 @@ SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm) BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::EXEC); + Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); + const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo()); + TII->reserveIndirectRegisters(Reserved, MF); return Reserved; } @@ -51,6 +55,10 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( } } +unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const { + return getEncodingValue(Reg); +} + const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { assert(!TargetRegisterInfo::isVirtualRegister(Reg)); diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index ba831b0..8148f7f 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ 
-42,6 +42,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// CFGStructurizer virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; + virtual unsigned getHWRegIndex(unsigned Reg) const; + /// \brief Return the 'base' register class for this register. /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc. const TargetRegisterClass *getPhysRegClass(unsigned Reg) const; diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 632509c..e4492d7 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -299,8 +299,6 @@ entry: ; R600-CHECK: 31 ; SI-CHECK-LABEL: @load_i64_sext ; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]] -; SI-CHECK: V_LSHL_B64 [[LSHL:v\[[0-9]:[0-9]\]]], [[VAL]], 32 -; SI-CHECK: V_ASHR_I64 v{{\[[0-9]:[0-9]\]}}, [[LSHL]], 32 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/private-memory.ll index 1ef6c35..48a013c 100644 --- a/test/CodeGen/R600/indirect-addressing.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,16 +1,24 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; This test checks that uses and defs of the AR register happen in the same ; instruction clause. 
-; CHECK: @mova_same_clause -; CHECK: MOVA_INT -; CHECK-NOT: ALU clause -; CHECK: 0 + AR.x -; CHECK: MOVA_INT -; CHECK-NOT: ALU clause -; CHECK: 0 + AR.x +; R600-CHECK-LABEL: @mova_same_clause +; R600-CHECK: MOVA_INT +; R600-CHECK-NOT: ALU clause +; R600-CHECK: 0 + AR.x +; R600-CHECK: MOVA_INT +; R600-CHECK-NOT: ALU clause +; R600-CHECK: 0 + AR.x +; SI-CHECK-LABEL: @mova_same_clause +; SI-CHECK: V_READFIRSTLANE +; SI-CHECK: V_MOVRELD +; SI-CHECK: S_CBRANCH +; SI-CHECK: V_READFIRSTLANE +; SI-CHECK: V_MOVRELD +; SI-CHECK: S_CBRANCH define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -38,9 +46,10 @@ entry: ; XXX: This generated code has unnecessary MOVs, we should be able to optimize ; this. -; CHECK: @multiple_structs -; CHECK-NOT: MOVA_INT - +; R600-CHECK-LABEL: @multiple_structs +; R600-CHECK-NOT: MOVA_INT +; SI-CHECK-LABEL: @multiple_structs +; SI-CHECK-NOT: V_MOVREL %struct.point = type { i32, i32 } define void @multiple_structs(i32 addrspace(1)* %out) { @@ -68,8 +77,10 @@ entry: ; loads and stores should be lowered to copies, so there shouldn't be any ; MOVA instructions. 
-; CHECK: @direct_loop -; CHECK-NOT: MOVA_INT +; R600-CHECK-LABEL: @direct_loop +; R600-CHECK-NOT: MOVA_INT +; SI-CHECK-LABEL: @direct_loop +; SI-CHECK-NOT: V_MOVREL define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index 2926163..fe9df10 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -43,7 +43,7 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ;EG-CHECK: ASHR ;SI-CHECK-LABEL: @ashr_i64 -;SI-CHECK: V_ASHR_I64 +;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) { entry: %0 = sext i32 %in to i64 diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll index 3d192d9..2824ff8 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -1,8 +1,7 @@ ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @unaligned_load_store_i32: -; SI: V_ADD_I32_e64 [[REG:v[0-9]+]] -; DS_READ_U8 {{v[0-9]+}}, 0, [[REG]] +; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]] define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { %v = load i32 addrspace(3)* %p, align 1 store i32 %v, i32 addrspace(3)* %r, align 1 @@ -10,8 +9,7 @@ define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r } ; SI-LABEL: @unaligned_load_store_v4i32: -; SI: V_ADD_I32_e64 [[REG:v[0-9]+]] -; DS_READ_U8 {{v[0-9]+}}, 0, [[REG]] +; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]] define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { %v = load <4 x i32> addrspace(3)* %p, align 1 store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1 |