diff options
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 476
-rw-r--r-- | lib/Target/NVPTX/NVPTXIntrinsics.td | 66
-rw-r--r-- | test/CodeGen/NVPTX/ldu-reg-plus-offset.ll | 21
3 files changed, 451 insertions, 112 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 4457ec3..03a3aa4 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -787,194 +787,476 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { unsigned Opcode; SDLoc DL(N); SDNode *LD; - MemSDNode *Mem = cast<MemSDNode>(N); + SDValue Base, Offset, Addr; - EVT RetVT = Mem->getMemoryVT().getVectorElementType(); + EVT EltVT = Mem->getMemoryVT().getVectorElementType(); - // Select opcode - if (Subtarget.is64Bit()) { + if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LDGV2: - switch (RetVT.getSimpleVT().SimpleTy) { + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar; break; case MVT::i64: - Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar; break; case MVT::f64: - Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar; break; } break; - case NVPTXISD::LDGV4: - switch (RetVT.getSimpleVT().SimpleTy) { + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar; + break; + case 
MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar; break; } break; - case NVPTXISD::LDUV2: - switch (RetVT.getSimpleVT().SimpleTy) { + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; - break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; - break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar; break; } break; case NVPTXISD::LDUV4: - switch (RetVT.getSimpleVT().SimpleTy) { + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar; break; } break; } - } else { - switch (N->getOpcode()) { - default: - return NULL; - case NVPTXISD::LDGV2: - switch (RetVT.getSimpleVT().SimpleTy) { + + SDValue Ops[] = { Addr, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef<SDValue>(Ops, 2)); + } else if (Subtarget.is64Bit() + ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; - break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; - break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64; + break; + } break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64; + 
break; + } break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64; + break; + } break; } - break; - case NVPTXISD::LDGV4: - switch (RetVT.getSimpleVT().SimpleTy) { + } else { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32; + break; + } break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32; + break; + } break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return 
NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32; + break; + } break; } - break; - case NVPTXISD::LDUV2: - switch (RetVT.getSimpleVT().SimpleTy) { + } + + SDValue Ops[] = { Base, Offset, Chain }; + + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef<SDValue>(Ops, 3)); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; - break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; - break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64; + break; + } break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return 
NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64; + break; + } break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64; + break; + } break; } - break; - case NVPTXISD::LDUV4: - switch (RetVT.getSimpleVT().SimpleTy) { + } else { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32; + break; 
+ case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32; + break; + } break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32; + break; + } break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32; + break; + } break; } - break; } - } - SDValue Ops[] = { Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); + SDValue Ops[] = { Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef<SDValue>(Ops, 2)); + } MachineSDNode::mmo_iterator MemRefs0 = 
MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 93cdfef..14049b1 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1342,20 +1342,38 @@ int_nvvm_ldu_global_p>; // Elementized vector ldu multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins Int32Regs:$src), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri:$src), !strconcat("ldu.global.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins Int64Regs:$src), + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins imemAny:$src), !strconcat("ldu.global.", TyStr), []>; } -multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int32Regs:$src), +multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri:$src), !strconcat("ldu.global.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, 
regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int64Regs:$src), + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins imemAny:$src), !strconcat("ldu.global.", TyStr), []>; } @@ -1452,20 +1470,38 @@ defm INT_PTX_LDG_GLOBAL_p64 // Elementized vector ldg multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins imemAny:$src), + !strconcat("ld.global.nc.", TyStr), []>; } multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari64: NVPTXInst<(outs regclass:$dst1, 
regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri64:$src), !strconcat("ld.global.nc.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src), + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins imemAny:$src), !strconcat("ld.global.nc.", TyStr), []>; } diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll new file mode 100644 index 0000000..242e5b8 --- /dev/null +++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + + +define void @reg_plus_offset(i32* %a) { +; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32]; +; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36]; + %p2 = getelementptr i32* %a, i32 8 + %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1 + %p3 = getelementptr i32* %a, i32 9 + %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1 + %t3 = mul i32 %t1, %t2 + store i32 %t3, i32* %a + ret void +} + +!1 = metadata !{ i32 4 } + +declare i32 @llvm.nvvm.ldu.global.i.i32(i32*) +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
\ No newline at end of file