summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-07-12 18:15:08 +0000
committerTom Stellard <thomas.stellard@amd.com>2013-07-12 18:15:08 +0000
commitd2442c10f9bfe8a9d6cdcb28030d32deb5b192b1 (patch)
treeac1365c977003b8bbafb4a5891e95f3d856ccbf4
parent54453c11b429a4f90f64bd83e113c69008cbd9ed (diff)
downloadexternal_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.zip
external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.tar.gz
external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.tar.bz2
R600/SI: Add double precision fsub pattern for SI
Patch by: Niels Ole Salscheider
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186179 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/SIISelLowering.cpp15
-rw-r--r--lib/Target/R600/SIInstructions.td17
-rw-r--r--test/CodeGen/R600/fsub64.ll13
3 files changed, 42 insertions, 3 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 4d0fdf3..336bfbf 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -296,6 +296,21 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
+ case AMDGPU::V_SUB_F64: {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(0) /* src2 */
+ .addImm(0) /* ABS */
+ .addImm(0) /* CLAMP */
+ .addImm(0) /* OMOD */
+ .addImm(2); /* NEG */
+ MI->eraseFromParent();
+ break;
+ }
}
return BB;
}
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 3deaa2e..eed4f7f 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1232,17 +1232,23 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
-// This psuedo instruction takes a pointer as input and outputs a resource
-// constant that can be used with the ADDR64 MUBUF instructions.
-
let usesCustomInserter = 1 in {
+// This pseudo instruction takes a pointer as input and outputs a resource
+// constant that can be used with the ADDR64 MUBUF instructions.
def SI_ADDR64_RSRC : InstSI <
(outs SReg_128:$srsrc),
(ins SReg_64:$ptr),
"", []
>;
+def V_SUB_F64 : InstSI <
+ (outs VReg_64:$dst),
+ (ins VReg_64:$src0, VReg_64:$src1),
+ "V_SUB_F64 $dst, $src0, $src1",
+ []
+>;
+
} // end usesCustomInserter
} // end IsCodeGenOnly, isPseudo
@@ -1271,6 +1277,11 @@ def : Pat <
$src0, $src1, $src2, $src3)
>;
+def : Pat <
+ (f64 (fsub f64:$src0, f64:$src1)),
+ (V_SUB_F64 $src0, $src1)
+>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
new file mode 100644
index 0000000..fa59dcc
--- /dev/null
+++ b/test/CodeGen/R600/fsub64.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
+
+; CHECK: @fsub_f64
+; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}, 0, 0, 0, 0, 2
+
+define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) {
+ %r0 = load double addrspace(1)* %in1
+ %r1 = load double addrspace(1)* %in2
+ %r2 = fsub double %r0, %r1
+ store double %r2, double addrspace(1)* %out
+ ret void
+}