R600/SI: Add double precision fsub pattern for SI

Patch by: Niels Ole Salscheider
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186179 91177308-0d34-0410-b5e6-96231b3b80d8
author: Tom Stellard <thomas.stellard@amd.com> 2013-07-12 18:15:08 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2013-07-12 18:15:08 +0000
commit: d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1 (patch)
tree: ac1365c977003b8bbafb4a5891e95f3d856ccbf4
parent: 54453c11b429a4f90f64bd83e113c69008cbd9ed (diff)
download: external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.zip
external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.tar.gz
external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.tar.bz2
3 files changed, 42 insertions, 3 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 4d0fdf3..336bfbf 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -296,6 +296,21 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     MI->eraseFromParent();
     break;
   }
+  case AMDGPU::V_SUB_F64: {
+    const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
+            MI->getOperand(0).getReg())
+            .addReg(MI->getOperand(1).getReg())
+            .addReg(MI->getOperand(2).getReg())
+            .addImm(0)  /* src2 */
+            .addImm(0)  /* ABS */
+            .addImm(0)  /* CLAMP */
+            .addImm(0)  /* OMOD */
+            .addImm(2); /* NEG */
+    MI->eraseFromParent();
+    break;
+  }
   }
   return BB;
 }
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 3deaa2e..eed4f7f 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1232,17 +1232,23 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
 
 } // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
 
-// This psuedo instruction takes a pointer as input and outputs a resource
-// constant that can be used with the ADDR64 MUBUF instructions.
-
 let usesCustomInserter = 1 in {
 
+// This pseudo instruction takes a pointer as input and outputs a resource
+// constant that can be used with the ADDR64 MUBUF instructions.
 def SI_ADDR64_RSRC : InstSI <
   (outs SReg_128:$srsrc),
   (ins SReg_64:$ptr),
   "", []
 >;
 
+def V_SUB_F64 : InstSI <
+  (outs VReg_64:$dst),
+  (ins VReg_64:$src0, VReg_64:$src1),
+  "V_SUB_F64 $dst, $src0, $src1",
+  []
+>;
+
 } // end usesCustomInserter
 
 } // end IsCodeGenOnly, isPseudo
@@ -1271,6 +1277,11 @@ def : Pat <
        $src0, $src1, $src2, $src3)
 >;
 
+def : Pat <
+  (f64 (fsub f64:$src0, f64:$src1)),
+  (V_SUB_F64 $src0, $src1)
+>;
+
 /********** ======================= **********/
 /********** Image sampling patterns **********/
 /********** ======================= **********/
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
new file mode 100644
index 0000000..fa59dcc
--- /dev/null
+++ b/test/CodeGen/R600/fsub64.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
+
+; CHECK: @fsub_f64
+; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}, 0, 0, 0, 0, 2
+
+define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fsub double %r0, %r1
+   store double %r2, double addrspace(1)* %out
+   ret void
+}
author	Tom Stellard <thomas.stellard@amd.com>	2013-07-12 18:15:08 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2013-07-12 18:15:08 +0000
commit	d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1 (patch)
tree	ac1365c977003b8bbafb4a5891e95f3d856ccbf4
parent	54453c11b429a4f90f64bd83e113c69008cbd9ed (diff)
download	external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.zip external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.tar.gz external_llvm-d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1.tar.bz2