Make the use of the vmla and vmls VFP instructions controllable via cmd line.

Preliminary testing shows significant performance wins by not using these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99436 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jim Grosbach <grosbach@apple.com> 2010-03-24 22:31:46 +0000
committer: Jim Grosbach <grosbach@apple.com> 2010-03-24 22:31:46 +0000
commit: 2676737e5ed3e4b5c89b4d06b60d998e9318eb73 (patch)
tree: 48ab73eca523ba3889720df84425b36b6be252fc /lib/Target/ARM
parent: 044be39090a702504c62fc0544fc977a6caa7112 (diff)
download: external_llvm-2676737e5ed3e4b5c89b4d06b60d998e9318eb73.zip
external_llvm-2676737e5ed3e4b5c89b4d06b60d998e9318eb73.tar.gz
external_llvm-2676737e5ed3e4b5c89b4d06b60d998e9318eb73.tar.bz2
5 files changed, 30 insertions, 4 deletions
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index f73707f..08ead22 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1369,6 +1369,20 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
   let Inst{4} = op4;
 }
 
+// Double precision, binary, VML[AS] (for additional predicate)
+class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+           dag iops, InstrItinClass itin, string opc, string asm,
+           list<dag> pattern>
+  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+  let Inst{27-23} = opcod1;
+  let Inst{21-20} = opcod2;
+  let Inst{11-8}  = 0b1011;
+  let Inst{6} = op6;
+  let Inst{4} = op4;
+  list<Predicate> Predicates = [HasVFP2, UseVMLx];
+}
+
+
 // Single precision, unary
 class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 26a2806..f2ab06f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -140,6 +140,8 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
 def UseMovt   : Predicate<"Subtarget->useMovt()">;
 def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
 
+def UseVMLx   : Predicate<"Subtarget->useVMLx()">;
+
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
 
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index aca8230..0458389 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -545,7 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
 // FP FMA Operations.
 //
 
-def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
                 (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
                 IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
                 [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
@@ -558,7 +558,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
                  [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
                  RegConstraint<"$dstin = $dst">;
 
-def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
                 (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
                 IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
                 [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
@@ -571,7 +571,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
                 [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
                 RegConstraint<"$dstin = $dst">;
 
-def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
                  (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
                  IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
              [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
@@ -589,7 +589,7 @@ def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
 def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
           (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 
-def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
                  (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
                  IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
              [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 2dad7f1..6b4438d 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -26,6 +26,10 @@ static cl::opt<bool>
 UseNEONFP("arm-use-neon-fp",
           cl::desc("Use NEON for single-precision FP"),
           cl::init(false), cl::Hidden);
+static cl::opt<bool>
+UseVMLxInstructions("arm-use-vmlx",
+                    cl::desc("Use VFP vmla and vmls instructions"),
+                    cl::init(true), cl::Hidden);
 
 static cl::opt<bool>
 UseMOVT("arm-use-movt",
@@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   : ARMArchVersion(V4)
   , ARMFPUType(None)
   , UseNEONForSinglePrecisionFP(UseNEONFP)
+  , UseVMLx(UseVMLxInstructions)
   , IsThumb(isT)
   , ThumbMode(Thumb1)
   , PostRAScheduler(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 2dc81a4..686684c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -50,6 +50,10 @@ protected:
   /// determine if NEON should actually be used.
   bool UseNEONForSinglePrecisionFP;
 
+  /// UseVMLx - If the VFP2 instructions are available, indicates whether
+  /// the VML[AS] instructions should be used.
+  bool UseVMLx;
+
   /// IsThumb - True if we are in thumb mode, false if in ARM mode.
   bool IsThumb;
 
@@ -119,6 +123,7 @@ protected:
   bool hasNEON() const { return ARMFPUType >= NEON;  }
   bool useNEONForSinglePrecisionFP() const {
     return hasNEON() && UseNEONForSinglePrecisionFP; }
+  bool useVMLx() const {return hasVFP2() && UseVMLx; }
 
   bool hasFP16() const { return HasFP16; }
author	Jim Grosbach <grosbach@apple.com>	2010-03-24 22:31:46 +0000
committer	Jim Grosbach <grosbach@apple.com>	2010-03-24 22:31:46 +0000
commit	2676737e5ed3e4b5c89b4d06b60d998e9318eb73 (patch)
tree	48ab73eca523ba3889720df84425b36b6be252fc /lib/Target/ARM
parent	044be39090a702504c62fc0544fc977a6caa7112 (diff)
download	external_llvm-2676737e5ed3e4b5c89b4d06b60d998e9318eb73.zip external_llvm-2676737e5ed3e4b5c89b4d06b60d998e9318eb73.tar.gz external_llvm-2676737e5ed3e4b5c89b4d06b60d998e9318eb73.tar.bz2