summaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/MachineCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/MachineCombiner.cpp')
-rw-r--r--lib/CodeGen/MachineCombiner.cpp435
1 files changed, 435 insertions, 0 deletions
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
new file mode 100644
index 0000000..2931258
--- /dev/null
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -0,0 +1,435 @@
+//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The machine combiner pass uses machine trace metrics to ensure the combined
+// instructions does not lengthen the critical path or the resource depth.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "machine-combiner"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumInstCombined, "Number of machineinst combined");
+
+namespace {
+class MachineCombiner : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MCSchedModel SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+
+ TargetSchedModel TSchedModel;
+
+ /// OptSize - True if optimizing for code size.
+ bool OptSize;
+
+public:
+ static char ID;
+ MachineCombiner() : MachineFunctionPass(ID) {
+ initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override { return "Machine InstCombiner"; }
+
+private:
+ bool doSubstitute(unsigned NewSize, unsigned OldSize);
+ bool combineInstructions(MachineBasicBlock *);
+ MachineInstr *getOperandDef(const MachineOperand &MO);
+ unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace);
+ unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace);
+ bool
+ preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
+ bool preservesResourceLen(MachineBasicBlock *MBB,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs);
+ void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
+};
+}
+
+char MachineCombiner::ID = 0;
+char &llvm::MachineCombinerID = MachineCombiner::ID;
+
+INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
+ "Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
+ false, false)
+
+void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
+ MachineInstr *DefInstr = nullptr;
+ // We need a virtual register definition.
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ DefInstr = MRI->getUniqueVRegDef(MO.getReg());
+ // PHI's have no depth etc.
+ if (DefInstr && DefInstr->isPHI())
+ DefInstr = nullptr;
+ return DefInstr;
+}
+
+/// getDepth - Computes depth of instructions in vector \InsInstr.
+///
+/// \param InsInstrs is a vector of machine instructions
+/// \param InstrIdxForVirtReg is a dense map of virtual register to index
+/// of defining machine instruction in \p InsInstrs
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Depth of last instruction in \InsInstrs ("NewRoot")
+unsigned
+MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace) {
+
+ SmallVector<unsigned, 16> InstrDepth;
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+
+ // Foreach instruction in in the new sequence compute the depth based on the
+ // operands. Use the trace information when possible. For new operands which
+ // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
+ for (auto *InstrPtr : InsInstrs) { // for each Use
+ unsigned IDepth = 0;
+ DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
+ for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = InstrPtr->getOperand(i);
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isUse())
+ continue;
+ unsigned DepthOp = 0;
+ unsigned LatencyOp = 0;
+ DenseMap<unsigned, unsigned>::iterator II =
+ InstrIdxForVirtReg.find(MO.getReg());
+ if (II != InstrIdxForVirtReg.end()) {
+ // Operand is new virtual register not in trace
+ assert(II->second < InstrDepth.size() && "Bad Index");
+ MachineInstr *DefInstr = InsInstrs[II->second];
+ assert(DefInstr &&
+ "There must be a definition for a new virtual register");
+ DepthOp = InstrDepth[II->second];
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ MachineInstr *DefInstr = getOperandDef(MO);
+ if (DefInstr) {
+ DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ }
+ }
+ IDepth = std::max(IDepth, DepthOp + LatencyOp);
+ }
+ InstrDepth.push_back(IDepth);
+ }
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ return InstrDepth[NewRootIdx];
+}
+
+/// getLatency - Computes instruction latency as max of latency of defined
+/// operands
+///
+/// \param Root is a machine instruction that could be replaced by NewRoot.
+/// It is used to compute a more accurate latency information for NewRoot in
+/// case there is a dependent instruction in the same trace (\p BlockTrace)
+/// \param NewRoot is the instruction for which the latency is computed
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Latency of \p NewRoot
+unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace) {
+
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+
+ // Check each definition in NewRoot and compute the latency
+ unsigned NewRootLatency = 0;
+
+ for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = NewRoot->getOperand(i);
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isDef())
+ continue;
+ // Get the first instruction that uses MO
+ MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
+ RI++;
+ MachineInstr *UseMO = RI->getParent();
+ unsigned LatencyOp = 0;
+ if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
+ LatencyOp = TSchedModel.computeOperandLatency(
+ NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
+ UseMO->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
+ }
+ NewRootLatency = std::max(NewRootLatency, LatencyOp);
+ }
+ return NewRootLatency;
+}
+
+/// preservesCriticalPathlen - True when the new instruction sequence does not
+/// lengthen the critical path. The DAGCombine code sequence ends in MI
+/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
+/// necessary condition for the new sequence to replace the old sequence is that
+/// is cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
+/// The slack is the number of cycles Root can be delayed before the critical
+/// patch becomes longer.
+bool MachineCombiner::preservesCriticalPathLen(
+ MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ // NewRoot is the last instruction in the \p InsInstrs vector
+ // Get depth and latency of NewRoot
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ MachineInstr *NewRoot = InsInstrs[NewRootIdx];
+ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
+
+ // Get depth, latency and slack of Root
+ unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
+ unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
+ unsigned RootSlack = BlockTrace.getInstrSlack(Root);
+
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ dbgs() << " NewRootDepth: " << NewRootDepth
+ << " NewRootLatency: " << NewRootLatency << "\n";
+ dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
+ << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " NewRootDepth + NewRootLatency "
+ << NewRootDepth + NewRootLatency << "\n";
+ dbgs() << " RootDepth + RootLatency + RootSlack "
+ << RootDepth + RootLatency + RootSlack << "\n";);
+
+ /// True when the new sequence does not lenghten the critical path.
+ return ((NewRootDepth + NewRootLatency) <=
+ (RootDepth + RootLatency + RootSlack));
+}
+
+/// helper routine to convert instructions into SC
+void MachineCombiner::instr2instrSC(
+ SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
+ for (auto *InstrPtr : Instrs) {
+ unsigned Opc = InstrPtr->getOpcode();
+ unsigned Idx = TII->get(Opc).getSchedClass();
+ const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
+ InstrsSC.push_back(SC);
+ }
+}
+/// preservesResourceLen - True when the new instructions do not increase
+/// resource length
+bool MachineCombiner::preservesResourceLen(
+ MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs) {
+
+ // Compute current resource length
+
+ //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+ SmallVector <const MachineBasicBlock *, 1> MBBarr;
+ MBBarr.push_back(MBB);
+ unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
+
+ // Deal with SC rather than Instructions.
+ SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
+ SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
+
+ instr2instrSC(InsInstrs, InsInstrsSC);
+ instr2instrSC(DelInstrs, DelInstrsSC);
+
+ ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
+ ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
+
+ // Compute new resource length
+ unsigned ResLenAfterCombine =
+ BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
+
+ DEBUG(dbgs() << "RESOURCE DATA: \n";
+ dbgs() << " resource len before: " << ResLenBeforeCombine
+ << " after: " << ResLenAfterCombine << "\n";);
+
+ return ResLenAfterCombine <= ResLenBeforeCombine;
+}
+
+/// \returns true when new instruction sequence should be generated
+/// independent if it lenghtens critical path or not
+bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
+ if (OptSize && (NewSize < OldSize))
+ return true;
+ if (!TSchedModel.hasInstrSchedModel())
+ return true;
+ return false;
+}
+
+/// combineInstructions - substitute a slow code sequence with a faster one by
+/// evaluating instruction combining pattern.
+/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
+/// combining based on machine trace metrics. Only combine a sequence of
+/// instructions when this neither lengthens the critical path nor increases
+/// resource pressure. When optimizing for codesize always combine when the new
+/// sequence is shorter.
+bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+
+ auto BlockIter = MBB->begin();
+
+ while (BlockIter != MBB->end()) {
+ auto &MI = *BlockIter++;
+
+ DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
+ SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
+ // The motivating example is:
+ //
+ // MUL Other MUL_op1 MUL_op2 Other
+ // \ / \ | /
+ // ADD/SUB => MADD/MSUB
+ // (=Root) (=NewRoot)
+
+ // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
+ // usually beneficial for code size it unfortunately can hurt performance
+ // when the ADD is on the critical path, but the MUL is not. With the
+ // substitution the MUL becomes part of the critical path (in form of the
+ // MADD) and can lengthen it on architectures where the MADD latency is
+ // longer than the ADD latency.
+ //
+ // For each instruction we check if it can be the root of a combiner
+ // pattern. Then for each pattern the new code sequence in form of MI is
+ // generated and evaluated. When the efficiency criteria (don't lengthen
+ // critical path, don't use more resources) is met the new sequence gets
+ // hooked up into the basic block before the old sequence is removed.
+ //
+ // The algorithm does not try to evaluate all patterns and pick the best.
+ // This is only an artificial restriction though. In practice there is
+ // mostly one pattern and hasPattern() can order patterns based on an
+ // internal cost heuristic.
+
+ if (TII->hasPattern(MI, Pattern)) {
+ for (auto P : Pattern) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
+ TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (!InsInstrs.size())
+ continue;
+ // Substitute when we optimize for codesize and the new sequence has
+ // fewer instructions OR
+ // the new sequence neither lenghten the critical path nor increases
+ // resource pressure.
+ if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
+ (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+ InstrIdxForVirtReg) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator) & MI,
+ (MachineInstr *)InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+
+ Changed = true;
+ ++NumInstCombined;
+
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
+ // Eagerly stop after the first pattern fired
+ break;
+ } else {
+ // Cleanup instructions of the alternative code sequence. There is no
+ // use for them.
+ for (auto *InstrPtr : InsInstrs) {
+ MachineFunction *MF = MBB->getParent();
+ MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
+ }
+ }
+ InstrIdxForVirtReg.clear();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &STI =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
+ TSchedModel.init(SchedModel, &STI, TII);
+ MRI = &MF.getRegInfo();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0;
+
+ OptSize = MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
+ if (!TII->useMachineCombiner()) {
+ DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
+ return false;
+ }
+
+ bool Changed = false;
+
+ // Try to combine instructions.
+ for (auto &MBB : MF)
+ Changed |= combineInstructions(&MBB);
+
+ return Changed;
+}