Diffstat (limited to 'lib/Target/AArch64/AArch64CollectLOH.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64CollectLOH.cpp | 1117
1 files changed, 1117 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
new file mode 100644
index 0000000..6b1f096
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -0,0 +1,1117 @@
+//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that collects the Linker Optimization Hints (LOH).
+// This pass should be run at the very end of the compilation flow, just before
+// the assembly printer.
+// To be useful for the linker, the LOH must be printed into the assembly file.
+//
+// A LOH describes a sequence of instructions that may be optimized by the
+// linker.
+// This same sequence cannot be optimized by the compiler because some of
+// the information is only known at link time.
+// For instance, consider the following sequence:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//     L3: ldr xC, [xB, #imm]
+// This sequence can be turned into:
+// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
+//     L3: ldr xC, sym+#imm
+// It may also be turned into either of the following more efficient
+// code sequences:
+// - If sym@PAGEOFF + #imm fits the encoding space of L3.
+//     L1: adrp xA, sym@PAGE
+//     L3: ldr xC, [xA, sym@PAGEOFF + #imm]
+// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
+//     L1: adr xA, sym
+//     L3: ldr xC, [xA, #imm]
+//
+// To be valid a LOH must meet all the requirements needed by all the related
+// possible linker transformations.
+// For instance, using the running example, the constraints to emit
+// ".loh AdrpAddLdr" are:
+// - L1, L2, and L3 instructions are of the expected type, i.e.,
+//   respectively ADRP, ADD (immediate), and LD.
+// - The result of L1 is used only by L2.
+// - The register argument (xA) used in the ADD instruction is defined
+//   only by L1.
+// - The result of L2 is used only by L3.
+// - The base address (xB) in L3 is defined only by L2.
+// - The ADRP in L1 and the ADD in L2 must reference the same symbol using
+//   @PAGE/@PAGEOFF with no additional constants.
+//
+// Currently supported LOHs are:
+// * So-called non-ADRP-related:
+//   - .loh AdrpAddLdr L1, L2, L3:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//     L3: ldr xC, [xB, #imm]
+//   - .loh AdrpLdrGotLdr L1, L2, L3:
+//     L1: adrp xA, sym@GOTPAGE
+//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
+//     L3: ldr xC, [xB, #imm]
+//   - .loh AdrpLdr L1, L3:
+//     L1: adrp xA, sym@PAGE
+//     L3: ldr xC, [xA, sym@PAGEOFF]
+//   - .loh AdrpAddStr L1, L2, L3:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//     L3: str xC, [xB, #imm]
+//   - .loh AdrpLdrGotStr L1, L2, L3:
+//     L1: adrp xA, sym@GOTPAGE
+//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
+//     L3: str xC, [xB, #imm]
+//   - .loh AdrpAdd L1, L2:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//   For all these LOHs, L1, L2, L3 form a simple chain:
+//   L1's result is used only by L2 and L2's result only by L3.
+//   L3's LOH-related argument is defined only by L2 and L2's LOH-related
+//   argument only by L1.
+// All these LOHs aim at using more efficient load/store patterns by folding
+// some instructions used to compute the address directly into the load/store.
+//
+// * So-called ADRP-related:
+//   - .loh AdrpAdrp L2, L1:
+//     L2: ADRP xA, sym1@PAGE
+//     L1: ADRP xA, sym2@PAGE
+//     L2 dominates L1 and xA is not redefined between L2 and L1.
+//   This LOH aims at getting rid of redundant ADRP instructions.
+//
+// The overall design for emitting the LOHs is:
+// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
+// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
+//     1. Associates a label with each of them.
+//     2. Emits them in a MCStreamer (EmitLOHDirective).
+//         - The MCMachOStreamer records them into the MCAssembler.
+//         - The MCAsmStreamer prints them.
+//         - Other MCStreamers ignore them.
+//     3. Closes the MCStreamer:
+//         - The MachObjectWriter gets them from the MCAssembler and writes
+//           them in the object file.
+//         - Other ObjectWriters ignore them.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-collect-loh"
+
+static cl::opt<bool>
+PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
+                   cl::desc("Restrict analysis to registers involved"
+                            " in LOHs"),
+                   cl::init(true));
+
+static cl::opt<bool>
+BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
+                    cl::desc("Restrict analysis to basic block scope"),
+                    cl::init(true));
+
+STATISTIC(NumADRPSimpleCandidate,
+          "Number of simplifiable ADRP dominated by another");
+STATISTIC(NumADRPComplexCandidate2,
+          "Number of simplifiable ADRP reachable by 2 defs");
+STATISTIC(NumADRPComplexCandidate3,
+          "Number of simplifiable ADRP reachable by 3 defs");
+STATISTIC(NumADRPComplexCandidateOther,
+          "Number of simplifiable ADRP reachable by 4 or more defs");
+STATISTIC(NumADDToSTRWithImm,
+          "Number of simplifiable STR with imm reachable by ADD");
+STATISTIC(NumLDRToSTRWithImm,
+          "Number of simplifiable STR with imm reachable by LDR");
+STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
+STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
+STATISTIC(NumADDToLDRWithImm,
+          "Number of simplifiable LDR with imm reachable by ADD");
+STATISTIC(NumLDRToLDRWithImm,
+          "Number of simplifiable LDR with imm reachable by LDR");
+STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
+STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
+STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
+STATISTIC(NumCplxLvl1, "Number of complex cases of level 1");
+STATISTIC(NumTooCplxLvl1, "Number of too complex cases of level 1");
+STATISTIC(NumCplxLvl2, "Number of complex cases of level 2");
+STATISTIC(NumTooCplxLvl2, "Number of too complex cases of level 2");
+STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
+STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
+
+namespace llvm {
+void initializeAArch64CollectLOHPass(PassRegistry &);
+}
+
+namespace {
+struct AArch64CollectLOH : public MachineFunctionPass {
+  static char ID;
+  AArch64CollectLOH() : MachineFunctionPass(ID) {
+    initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "AArch64 Collect Linker Optimization Hint (LOH)";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+  }
+
+private:
+};
+
+/// A set of MachineInstrs.
+typedef SetVector<const MachineInstr *> SetOfMachineInstr;
+/// Map a basic block to a set of instructions per register.
+/// This is used to represent the exposed uses of a basic block
+/// per register.
+typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *>
+BlockToSetOfInstrsPerColor;
+/// Map a basic block to an instruction per register.
+/// This is used to represent the live-out definitions of a basic block
+/// per register.
+typedef MapVector<const MachineBasicBlock *, const MachineInstr **>
+BlockToInstrPerColor;
+/// Map an instruction to a set of instructions. Used to represent the
+/// mapping def to reachable uses or use to definitions.
+typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
+/// Map a basic block to a BitVector.
+/// This is used to record the killed registers per basic block.
+typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
+
+/// Map a register to a dense id.
+typedef DenseMap<unsigned, unsigned> MapRegToId;
+/// Map a dense id to a register. Used for debug purposes.
+typedef SmallVector<unsigned, 32> MapIdToReg;
+} // end anonymous namespace.
+
+char AArch64CollectLOH::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
+                      "AArch64 Collect Linker Optimization Hint (LOH)", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
+                    "AArch64 Collect Linker Optimization Hint (LOH)", false,
+                    false)
+
+/// Given a couple (MBB, reg) get the corresponding set of instructions from
+/// the given "sets".
+/// If this couple does not reference any set, an empty set is added to "sets"
+/// for this couple and returned.
+/// \param nbRegs is used internally to allocate some memory. It must be
+/// consistent with the way sets is used.
+static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
+                                 const MachineBasicBlock &MBB, unsigned reg,
+                                 unsigned nbRegs) {
+  SetOfMachineInstr *result;
+  BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
+  if (it != sets.end())
+    result = it->second;
+  else
+    result = sets[&MBB] = new SetOfMachineInstr[nbRegs];
+
+  return result[reg];
+}
+
+/// Given a couple (reg, MI) get the corresponding set of instructions from
+/// the given "sets".
+/// This is used to get the uses recorded in sets of a definition identified by
+/// MI and reg, i.e., MI defines reg.
+/// If the couple does not reference anything, an empty set is added to
+/// "sets[reg]".
+/// \pre set[reg] is valid.
+static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
+                                  const MachineInstr &MI) {
+  return sets[reg][&MI];
+}
+
+/// Same as getUses but does not modify the input map: sets.
+/// \return NULL if the couple (reg, MI) is not in sets.
+static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
+                                        const MachineInstr &MI) {
+  InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
+  if (Res != sets[reg].end())
+    return &(Res->second);
+  return nullptr;
+}
+
+/// Initialize the reaching definition algorithm:
+/// For each basic block BB in MF, record:
+/// - its kill set.
+/// - its reachable uses (uses that are exposed to BB's predecessors).
+/// - its generated definitions.
+/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
+/// the list of uses of exposed definitions.
+/// \param ADRPMode specifies to only consider ADRP instructions for generated
+/// definitions. It also considers definitions of ADRP instructions as uses and
+/// ignores other uses. The ADRPMode is used to collect the information for
+/// LOHs that involve ADRP operations only.
+static void initReachingDef(MachineFunction &MF,
+                            InstrToInstrs *ColorOpToReachedUses,
+                            BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
+                            BlockToSetOfInstrsPerColor &ReachableUses,
+                            const MapRegToId &RegToId,
+                            const MachineInstr *DummyOp, bool ADRPMode) {
+  const TargetMachine &TM = MF.getTarget();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+  unsigned NbReg = RegToId.size();
+
+  for (MachineBasicBlock &MBB : MF) {
+    const MachineInstr **&BBGen = Gen[&MBB];
+    BBGen = new const MachineInstr *[NbReg];
+    memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg);
+
+    BitVector &BBKillSet = Kill[&MBB];
+    BBKillSet.resize(NbReg);
+    for (const MachineInstr &MI : MBB) {
+      bool IsADRP = MI.getOpcode() == AArch64::ADRP;
+
+      // Process uses first.
+      if (IsADRP || !ADRPMode)
+        for (const MachineOperand &MO : MI.operands()) {
+          // Treat ADRP def as use, as the goal of the analysis is to find
+          // ADRP defs reached by other ADRP defs.
+          if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
+              (ADRPMode && (!IsADRP || !MO.isDef())))
+            continue;
+          unsigned CurReg = MO.getReg();
+          MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
+          if (ItCurRegId == RegToId.end())
+            continue;
+          CurReg = ItCurRegId->second;
+
+          // If CurReg has not been defined, this use is reachable.
+          if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
+            getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
+          // The current basic block definition for this color, if any, is in
+          // Gen.
+          if (BBGen[CurReg])
+            getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
+        }
+
+      // Process clobbers.
+      for (const MachineOperand &MO : MI.operands()) {
+        if (!MO.isRegMask())
+          continue;
+        // Clobbers kill the related colors.
+        const uint32_t *PreservedRegs = MO.getRegMask();
+
+        // Set generated regs.
+        for (const auto Entry : RegToId) {
+          unsigned Reg = Entry.second;
+          // Use the global register ID when querying APIs external to this
+          // pass.
+          if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
+            // Do not record the clobbering instruction as a definition unless
+            // in ADRPMode.
+            // This definition is not used anyway (otherwise register
+            // allocation is wrong).
+            BBGen[Reg] = ADRPMode ? &MI : nullptr;
+            BBKillSet.set(Reg);
+          }
+        }
+      }
+
+      // Process register defs.
+      for (const MachineOperand &MO : MI.operands()) {
+        if (!MO.isReg() || !MO.isDef())
+          continue;
+        unsigned CurReg = MO.getReg();
+        MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
+        if (ItCurRegId == RegToId.end())
+          continue;
+
+        for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
+          MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
+          assert(ItRegId != RegToId.end() &&
+                 "Sub-register of an involved register, not recorded as "
+                 "involved!");
+          BBKillSet.set(ItRegId->second);
+          BBGen[ItRegId->second] = &MI;
+        }
+        BBGen[ItCurRegId->second] = &MI;
+      }
+    }
+
+    // If we restrict our analysis to basic block scope, conservatively add a
+    // dummy use for each generated value.
+    if (!ADRPMode && DummyOp && !MBB.succ_empty())
+      for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
+        if (BBGen[CurReg])
+          getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
+  }
+}
+
+/// Reaching def core algorithm:
+/// while an Out has changed
+///   for each bb
+///     for each color
+///       In[bb][color] = U Out[bb.predecessors][color]
+///       insert reachableUses[bb][color] in each in[bb][color]
+///                 op.reachedUses
+///
+///       Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
+static void reachingDefAlgorithm(MachineFunction &MF,
+                                 InstrToInstrs *ColorOpToReachedUses,
+                                 BlockToSetOfInstrsPerColor &In,
+                                 BlockToSetOfInstrsPerColor &Out,
+                                 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
+                                 BlockToSetOfInstrsPerColor &ReachableUses,
+                                 unsigned NbReg) {
+  bool HasChanged;
+  do {
+    HasChanged = false;
+    for (MachineBasicBlock &MBB : MF) {
+      unsigned CurReg;
+      for (CurReg = 0; CurReg < NbReg; ++CurReg) {
+        SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
+        SetOfMachineInstr &BBReachableUses =
+            getSet(ReachableUses, MBB, CurReg, NbReg);
+        SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
+        unsigned Size = BBOutSet.size();
+        // In[bb][color] = U Out[bb.predecessors][color]
+        for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+          SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
+          BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
+        }
+        // Insert reachableUses[bb][color] in each in[bb][color] op.reachedUses.
+        for (const MachineInstr *MI : BBInSet) {
+          SetOfMachineInstr &OpReachedUses =
+              getUses(ColorOpToReachedUses, CurReg, *MI);
+          OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
+        }
+        // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
+        if (!Kill[&MBB].test(CurReg))
+          BBOutSet.insert(BBInSet.begin(), BBInSet.end());
+        if (Gen[&MBB][CurReg])
+          BBOutSet.insert(Gen[&MBB][CurReg]);
+        HasChanged |= BBOutSet.size() != Size;
+      }
+    }
+  } while (HasChanged);
+}
+
+/// Release all memory dynamically allocated during the reaching
+/// definition algorithm.
+static void finitReachingDef(BlockToSetOfInstrsPerColor &In,
+                             BlockToSetOfInstrsPerColor &Out,
+                             BlockToInstrPerColor &Gen,
+                             BlockToSetOfInstrsPerColor &ReachableUses) {
+  for (auto &IT : Out)
+    delete[] IT.second;
+  for (auto &IT : In)
+    delete[] IT.second;
+  for (auto &IT : ReachableUses)
+    delete[] IT.second;
+  for (auto &IT : Gen)
+    delete[] IT.second;
+}
+
+/// Reaching definition algorithm.
+/// \param MF function on which the algorithm will operate.
+/// \param[out] ColorOpToReachedUses will contain the result of the reaching
+/// def algorithm.
+/// \param ADRPMode specifies whether the reaching def algorithm should be
+/// tuned for ADRP optimization. \see initReachingDef for more details.
+/// \param DummyOp if not NULL, the algorithm will work at
+/// basic block scope and will set for every exposed definition a use to
+/// @p DummyOp.
+/// \pre ColorOpToReachedUses is an array of InstrToInstrs with at least one
+/// entry per register.
+static void reachingDef(MachineFunction &MF,
+                        InstrToInstrs *ColorOpToReachedUses,
+                        const MapRegToId &RegToId, bool ADRPMode = false,
+                        const MachineInstr *DummyOp = nullptr) {
+  // Structures:
+  // For each basic block.
+  // Out: a set per color of definitions that reach the
+  //      out boundary of this block.
+  // In: same as Out but for the in boundary.
+  // Gen: generated color in this block (one operation per color).
+  // Kill: register set of killed colors in this block.
+  // ReachableUses: a set per color of uses (operations) reachable
+  //                for "In" definitions.
+  BlockToSetOfInstrsPerColor Out, In, ReachableUses;
+  BlockToInstrPerColor Gen;
+  BlockToRegSet Kill;
+
+  // Initialize Gen, Kill and ReachableUses.
+  initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
+                  DummyOp, ADRPMode);
+
+  // Algo.
+  if (!DummyOp)
+    reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
+                         ReachableUses, RegToId.size());
+
+  // finit.
+  finitReachingDef(In, Out, Gen, ReachableUses);
+}
+
+#ifndef NDEBUG
+/// Print the result of the reaching definition algorithm.
+static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
+                             unsigned NbReg, const TargetRegisterInfo *TRI,
+                             const MapIdToReg &IdToReg) {
+  unsigned CurReg;
+  for (CurReg = 0; CurReg < NbReg; ++CurReg) {
+    if (ColorOpToReachedUses[CurReg].empty())
+      continue;
+    DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
+
+    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
+      DEBUG(dbgs() << "Def:\n");
+      DEBUG(DefsIt.first->print(dbgs()));
+      DEBUG(dbgs() << "Reachable uses:\n");
+      for (const MachineInstr *MI : DefsIt.second) {
+        DEBUG(MI->print(dbgs()));
+      }
+    }
+  }
+}
+#endif // NDEBUG
+
+/// Answer the following question: Can Def be one of the definitions
+/// involved in a part of a LOH?
+static bool canDefBePartOfLOH(const MachineInstr *Def) {
+  unsigned Opc = Def->getOpcode();
+  // Accept ADRP, ADDLow and LOADGot.
+  switch (Opc) {
+  default:
+    return false;
+  case AArch64::ADRP:
+    return true;
+  case AArch64::ADDXri:
+    // Check the immediate to see if it is an address.
+    switch (Def->getOperand(2).getType()) {
+    default:
+      return false;
+    case MachineOperand::MO_GlobalAddress:
+    case MachineOperand::MO_JumpTableIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_BlockAddress:
+      return true;
+    }
+  case AArch64::LDRXui:
+    // Check the immediate to see if it is an address.
+    switch (Def->getOperand(2).getType()) {
+    default:
+      return false;
+    case MachineOperand::MO_GlobalAddress:
+      return true;
+    }
+  }
+  // Unreachable.
+  return false;
+}
+
+/// Check whether the given instruction can be the end of a LOH chain
+/// involving a store.
+static bool isCandidateStore(const MachineInstr *Instr) {
+  switch (Instr->getOpcode()) {
+  default:
+    return false;
+  case AArch64::STRBui:
+  case AArch64::STRHui:
+  case AArch64::STRWui:
+  case AArch64::STRXui:
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+    // In case we have str xA, [xA, #imm], these are two different uses
+    // of xA and we cannot fold, otherwise the xA stored may be wrong,
+    // even if #imm == 0.
+    if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
+      return true;
+  }
+  return false;
+}
+
+/// Given the result of a reaching definition algorithm in
+/// ColorOpToReachedUses, build the use-to-defs information and filter out
+/// obvious non-LOH candidates.
+/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
+/// In non-ADRPMode, non-LOH candidates are "uses" with several definitions,
+/// i.e., no simple chain.
+/// \param ADRPMode -- \see initReachingDef.
+static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
+                              const InstrToInstrs *ColorOpToReachedUses,
+                              const MapRegToId &RegToId,
+                              bool ADRPMode = false) {
+
+  SetOfMachineInstr NotCandidate;
+  unsigned NbReg = RegToId.size();
+  MapRegToId::const_iterator EndIt = RegToId.end();
+  for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
+    // If this color is never defined, continue.
+    if (ColorOpToReachedUses[CurReg].empty())
+      continue;
+
+    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
+      for (const MachineInstr *MI : DefsIt.second) {
+        const MachineInstr *Def = DefsIt.first;
+        MapRegToId::const_iterator It;
+        // If all the reaching defs are not ADRP, this use will not be
+        // simplifiable.
+        if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
+            (!ADRPMode && !canDefBePartOfLOH(Def)) ||
+            (!ADRPMode && isCandidateStore(MI) &&
+             // Stores are LOH candidates iff the end of the chain is used as
+             // the base.
+             ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
+              It->second != CurReg))) {
+          NotCandidate.insert(MI);
+          continue;
+        }
+        // Do not consider self reaching as a simplifiable case for ADRP.
+        if (!ADRPMode || MI != DefsIt.first) {
+          UseToReachingDefs[MI].insert(DefsIt.first);
+          // If UsesIt has several reaching definitions, it is not a
+          // candidate for simplification in non-ADRPMode.
+          if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
+            NotCandidate.insert(MI);
+        }
+      }
+    }
+  }
+  for (const MachineInstr *Elem : NotCandidate) {
+    DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
+    // It would have been better if we could just remove the entry
+    // from the map. Because of that, we have to filter the garbage
+    // (second.empty) in the subsequent analysis.
+    UseToReachingDefs[Elem].clear();
+  }
+}
+
+/// Based on the use-to-defs information (in ADRPMode), compute the
+/// ADRP-related LOH opportunities.
+static void computeADRP(const InstrToInstrs &UseToDefs,
+                        AArch64FunctionInfo &AArch64FI,
+                        const MachineDominatorTree *MDT) {
+  DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
+  for (const auto &Entry : UseToDefs) {
+    unsigned Size = Entry.second.size();
+    if (Size == 0)
+      continue;
+    if (Size == 1) {
+      const MachineInstr *L2 = *Entry.second.begin();
+      const MachineInstr *L1 = Entry.first;
+      if (!MDT->dominates(L2, L1)) {
+        DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
+                     << '\n');
+        continue;
+      }
+      DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
+      SmallVector<const MachineInstr *, 2> Args;
+      Args.push_back(L2);
+      Args.push_back(L1);
+      AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
+      ++NumADRPSimpleCandidate;
+    }
+#ifdef DEBUG
+    else if (Size == 2)
+      ++NumADRPComplexCandidate2;
+    else if (Size == 3)
+      ++NumADRPComplexCandidate3;
+    else
+      ++NumADRPComplexCandidateOther;
+#endif
+    // If Size < 1, the use should have been removed from the candidates.
+    assert(Size >= 1 && "No reaching defs for that use!");
+  }
+}
+
+/// Check whether the given instruction can be the end of a LOH chain
+/// involving a load.
+static bool isCandidateLoad(const MachineInstr *Instr) {
+  switch (Instr->getOpcode()) {
+  default:
+    return false;
+  case AArch64::LDRSBWui:
+  case AArch64::LDRSBXui:
+  case AArch64::LDRSHWui:
+  case AArch64::LDRSHXui:
+  case AArch64::LDRSWui:
+  case AArch64::LDRBui:
+  case AArch64::LDRHui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+    if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
+      return false;
+    return true;
+  }
+  // Unreachable.
+  return false;
+}
+
+/// Check whether the given instruction can load a literal.
+static bool supportLoadFromLiteral(const MachineInstr *Instr) {
+  switch (Instr->getOpcode()) {
+  default:
+    return false;
+  case AArch64::LDRSWui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+    return true;
+  }
+  // Unreachable.
+  return false;
+}
+
+/// Check whether the given instruction is a LOH candidate.
+/// \param UseToDefs is used to check that Instr is at the end of a
+/// LOH-supported chain.
+/// \pre UseToDefs contains only one def per use, i.e., obvious non-candidates
+/// have already been filtered out.
+static bool isCandidate(const MachineInstr *Instr,
+                        const InstrToInstrs &UseToDefs,
+                        const MachineDominatorTree *MDT) {
+  if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
+    return false;
+
+  const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
+  if (Def->getOpcode() != AArch64::ADRP) {
+    // At this point, Def is ADDXri or LDRXui of the right type of
+    // symbol, because we filtered out the uses that were not defined
+    // by these kinds of instructions (+ ADRP).
+
+    // Check if this forms a simple chain: each intermediate node must
+    // dominate the next one.
+    if (!MDT->dominates(Def, Instr))
+      return false;
+    // Move one node up in the simple chain.
+    if (UseToDefs.find(Def) == UseToDefs.end()
+        // The map may contain garbage we have to ignore.
+        || UseToDefs.find(Def)->second.empty())
+      return false;
+    Instr = Def;
+    Def = *UseToDefs.find(Def)->second.begin();
+  }
+  // Check if we reached the top of the simple chain:
+  // - top is ADRP.
+  // - check the simple chain property: each intermediate node must
+  //   dominate the next one.
+  if (Def->getOpcode() == AArch64::ADRP)
+    return MDT->dominates(Def, Instr);
+  return false;
+}
+
+static bool registerADRCandidate(const MachineInstr &Use,
+                                 const InstrToInstrs &UseToDefs,
+                                 const InstrToInstrs *DefsPerColorToUses,
+                                 AArch64FunctionInfo &AArch64FI,
+                                 SetOfMachineInstr *InvolvedInLOHs,
+                                 const MapRegToId &RegToId) {
+  // Look for opportunities to turn ADRP -> ADD or
+  // ADRP -> LDR GOTPAGEOFF into ADR.
+  // If the ADRP has more than one use, give up.
+  if (Use.getOpcode() != AArch64::ADDXri &&
+      (Use.getOpcode() != AArch64::LDRXui ||
+       !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
+    return false;
+  InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
+  // The map may contain garbage that we need to ignore.
+  if (It == UseToDefs.end() || It->second.empty())
+    return false;
+  const MachineInstr &Def = **It->second.begin();
+  if (Def.getOpcode() != AArch64::ADRP)
+    return false;
+  // Check the number of users of the ADRP.
+  const SetOfMachineInstr *Users =
+      getUses(DefsPerColorToUses,
+              RegToId.find(Def.getOperand(0).getReg())->second, Def);
+  if (Users->size() > 1) {
+    ++NumADRComplexCandidate;
+    return false;
+  }
+  ++NumADRSimpleCandidate;
+  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
+         "ADRP already involved in LOH.");
+  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
+         "ADD already involved in LOH.");
+  DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
+
+  SmallVector<const MachineInstr *, 2> Args;
+  Args.push_back(&Def);
+  Args.push_back(&Use);
+
+  AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri
+                                ? MCLOH_AdrpAdd
+                                : MCLOH_AdrpLdrGot,
+                            Args);
+  return true;
+}
+
+/// Based on the use-to-defs information (in non-ADRPMode), compute the
+/// non-ADRP-related LOH opportunities.
+static void computeOthers(const InstrToInstrs &UseToDefs,
+                          const InstrToInstrs *DefsPerColorToUses,
+                          AArch64FunctionInfo &AArch64FI,
+                          const MapRegToId &RegToId,
+                          const MachineDominatorTree *MDT) {
+  SetOfMachineInstr *InvolvedInLOHs = nullptr;
+#ifdef DEBUG
+  SetOfMachineInstr InvolvedInLOHsStorage;
+  InvolvedInLOHs = &InvolvedInLOHsStorage;
+#endif // DEBUG
+  DEBUG(dbgs() << "*** Compute LOH for Others\n");
+  // ADRP -> ADD/LDR -> LDR/STR pattern.
+  // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
+
+  // FIXME: When the statistics are not important,
+  // this initial filtering loop can be merged into the next loop.
+  // Currently, we did not do it, to have the same code for both DEBUG and
+  // NDEBUG builds. Indeed, the iterator of the second loop would need
+  // to be changed.
+  SetOfMachineInstr PotentialCandidates;
+  SetOfMachineInstr PotentialADROpportunities;
+  for (auto &Use : UseToDefs) {
+    // If no definition is available, this is a non-candidate.
+    if (Use.second.empty())
+      continue;
+    // Keep only instructions that are loads or stores and at the end of
+    // an ADRP -> ADD/LDR/Nothing chain.
+    // We already filtered out the no-chain cases.
+    if (!isCandidate(Use.first, UseToDefs, MDT)) {
+      PotentialADROpportunities.insert(Use.first);
+      continue;
+    }
+    PotentialCandidates.insert(Use.first);
+  }
+
+  // Make the following distinctions for statistics as the linker does
+  // know how to decode instructions:
+  // - ADD/LDR/Nothing make three different patterns.
+  // - LDR/STR make two different patterns.
+  // Hence, 6 - 1 base patterns
+  // (because ADRP -> Nothing -> STR is not simplifiable).
+
+  // The linker is only able to have a simple semantic, i.e., if pattern A,
+  // do B.
+  // However, we want to see the opportunity we may miss if we were able to
+  // catch more complex cases.
+
+  // PotentialCandidates are the result of a chain ADRP -> ADD/LDR ->
+  // A potential candidate becomes a candidate if its current immediate
+  // operand is zero and all nodes of the chain have respectively only one
+  // user.
+#ifdef DEBUG
+  SetOfMachineInstr DefsOfPotentialCandidates;
+#endif
+  for (const MachineInstr *Candidate : PotentialCandidates) {
+    // Get the definition of the candidate, i.e., ADD or LDR.
+    const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
+    // Record the elements of the chain.
+    const MachineInstr *L1 = Def;
+    const MachineInstr *L2 = nullptr;
+    unsigned ImmediateDefOpc = Def->getOpcode();
+    if (Def->getOpcode() != AArch64::ADRP) {
+      // Check the number of users of this node.
+      const SetOfMachineInstr *Users =
+          getUses(DefsPerColorToUses,
+                  RegToId.find(Def->getOperand(0).getReg())->second, *Def);
+      if (Users->size() > 1) {
+#ifdef DEBUG
+        // If all the uses of this def are in the potential candidates, this
+        // is a complex candidate of level 2.
+        bool IsLevel2 = true;
+        for (const MachineInstr *MI : *Users) {
+          if (!PotentialCandidates.count(MI)) {
+            ++NumTooCplxLvl2;
+            IsLevel2 = false;
+            break;
+          }
+        }
+        if (IsLevel2)
+          ++NumCplxLvl2;
+#endif // DEBUG
+        PotentialADROpportunities.insert(Def);
+        continue;
+      }
+      L2 = Def;
+      Def = *UseToDefs.find(Def)->second.begin();
+      L1 = Def;
+    } // else the element in the middle of the chain is nothing, thus
+      // Def already contains the first element of the chain.
+
+    // Check the number of users of the first node in the chain, i.e., ADRP.
+    const SetOfMachineInstr *Users =
+        getUses(DefsPerColorToUses,
+                RegToId.find(Def->getOperand(0).getReg())->second, *Def);
+    if (Users->size() > 1) {
+#ifdef DEBUG
+      // If all the uses of this def are in the defs of the potential
+      // candidates, this is a complex candidate of level 1.
+      if (DefsOfPotentialCandidates.empty()) {
+        // Lazy init.
+        DefsOfPotentialCandidates = PotentialCandidates;
+        for (const MachineInstr *Candidate : PotentialCandidates) {
+          if (!UseToDefs.find(Candidate)->second.empty())
+            DefsOfPotentialCandidates.insert(
+                *UseToDefs.find(Candidate)->second.begin());
+        }
+      }
+      bool Found = false;
+      for (auto &Use : *Users) {
+        if (!DefsOfPotentialCandidates.count(Use)) {
+          ++NumTooCplxLvl1;
+          Found = true;
+          break;
+        }
+      }
+      if (!Found)
+        ++NumCplxLvl1;
+#endif // DEBUG
+      continue;
+    }
+
+    bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
+    // If the chain is three instructions long and the LDR is the second
+    // element, then this LDR must load from the GOT, otherwise this is not
+    // a correct chain.
+    if (L2 && !IsL2Add &&
+        L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT)
+      continue;
+    SmallVector<const MachineInstr *, 3> Args;
+    MCLOHType Kind;
+    if (isCandidateLoad(Candidate)) {
+      if (!L2) {
+        // At this point, the candidate LOH indicates that the LDR instruction
+        // may use a direct access to the symbol. There is no such encoding
+        // for loads of byte and half.
+        if (!supportLoadFromLiteral(Candidate))
+          continue;
+
+        DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
+                     << '\n');
+        Kind = MCLOH_AdrpLdr;
+        Args.push_back(L1);
+        Args.push_back(Candidate);
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
+               "L1 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
+               "Candidate already involved in LOH.");
+        ++NumADRPToLDR;
+      } else {
+        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
+                     << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
+                     << '\n');
+
+        Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
+        Args.push_back(L1);
+        Args.push_back(L2);
+        Args.push_back(Candidate);
+
+        PotentialADROpportunities.remove(L2);
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
+               "L1 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
+               "L2 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
+               "Candidate already involved in LOH.");
+#ifdef DEBUG
+        // Get the immediate of the load.
+        if (Candidate->getOperand(2).getImm() == 0)
+          if (ImmediateDefOpc == AArch64::ADDXri)
+            ++NumADDToLDR;
+          else
+            ++NumLDRToLDR;
+        else if (ImmediateDefOpc == AArch64::ADDXri)
+          ++NumADDToLDRWithImm;
+        else
+          ++NumLDRToLDRWithImm;
+#endif // DEBUG
+      }
+    } else {
+      if (ImmediateDefOpc == AArch64::ADRP)
+        continue;
+      else {
+
+        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
+                     << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
+                     << '\n');
+
+        Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
+        Args.push_back(L1);
+        Args.push_back(L2);
+        Args.push_back(Candidate);
+
+        PotentialADROpportunities.remove(L2);
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
+               "L1 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
+               "L2 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
+               "Candidate already involved in LOH.");
+#ifdef DEBUG
+        // Get the immediate of the store.
+        if (Candidate->getOperand(2).getImm() == 0)
+          if (ImmediateDefOpc == AArch64::ADDXri)
+            ++NumADDToSTR;
+          else
+            ++NumLDRToSTR;
+        else if (ImmediateDefOpc == AArch64::ADDXri)
+          ++NumADDToSTRWithImm;
+        else
+          ++NumLDRToSTRWithImm;
+#endif // DEBUG
+      }
+    }
+    AArch64FI.addLOHDirective(Kind, Args);
+  }
+
+  // Now that we have grabbed all the big patterns, check ADR opportunities.
+  for (const MachineInstr *Candidate : PotentialADROpportunities)
+    registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
+                         InvolvedInLOHs, RegToId);
+}
+
+/// Look for every register defined by potential LOH candidates.
+/// Map these registers to a dense id in @p RegToId and vice-versa in
+/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
+static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+                               MapIdToReg &IdToReg,
+                               const TargetRegisterInfo *TRI) {
+  unsigned CurRegId = 0;
+  if (!PreCollectRegister) {
+    unsigned NbReg = TRI->getNumRegs();
+    for (; CurRegId < NbReg; ++CurRegId) {
+      RegToId[CurRegId] = CurRegId;
+      DEBUG(IdToReg.push_back(CurRegId));
+      DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
+    }
+    return;
+  }
+
+  DEBUG(dbgs() << "** Collect Involved Register\n");
+  for (const auto &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (!canDefBePartOfLOH(&MI))
+        continue;
+
+      // Process defs.
+      for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
+                                            IOEnd = MI.operands_end();
+           IO != IOEnd; ++IO) {
+        if (!IO->isReg() || !IO->isDef())
+          continue;
+        unsigned CurReg = IO->getReg();
+        for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
+          if (RegToId.find(*AI) == RegToId.end()) {
+            DEBUG(IdToReg.push_back(*AI);
+                  assert(IdToReg[CurRegId] == *AI &&
+                         "Reg index mismatches insertion index."));
+            RegToId[*AI] = CurRegId++;
+            DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
+          }
+      }
+    }
+  }
+}
+
+bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
+  const TargetMachine &TM = MF.getTarget();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+
+  MapRegToId RegToId;
+  MapIdToReg IdToReg;
+  AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
+  assert(AArch64FI && "No MachineFunctionInfo for this function!");
+
+  DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
+
+  collectInvolvedReg(MF, RegToId, IdToReg, TRI);
+  if (RegToId.empty())
+    return false;
+
+  MachineInstr *DummyOp = nullptr;
+  if (BasicBlockScopeOnly) {
+    const AArch64InstrInfo *TII =
+        static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
+    // For local analysis, create a dummy operation to record uses that are
+    // not local.
+    DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
+  }
+
+  unsigned NbReg = RegToId.size();
+  bool Modified = false;
+
+  // Start with ADRP.
+  InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
+
+  // Compute the reaching def in ADRP mode, meaning ADRP definitions
+  // are first considered as uses.
+  reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
+  DEBUG(dbgs() << "ADRP reaching defs\n");
+  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
+
+  // Translate the definition-to-uses map into a use-to-definitions map to
+  // ease statistic computation.
+  InstrToInstrs ADRPToReachingDefs;
+  reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
+
+  // Compute LOH for ADRP.
+  computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
+  delete[] ColorOpToReachedUses;
+
+  // Continue with the general ADRP -> ADD/LDR -> LDR/STR pattern.
+  ColorOpToReachedUses = new InstrToInstrs[NbReg];
+
+  // First perform a regular reaching def analysis.
+  reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
+  DEBUG(dbgs() << "All reaching defs\n");
+  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
+
+  // Turn that into a use-to-defs map to ease statistic computation.
+  InstrToInstrs UsesToReachingDefs;
+  reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
+
+  // Compute the non-AdrpAdrp LOHs.
+  computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
+                MDT);
+  delete[] ColorOpToReachedUses;
+
+  if (BasicBlockScopeOnly)
+    MF.DeleteMachineInstr(DummyOp);
+
+  return Modified;
+}
+
+/// createAArch64CollectLOHPass - returns an instance of the AArch64 Collect
+/// Linker Optimization Hint (LOH) pass.
+FunctionPass *llvm::createAArch64CollectLOHPass() {
+  return new AArch64CollectLOH();
+}
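
For readers skimming the patch, the following is a minimal, self-contained sketch (plain C++, not LLVM code; the Instr struct, isSimpleChain, and all names are invented for illustration) of the "simple chain" criterion that reachedUsesToDefs() and isCandidate() enforce above: a load/store is a LOH candidate only if every link of the ADRP -> ADD/LDR -> LDR/STR chain has exactly one reaching definition and the chain terminates at an ADRP. The dominance checks and the per-register coloring of the real pass are deliberately omitted.

// Toy model of the simple-chain test used by AArch64CollectLOH (illustrative
// only; names are hypothetical and the LLVM data structures are replaced by
// plain STL containers).
#include <cassert>
#include <map>
#include <set>
#include <string>

struct Instr {
  std::string Opcode; // e.g. "ADRP", "ADDXri", "LDRXui"
};

// Analogue of InstrToInstrs: map a use to its set of reaching definitions.
using UseToDefsMap = std::map<const Instr *, std::set<const Instr *>>;

// Returns true if I sits at the end of an ADRP -> (ADD/LDR)? -> I chain where
// every link has exactly one reaching definition.
static bool isSimpleChain(const Instr *I, const UseToDefsMap &UseToDefs) {
  for (int Depth = 0; Depth < 2; ++Depth) { // at most two hops up the chain
    auto It = UseToDefs.find(I);
    if (It == UseToDefs.end() || It->second.size() != 1)
      return false; // several (or no) reaching defs: no simple chain
    const Instr *Def = *It->second.begin();
    if (Def->Opcode == "ADRP")
      return true; // reached the top of the chain
    I = Def;       // move one node up, as isCandidate() does
  }
  return false;
}

int main() {
  Instr Adrp{"ADRP"}, Add{"ADDXri"}, Ldr{"LDRXui"};
  UseToDefsMap UseToDefs{{&Add, {&Adrp}}, {&Ldr, {&Add}}};
  assert(isSimpleChain(&Ldr, UseToDefs));   // ADRP -> ADD -> LDR: candidate
  assert(!isSimpleChain(&Adrp, UseToDefs)); // ADRP alone is not a chain end
  return 0;
}

In the real pass this predicate is complemented by MachineDominatorTree::dominates checks on each link and by the single-user checks in computeOthers(), which is what makes the recorded .loh directives safe for the linker to act on.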