Diffstat (limited to 'include/llvm/Analysis')
33 files changed, 3133 insertions, 712 deletions
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index a06a562..8852866 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -75,7 +75,7 @@ protected: public: static char ID; // Class identification, replacement for typeinfo - AliasAnalysis() : DL(0), TLI(0), AA(0) {} + AliasAnalysis() : DL(nullptr), TLI(nullptr), AA(nullptr) {} virtual ~AliasAnalysis(); // We want to be subclassed /// UnknownSize - This is a special value which can be used with the @@ -116,8 +116,8 @@ public: /// the location, or null if there is no known unique tag. const MDNode *TBAATag; - explicit Location(const Value *P = 0, uint64_t S = UnknownSize, - const MDNode *N = 0) + explicit Location(const Value *P = nullptr, uint64_t S = UnknownSize, + const MDNode *N = nullptr) : Ptr(P), Size(S), TBAATag(N) {} Location getWithNewPtr(const Value *NewPtr) const { @@ -134,7 +134,7 @@ public: Location getWithoutTBAATag() const { Location Copy(*this); - Copy.TBAATag = 0; + Copy.TBAATag = nullptr; return Copy; } }; @@ -560,12 +560,12 @@ struct DenseMapInfo<AliasAnalysis::Location> { static inline AliasAnalysis::Location getEmptyKey() { return AliasAnalysis::Location(DenseMapInfo<const Value *>::getEmptyKey(), - 0, 0); + 0, nullptr); } static inline AliasAnalysis::Location getTombstoneKey() { return AliasAnalysis::Location(DenseMapInfo<const Value *>::getTombstoneKey(), - 0, 0); + 0, nullptr); } static unsigned getHashValue(const AliasAnalysis::Location &Val) { return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^ diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 72e75ec..6117d91 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -43,13 +43,13 @@ class AliasSet : public ilist_node<AliasSet> { const MDNode *TBAAInfo; public: PointerRec(Value *V) - : Val(V), PrevInList(0), NextInList(0), AS(0), Size(0), + : Val(V), PrevInList(nullptr), NextInList(nullptr), AS(nullptr), Size(0), TBAAInfo(DenseMapInfo<const MDNode *>::getEmptyKey()) {} Value *getValue() const { return Val; } PointerRec *getNext() const { return NextInList; } - bool hasAliasSet() const { return AS != 0; } + bool hasAliasSet() const { return AS != nullptr; } PointerRec** setPrevInList(PointerRec **PIL) { PrevInList = PIL; @@ -75,7 +75,7 @@ class AliasSet : public ilist_node<AliasSet> { // If we have missing or conflicting TBAAInfo, return null. 
if (TBAAInfo == DenseMapInfo<const MDNode *>::getEmptyKey() || TBAAInfo == DenseMapInfo<const MDNode *>::getTombstoneKey()) - return 0; + return nullptr; return TBAAInfo; } @@ -91,7 +91,7 @@ class AliasSet : public ilist_node<AliasSet> { } void setAliasSet(AliasSet *as) { - assert(AS == 0 && "Already have an alias set!"); + assert(!AS && "Already have an alias set!"); AS = as; } @@ -100,7 +100,7 @@ class AliasSet : public ilist_node<AliasSet> { *PrevInList = NextInList; if (AS->PtrListEnd == &NextInList) { AS->PtrListEnd = PrevInList; - assert(*AS->PtrListEnd == 0 && "List not terminated right!"); + assert(*AS->PtrListEnd == nullptr && "List not terminated right!"); } delete this; } @@ -174,7 +174,7 @@ public: class iterator; iterator begin() const { return iterator(PtrList); } iterator end() const { return iterator(); } - bool empty() const { return PtrList == 0; } + bool empty() const { return PtrList == nullptr; } void print(raw_ostream &OS) const; void dump() const; @@ -184,7 +184,7 @@ public: PointerRec, ptrdiff_t> { PointerRec *CurNode; public: - explicit iterator(PointerRec *CN = 0) : CurNode(CN) {} + explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {} bool operator==(const iterator& x) const { return CurNode == x.CurNode; @@ -220,8 +220,9 @@ private: // Can only be created by AliasSetTracker. Also, ilist creates one // to serve as a sentinel. friend struct ilist_sentinel_traits<AliasSet>; - AliasSet() : PtrList(0), PtrListEnd(&PtrList), Forward(0), RefCount(0), - AccessTy(NoModRef), AliasTy(MustAlias), Volatile(false) { + AliasSet() + : PtrList(nullptr), PtrListEnd(&PtrList), Forward(nullptr), RefCount(0), + AccessTy(NoModRef), AliasTy(MustAlias), Volatile(false) { } AliasSet(const AliasSet &AS) LLVM_DELETED_FUNCTION; @@ -285,7 +286,7 @@ class AliasSetTracker { void deleted() override; void allUsesReplacedWith(Value *) override; public: - ASTCallbackVH(Value *V, AliasSetTracker *AST = 0); + ASTCallbackVH(Value *V, AliasSetTracker *AST = nullptr); ASTCallbackVH &operator=(Value *V); }; /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that tell us how to @@ -354,7 +355,7 @@ public: /// pointer didn't alias anything). AliasSet &getAliasSetForPointer(Value *P, uint64_t Size, const MDNode *TBAAInfo, - bool *New = 0); + bool *New = nullptr); /// getAliasSetForPointerIfExists - Return the alias set containing the /// location specified if one exists, otherwise return null. @@ -408,7 +409,7 @@ private: // entry for the pointer if it doesn't already exist. AliasSet::PointerRec &getEntryFor(Value *V) { AliasSet::PointerRec *&Entry = PointerMap[ASTCallbackVH(V, this)]; - if (Entry == 0) + if (!Entry) Entry = new AliasSet::PointerRec(V); return *Entry; } diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h deleted file mode 100644 index 5488847..0000000 --- a/include/llvm/Analysis/BlockFrequencyImpl.h +++ /dev/null @@ -1,379 +0,0 @@ -//===-- BlockFrequencyImpl.h - Block Frequency Implementation --*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Shared implementation of BlockFrequency for IR and Machine Instructions. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H -#define LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/BranchProbability.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <string> -#include <vector> - -namespace llvm { - - -class BlockFrequencyInfo; -class MachineBlockFrequencyInfo; - -/// BlockFrequencyImpl implements block frequency algorithm for IR and -/// Machine Instructions. Algorithm starts with value ENTRY_FREQ -/// for the entry block and then propagates frequencies using branch weights -/// from (Machine)BranchProbabilityInfo. LoopInfo is not required because -/// algorithm can find "backedges" by itself. -template<class BlockT, class FunctionT, class BlockProbInfoT> -class BlockFrequencyImpl { - - DenseMap<const BlockT *, BlockFrequency> Freqs; - - BlockProbInfoT *BPI; - - FunctionT *Fn; - - typedef GraphTraits< Inverse<BlockT *> > GT; - - static const uint64_t EntryFreq = 1 << 14; - - std::string getBlockName(BasicBlock *BB) const { - return BB->getName().str(); - } - - std::string getBlockName(MachineBasicBlock *MBB) const { - std::string str; - raw_string_ostream ss(str); - ss << "BB#" << MBB->getNumber(); - - if (const BasicBlock *BB = MBB->getBasicBlock()) - ss << " derived from LLVM BB " << BB->getName(); - - return ss.str(); - } - - void setBlockFreq(BlockT *BB, BlockFrequency Freq) { - Freqs[BB] = Freq; - DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = "; - printBlockFreq(dbgs(), Freq) << "\n"); - } - - /// getEdgeFreq - Return edge frequency based on SRC frequency and Src -> Dst - /// edge probability. - BlockFrequency getEdgeFreq(BlockT *Src, BlockT *Dst) const { - BranchProbability Prob = BPI->getEdgeProbability(Src, Dst); - return getBlockFreq(Src) * Prob; - } - - /// incBlockFreq - Increase BB block frequency by FREQ. - /// - void incBlockFreq(BlockT *BB, BlockFrequency Freq) { - Freqs[BB] += Freq; - DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += "; - printBlockFreq(dbgs(), Freq) << " --> "; - printBlockFreq(dbgs(), Freqs[BB]) << "\n"); - } - - // All blocks in postorder. - std::vector<BlockT *> POT; - - // Map Block -> Position in reverse-postorder list. - DenseMap<BlockT *, unsigned> RPO; - - // For each loop header, record the per-iteration probability of exiting the - // loop. This is the reciprocal of the expected number of loop iterations. - typedef DenseMap<BlockT*, BranchProbability> LoopExitProbMap; - LoopExitProbMap LoopExitProb; - - // (reverse-)postorder traversal iterators. - typedef typename std::vector<BlockT *>::iterator pot_iterator; - typedef typename std::vector<BlockT *>::reverse_iterator rpot_iterator; - - pot_iterator pot_begin() { return POT.begin(); } - pot_iterator pot_end() { return POT.end(); } - - rpot_iterator rpot_begin() { return POT.rbegin(); } - rpot_iterator rpot_end() { return POT.rend(); } - - rpot_iterator rpot_at(BlockT *BB) { - rpot_iterator I = rpot_begin(); - unsigned idx = RPO.lookup(BB); - assert(idx); - std::advance(I, idx - 1); - - assert(*I == BB); - return I; - } - - /// isBackedge - Return if edge Src -> Dst is a reachable backedge. 
- /// - bool isBackedge(BlockT *Src, BlockT *Dst) const { - unsigned a = RPO.lookup(Src); - if (!a) - return false; - unsigned b = RPO.lookup(Dst); - assert(b && "Destination block should be reachable"); - return a >= b; - } - - /// getSingleBlockPred - return single BB block predecessor or NULL if - /// BB has none or more predecessors. - BlockT *getSingleBlockPred(BlockT *BB) { - typename GT::ChildIteratorType - PI = GraphTraits< Inverse<BlockT *> >::child_begin(BB), - PE = GraphTraits< Inverse<BlockT *> >::child_end(BB); - - if (PI == PE) - return 0; - - BlockT *Pred = *PI; - - ++PI; - if (PI != PE) - return 0; - - return Pred; - } - - void doBlock(BlockT *BB, BlockT *LoopHead, - SmallPtrSet<BlockT *, 8> &BlocksInLoop) { - - DEBUG(dbgs() << "doBlock(" << getBlockName(BB) << ")\n"); - setBlockFreq(BB, 0); - - if (BB == LoopHead) { - setBlockFreq(BB, EntryFreq); - return; - } - - if (BlockT *Pred = getSingleBlockPred(BB)) { - if (BlocksInLoop.count(Pred)) - setBlockFreq(BB, getEdgeFreq(Pred, BB)); - // TODO: else? irreducible, ignore it for now. - return; - } - - bool isInLoop = false; - bool isLoopHead = false; - - for (typename GT::ChildIteratorType - PI = GraphTraits< Inverse<BlockT *> >::child_begin(BB), - PE = GraphTraits< Inverse<BlockT *> >::child_end(BB); - PI != PE; ++PI) { - BlockT *Pred = *PI; - - if (isBackedge(Pred, BB)) { - isLoopHead = true; - } else if (BlocksInLoop.count(Pred)) { - incBlockFreq(BB, getEdgeFreq(Pred, BB)); - isInLoop = true; - } - // TODO: else? irreducible. - } - - if (!isInLoop) - return; - - if (!isLoopHead) - return; - - // This block is a loop header, so boost its frequency by the expected - // number of loop iterations. The loop blocks will be revisited so they all - // get this boost. - typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB); - assert(I != LoopExitProb.end() && "Loop header missing from table"); - Freqs[BB] /= I->second; - DEBUG(dbgs() << "Loop header scaled to "; - printBlockFreq(dbgs(), Freqs[BB]) << ".\n"); - } - - /// doLoop - Propagate block frequency down through the loop. - void doLoop(BlockT *Head, BlockT *Tail) { - DEBUG(dbgs() << "doLoop(" << getBlockName(Head) << ", " - << getBlockName(Tail) << ")\n"); - - SmallPtrSet<BlockT *, 8> BlocksInLoop; - - for (rpot_iterator I = rpot_at(Head), E = rpot_at(Tail); ; ++I) { - BlockT *BB = *I; - doBlock(BB, Head, BlocksInLoop); - - BlocksInLoop.insert(BB); - if (I == E) - break; - } - - // Compute loop's cyclic probability using backedges probabilities. - BlockFrequency BackFreq; - for (typename GT::ChildIteratorType - PI = GraphTraits< Inverse<BlockT *> >::child_begin(Head), - PE = GraphTraits< Inverse<BlockT *> >::child_end(Head); - PI != PE; ++PI) { - BlockT *Pred = *PI; - assert(Pred); - if (isBackedge(Pred, Head)) - BackFreq += getEdgeFreq(Pred, Head); - } - - // The cyclic probability is freq(BackEdges) / freq(Head), where freq(Head) - // only counts edges entering the loop, not the loop backedges. - // The probability of leaving the loop on each iteration is: - // - // ExitProb = 1 - CyclicProb - // - // The Expected number of loop iterations is: - // - // Iterations = 1 / ExitProb - // - uint64_t D = std::max(getBlockFreq(Head).getFrequency(), UINT64_C(1)); - uint64_t N = std::max(BackFreq.getFrequency(), UINT64_C(1)); - if (N < D) - N = D - N; - else - // We'd expect N < D, but rounding and saturation means that can't be - // guaranteed. - N = 1; - - // Now ExitProb = N / D, make sure it fits in an i32/i32 fraction. 
- assert(N <= D); - if (D > UINT32_MAX) { - unsigned Shift = 32 - countLeadingZeros(D); - D >>= Shift; - N >>= Shift; - if (N == 0) - N = 1; - } - BranchProbability LEP = BranchProbability(N, D); - LoopExitProb.insert(std::make_pair(Head, LEP)); - DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP - << " from 1 - "; - printBlockFreq(dbgs(), BackFreq) << " / "; - printBlockFreq(dbgs(), getBlockFreq(Head)) << ".\n"); - } - - friend class BlockFrequencyInfo; - friend class MachineBlockFrequencyInfo; - - BlockFrequencyImpl() { } - - void doFunction(FunctionT *fn, BlockProbInfoT *bpi) { - Fn = fn; - BPI = bpi; - - // Clear everything. - RPO.clear(); - POT.clear(); - LoopExitProb.clear(); - Freqs.clear(); - - BlockT *EntryBlock = fn->begin(); - - std::copy(po_begin(EntryBlock), po_end(EntryBlock), std::back_inserter(POT)); - - unsigned RPOidx = 0; - for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) { - BlockT *BB = *I; - RPO[BB] = ++RPOidx; - DEBUG(dbgs() << "RPO[" << getBlockName(BB) << "] = " << RPO[BB] << "\n"); - } - - // Travel over all blocks in postorder. - for (pot_iterator I = pot_begin(), E = pot_end(); I != E; ++I) { - BlockT *BB = *I; - BlockT *LastTail = 0; - DEBUG(dbgs() << "POT: " << getBlockName(BB) << "\n"); - - for (typename GT::ChildIteratorType - PI = GraphTraits< Inverse<BlockT *> >::child_begin(BB), - PE = GraphTraits< Inverse<BlockT *> >::child_end(BB); - PI != PE; ++PI) { - - BlockT *Pred = *PI; - if (isBackedge(Pred, BB) && (!LastTail || RPO[Pred] > RPO[LastTail])) - LastTail = Pred; - } - - if (LastTail) - doLoop(BB, LastTail); - } - - // At the end assume the whole function as a loop, and travel over it once - // again. - doLoop(*(rpot_begin()), *(pot_begin())); - } - -public: - - uint64_t getEntryFreq() { return EntryFreq; } - - /// getBlockFreq - Return block frequency. Return 0 if we don't have it. - BlockFrequency getBlockFreq(const BlockT *BB) const { - typename DenseMap<const BlockT *, BlockFrequency>::const_iterator - I = Freqs.find(BB); - if (I != Freqs.end()) - return I->second; - return 0; - } - - void print(raw_ostream &OS) const { - OS << "\n\n---- Block Freqs ----\n"; - for (typename FunctionT::iterator I = Fn->begin(), E = Fn->end(); I != E;) { - BlockT *BB = I++; - OS << " " << getBlockName(BB) << " = "; - printBlockFreq(OS, getBlockFreq(BB)) << "\n"; - - for (typename GraphTraits<BlockT *>::ChildIteratorType - SI = GraphTraits<BlockT *>::child_begin(BB), - SE = GraphTraits<BlockT *>::child_end(BB); SI != SE; ++SI) { - BlockT *Succ = *SI; - OS << " " << getBlockName(BB) << " -> " << getBlockName(Succ) - << " = "; printBlockFreq(OS, getEdgeFreq(BB, Succ)) << "\n"; - } - } - } - - void dump() const { - print(dbgs()); - } - - // Utility method that looks up the block frequency associated with BB and - // prints it to OS. - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockT *BB) { - return printBlockFreq(OS, getBlockFreq(BB)); - } - - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockFrequency &Freq) const { - // Convert fixed-point number to decimal. 
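// E.g. (hypothetical value, with EntryFreq = 2^14): a raw frequency of
// 3*2^14 + 2^13 prints as "3.5" -- the integer part is 57344 / 16384 = 3, and
// one round of the digit loop below emits 5 with no remainder left.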
- uint64_t Frequency = Freq.getFrequency(); - OS << Frequency / EntryFreq << "."; - uint64_t Rem = Frequency % EntryFreq; - uint64_t Eps = 1; - do { - Rem *= 10; - Eps *= 10; - OS << Rem / EntryFreq; - Rem = Rem % EntryFreq; - } while (Rem >= Eps/2); - return OS; - } - -}; - -} - -#endif diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h index 2f701d9..3289a28 100644 --- a/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/include/llvm/Analysis/BlockFrequencyInfo.h @@ -1,4 +1,4 @@ -//===------- BlockFrequencyInfo.h - Block Frequency Analysis --*- C++ -*---===// +//===- BlockFrequencyInfo.h - Block Frequency Analysis ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,14 +21,12 @@ namespace llvm { class BranchProbabilityInfo; -template<class BlockT, class FunctionT, class BranchProbInfoT> -class BlockFrequencyImpl; +template <class BlockT> class BlockFrequencyInfoImpl; -/// BlockFrequencyInfo pass uses BlockFrequencyImpl implementation to estimate -/// IR basic block frequencies. +/// BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to +/// estimate IR basic block frequencies. class BlockFrequencyInfo : public FunctionPass { - typedef BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo> - ImplType; + typedef BlockFrequencyInfoImpl<BasicBlock> ImplType; std::unique_ptr<ImplType> BFI; public: diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h new file mode 100644 index 0000000..bd72d3e --- /dev/null +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -0,0 +1,1859 @@ +//==- BlockFrequencyInfoImpl.h - Block Frequency Implementation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Shared implementation of BlockFrequency for IR and Machine Instructions. +// See the documentation below for BlockFrequencyInfoImpl for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H +#define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <deque> +#include <list> +#include <string> +#include <vector> + +#define DEBUG_TYPE "block-freq" + +//===----------------------------------------------------------------------===// +// +// UnsignedFloat definition. +// +// TODO: Make this private to BlockFrequencyInfoImpl or delete. 
+//
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class UnsignedFloatBase {
+public:
+ static const int32_t MaxExponent = 16383;
+ static const int32_t MinExponent = -16382;
+ static const int DefaultPrecision = 10;
+
+ static void dump(uint64_t D, int16_t E, int Width);
+ static raw_ostream &print(raw_ostream &OS, uint64_t D, int16_t E, int Width,
+ unsigned Precision);
+ static std::string toString(uint64_t D, int16_t E, int Width,
+ unsigned Precision);
+ static int countLeadingZeros32(uint32_t N) { return countLeadingZeros(N); }
+ static int countLeadingZeros64(uint64_t N) { return countLeadingZeros(N); }
+ static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); }
+
+ static std::pair<uint64_t, bool> splitSigned(int64_t N) {
+ if (N >= 0)
+ return std::make_pair(N, false);
+ uint64_t Unsigned = N == INT64_MIN ? UINT64_C(1) << 63 : uint64_t(-N);
+ return std::make_pair(Unsigned, true);
+ }
+ static int64_t joinSigned(uint64_t U, bool IsNeg) {
+ if (U > uint64_t(INT64_MAX))
+ return IsNeg ? INT64_MIN : INT64_MAX;
+ return IsNeg ? -int64_t(U) : int64_t(U);
+ }
+
+ static int32_t extractLg(const std::pair<int32_t, int> &Lg) {
+ return Lg.first;
+ }
+ static int32_t extractLgFloor(const std::pair<int32_t, int> &Lg) {
+ return Lg.first - (Lg.second > 0);
+ }
+ static int32_t extractLgCeiling(const std::pair<int32_t, int> &Lg) {
+ return Lg.first + (Lg.second < 0);
+ }
+
+ static std::pair<uint64_t, int16_t> divide64(uint64_t L, uint64_t R);
+ static std::pair<uint64_t, int16_t> multiply64(uint64_t L, uint64_t R);
+
+ static int compare(uint64_t L, uint64_t R, int Shift) {
+ assert(Shift >= 0);
+ assert(Shift < 64);
+
+ uint64_t L_adjusted = L >> Shift;
+ if (L_adjusted < R)
+ return -1;
+ if (L_adjusted > R)
+ return 1;
+
+ return L > L_adjusted << Shift ? 1 : 0;
+ }
+};
+
+/// \brief Simple representation of an unsigned floating point.
+///
+/// UnsignedFloat is an unsigned floating point number. It uses simple
+/// saturation arithmetic, and every operation is well-defined for every value.
+///
+/// The number is split into a signed exponent and unsigned digits. The number
+/// represented is \c getDigits()*2^getExponent(). In this way, the digits are
+/// much like the mantissa in the x87 long double, but there is no canonical
+/// form, so the same number can be represented by many bit representations
+/// (it's always in "denormal" mode).
+///
+/// UnsignedFloat is templated on the underlying integer type for digits, which
+/// is expected to be one of uint64_t, uint32_t, uint16_t or uint8_t.
+///
+/// Unlike builtin floating point types, UnsignedFloat is portable.
+///
+/// Unlike APFloat, UnsignedFloat does not model architecture floating point
+/// behaviour (this should make it a little faster), and implements most
+/// operators (this makes it usable).
+///
+/// UnsignedFloat is totally ordered. However, there is no canonical form, so
+/// there are multiple representations of most scalars. E.g.:
+///
+/// UnsignedFloat(8u, 0) == UnsignedFloat(4u, 1)
+/// UnsignedFloat(4u, 1) == UnsignedFloat(2u, 2)
+/// UnsignedFloat(2u, 2) == UnsignedFloat(1u, 3)
+///
+/// UnsignedFloat implements most arithmetic operations. Precision is kept
+/// where possible. Uses simple saturation arithmetic, so that operations
+/// saturate to 0.0 or getLargest() rather than under or overflowing. It has
+/// some extra arithmetic for unit inversion. 0.0/0.0 is defined to be 0.0.
+/// Any other division by 0.0 is defined to be getLargest().
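///
/// For example, the saturation rules above imply (an illustrative sketch; the
/// UF typedef is hypothetical shorthand, not part of this header):
///
///     typedef UnsignedFloat<uint32_t> UF;
///     UF::getZero() / UF::getZero() == UF::getZero();  // 0.0/0.0 == 0.0
///     UF(1, 0) / UF::getZero() == UF::getLargest();    // saturate, not trap
///     UF::getLargest() * UF(2, 0) == UF::getLargest(); // saturate, no overflow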
+/// +/// As a convenience for modifying the exponent, left and right shifting are +/// both implemented, and both interpret negative shifts as positive shifts in +/// the opposite direction. +/// +/// Exponents are limited to the range accepted by x87 long double. This makes +/// it trivial to add functionality to convert to APFloat (this is already +/// relied on for the implementation of printing). +/// +/// The current plan is to gut this and make the necessary parts of it (even +/// more) private to BlockFrequencyInfo. +template <class DigitsT> class UnsignedFloat : UnsignedFloatBase { +public: + static_assert(!std::numeric_limits<DigitsT>::is_signed, + "only unsigned floats supported"); + + typedef DigitsT DigitsType; + +private: + typedef std::numeric_limits<DigitsType> DigitsLimits; + + static const int Width = sizeof(DigitsType) * 8; + static_assert(Width <= 64, "invalid integer width for digits"); + +private: + DigitsType Digits; + int16_t Exponent; + +public: + UnsignedFloat() : Digits(0), Exponent(0) {} + + UnsignedFloat(DigitsType Digits, int16_t Exponent) + : Digits(Digits), Exponent(Exponent) {} + +private: + UnsignedFloat(const std::pair<uint64_t, int16_t> &X) + : Digits(X.first), Exponent(X.second) {} + +public: + static UnsignedFloat getZero() { return UnsignedFloat(0, 0); } + static UnsignedFloat getOne() { return UnsignedFloat(1, 0); } + static UnsignedFloat getLargest() { + return UnsignedFloat(DigitsLimits::max(), MaxExponent); + } + static UnsignedFloat getFloat(uint64_t N) { return adjustToWidth(N, 0); } + static UnsignedFloat getInverseFloat(uint64_t N) { + return getFloat(N).invert(); + } + static UnsignedFloat getFraction(DigitsType N, DigitsType D) { + return getQuotient(N, D); + } + + int16_t getExponent() const { return Exponent; } + DigitsType getDigits() const { return Digits; } + + /// \brief Convert to the given integer type. + /// + /// Convert to \c IntT using simple saturating arithmetic, truncating if + /// necessary. + template <class IntT> IntT toInt() const; + + bool isZero() const { return !Digits; } + bool isLargest() const { return *this == getLargest(); } + bool isOne() const { + if (Exponent > 0 || Exponent <= -Width) + return false; + return Digits == DigitsType(1) << -Exponent; + } + + /// \brief The log base 2, rounded. + /// + /// Get the lg of the scalar. lg 0 is defined to be INT32_MIN. + int32_t lg() const { return extractLg(lgImpl()); } + + /// \brief The log base 2, rounded towards INT32_MIN. + /// + /// Get the lg floor. lg 0 is defined to be INT32_MIN. + int32_t lgFloor() const { return extractLgFloor(lgImpl()); } + + /// \brief The log base 2, rounded towards INT32_MAX. + /// + /// Get the lg ceiling. lg 0 is defined to be INT32_MIN. + int32_t lgCeiling() const { return extractLgCeiling(lgImpl()); } + + bool operator==(const UnsignedFloat &X) const { return compare(X) == 0; } + bool operator<(const UnsignedFloat &X) const { return compare(X) < 0; } + bool operator!=(const UnsignedFloat &X) const { return compare(X) != 0; } + bool operator>(const UnsignedFloat &X) const { return compare(X) > 0; } + bool operator<=(const UnsignedFloat &X) const { return compare(X) <= 0; } + bool operator>=(const UnsignedFloat &X) const { return compare(X) >= 0; } + + bool operator!() const { return isZero(); } + + /// \brief Convert to a decimal representation in a string. + /// + /// Convert to a string. Uses scientific notation for very large/small + /// numbers. 
Scientific notation is used roughly for numbers outside of the + /// range 2^-64 through 2^64. + /// + /// \c Precision indicates the number of decimal digits of precision to use; + /// 0 requests the maximum available. + /// + /// As a special case to make debugging easier, if the number is small enough + /// to convert without scientific notation and has more than \c Precision + /// digits before the decimal place, it's printed accurately to the first + /// digit past zero. E.g., assuming 10 digits of precision: + /// + /// 98765432198.7654... => 98765432198.8 + /// 8765432198.7654... => 8765432198.8 + /// 765432198.7654... => 765432198.8 + /// 65432198.7654... => 65432198.77 + /// 5432198.7654... => 5432198.765 + std::string toString(unsigned Precision = DefaultPrecision) { + return UnsignedFloatBase::toString(Digits, Exponent, Width, Precision); + } + + /// \brief Print a decimal representation. + /// + /// Print a string. See toString for documentation. + raw_ostream &print(raw_ostream &OS, + unsigned Precision = DefaultPrecision) const { + return UnsignedFloatBase::print(OS, Digits, Exponent, Width, Precision); + } + void dump() const { return UnsignedFloatBase::dump(Digits, Exponent, Width); } + + UnsignedFloat &operator+=(const UnsignedFloat &X); + UnsignedFloat &operator-=(const UnsignedFloat &X); + UnsignedFloat &operator*=(const UnsignedFloat &X); + UnsignedFloat &operator/=(const UnsignedFloat &X); + UnsignedFloat &operator<<=(int16_t Shift) { shiftLeft(Shift); return *this; } + UnsignedFloat &operator>>=(int16_t Shift) { shiftRight(Shift); return *this; } + +private: + void shiftLeft(int32_t Shift); + void shiftRight(int32_t Shift); + + /// \brief Adjust two floats to have matching exponents. + /// + /// Adjust \c this and \c X to have matching exponents. Returns the new \c X + /// by value. Does nothing if \a isZero() for either. + /// + /// The value that compares smaller will lose precision, and possibly become + /// \a isZero(). + UnsignedFloat matchExponents(UnsignedFloat X); + + /// \brief Increase exponent to match another float. + /// + /// Increases \c this to have an exponent matching \c X. May decrease the + /// exponent of \c X in the process, and \c this may possibly become \a + /// isZero(). + void increaseExponentToMatch(UnsignedFloat &X, int32_t ExponentDiff); + +public: + /// \brief Scale a large number accurately. + /// + /// Scale N (multiply it by this). Uses full precision multiplication, even + /// if Width is smaller than 64, so information is not lost. + uint64_t scale(uint64_t N) const; + uint64_t scaleByInverse(uint64_t N) const { + // TODO: implement directly, rather than relying on inverse. Inverse is + // expensive. + return inverse().scale(N); + } + int64_t scale(int64_t N) const { + std::pair<uint64_t, bool> Unsigned = splitSigned(N); + return joinSigned(scale(Unsigned.first), Unsigned.second); + } + int64_t scaleByInverse(int64_t N) const { + std::pair<uint64_t, bool> Unsigned = splitSigned(N); + return joinSigned(scaleByInverse(Unsigned.first), Unsigned.second); + } + + int compare(const UnsignedFloat &X) const; + int compareTo(uint64_t N) const { + UnsignedFloat Float = getFloat(N); + int Compare = compare(Float); + if (Width == 64 || Compare != 0) + return Compare; + + // Check for precision loss. We know *this == RoundTrip. + uint64_t RoundTrip = Float.template toInt<uint64_t>(); + return N == RoundTrip ? 0 : RoundTrip < N ? -1 : 1; + } + int compareTo(int64_t N) const { return N < 0 ? 
1 : compareTo(uint64_t(N)); } + + UnsignedFloat &invert() { return *this = UnsignedFloat::getFloat(1) / *this; } + UnsignedFloat inverse() const { return UnsignedFloat(*this).invert(); } + +private: + static UnsignedFloat getProduct(DigitsType L, DigitsType R); + static UnsignedFloat getQuotient(DigitsType Dividend, DigitsType Divisor); + + std::pair<int32_t, int> lgImpl() const; + static int countLeadingZerosWidth(DigitsType Digits) { + if (Width == 64) + return countLeadingZeros64(Digits); + if (Width == 32) + return countLeadingZeros32(Digits); + return countLeadingZeros32(Digits) + Width - 32; + } + + static UnsignedFloat adjustToWidth(uint64_t N, int32_t S) { + assert(S >= MinExponent); + assert(S <= MaxExponent); + if (Width == 64 || N <= DigitsLimits::max()) + return UnsignedFloat(N, S); + + // Shift right. + int Shift = 64 - Width - countLeadingZeros64(N); + DigitsType Shifted = N >> Shift; + + // Round. + assert(S + Shift <= MaxExponent); + return getRounded(UnsignedFloat(Shifted, S + Shift), + N & UINT64_C(1) << (Shift - 1)); + } + + static UnsignedFloat getRounded(UnsignedFloat P, bool Round) { + if (!Round) + return P; + if (P.Digits == DigitsLimits::max()) + // Careful of overflow in the exponent. + return UnsignedFloat(1, P.Exponent) <<= Width; + return UnsignedFloat(P.Digits + 1, P.Exponent); + } +}; + +#define UNSIGNED_FLOAT_BOP(op, base) \ + template <class DigitsT> \ + UnsignedFloat<DigitsT> operator op(const UnsignedFloat<DigitsT> &L, \ + const UnsignedFloat<DigitsT> &R) { \ + return UnsignedFloat<DigitsT>(L) base R; \ + } +UNSIGNED_FLOAT_BOP(+, += ) +UNSIGNED_FLOAT_BOP(-, -= ) +UNSIGNED_FLOAT_BOP(*, *= ) +UNSIGNED_FLOAT_BOP(/, /= ) +UNSIGNED_FLOAT_BOP(<<, <<= ) +UNSIGNED_FLOAT_BOP(>>, >>= ) +#undef UNSIGNED_FLOAT_BOP + +template <class DigitsT> +raw_ostream &operator<<(raw_ostream &OS, const UnsignedFloat<DigitsT> &X) { + return X.print(OS, 10); +} + +#define UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, T1, T2) \ + template <class DigitsT> \ + bool operator op(const UnsignedFloat<DigitsT> &L, T1 R) { \ + return L.compareTo(T2(R)) op 0; \ + } \ + template <class DigitsT> \ + bool operator op(T1 L, const UnsignedFloat<DigitsT> &R) { \ + return 0 op R.compareTo(T2(L)); \ + } +#define UNSIGNED_FLOAT_COMPARE_TO(op) \ + UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, uint64_t, uint64_t) \ + UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, uint32_t, uint64_t) \ + UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, int64_t, int64_t) \ + UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, int32_t, int64_t) +UNSIGNED_FLOAT_COMPARE_TO(< ) +UNSIGNED_FLOAT_COMPARE_TO(> ) +UNSIGNED_FLOAT_COMPARE_TO(== ) +UNSIGNED_FLOAT_COMPARE_TO(!= ) +UNSIGNED_FLOAT_COMPARE_TO(<= ) +UNSIGNED_FLOAT_COMPARE_TO(>= ) +#undef UNSIGNED_FLOAT_COMPARE_TO +#undef UNSIGNED_FLOAT_COMPARE_TO_TYPE + +template <class DigitsT> +uint64_t UnsignedFloat<DigitsT>::scale(uint64_t N) const { + if (Width == 64 || N <= DigitsLimits::max()) + return (getFloat(N) * *this).template toInt<uint64_t>(); + + // Defer to the 64-bit version. + return UnsignedFloat<uint64_t>(Digits, Exponent).scale(N); +} + +template <class DigitsT> +UnsignedFloat<DigitsT> UnsignedFloat<DigitsT>::getProduct(DigitsType L, + DigitsType R) { + // Check for zero. + if (!L || !R) + return getZero(); + + // Check for numbers that we can compute with 64-bit math. + if (Width <= 32 || (L <= UINT32_MAX && R <= UINT32_MAX)) + return adjustToWidth(uint64_t(L) * uint64_t(R), 0); + + // Do the full thing. 
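// (multiply64 returns the full 128-bit product normalized to 64 significant
// bits plus a base-2 exponent, which the private pair constructor above
// consumes directly.)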
+ return UnsignedFloat(multiply64(L, R)); +} +template <class DigitsT> +UnsignedFloat<DigitsT> UnsignedFloat<DigitsT>::getQuotient(DigitsType Dividend, + DigitsType Divisor) { + // Check for zero. + if (!Dividend) + return getZero(); + if (!Divisor) + return getLargest(); + + if (Width == 64) + return UnsignedFloat(divide64(Dividend, Divisor)); + + // We can compute this with 64-bit math. + int Shift = countLeadingZeros64(Dividend); + uint64_t Shifted = uint64_t(Dividend) << Shift; + uint64_t Quotient = Shifted / Divisor; + + // If Quotient needs to be shifted, then adjustToWidth will round. + if (Quotient > DigitsLimits::max()) + return adjustToWidth(Quotient, -Shift); + + // Round based on the value of the next bit. + return getRounded(UnsignedFloat(Quotient, -Shift), + Shifted % Divisor >= getHalf(Divisor)); +} + +template <class DigitsT> +template <class IntT> +IntT UnsignedFloat<DigitsT>::toInt() const { + typedef std::numeric_limits<IntT> Limits; + if (*this < 1) + return 0; + if (*this >= Limits::max()) + return Limits::max(); + + IntT N = Digits; + if (Exponent > 0) { + assert(size_t(Exponent) < sizeof(IntT) * 8); + return N << Exponent; + } + if (Exponent < 0) { + assert(size_t(-Exponent) < sizeof(IntT) * 8); + return N >> -Exponent; + } + return N; +} + +template <class DigitsT> +std::pair<int32_t, int> UnsignedFloat<DigitsT>::lgImpl() const { + if (isZero()) + return std::make_pair(INT32_MIN, 0); + + // Get the floor of the lg of Digits. + int32_t LocalFloor = Width - countLeadingZerosWidth(Digits) - 1; + + // Get the floor of the lg of this. + int32_t Floor = Exponent + LocalFloor; + if (Digits == UINT64_C(1) << LocalFloor) + return std::make_pair(Floor, 0); + + // Round based on the next digit. + assert(LocalFloor >= 1); + bool Round = Digits & UINT64_C(1) << (LocalFloor - 1); + return std::make_pair(Floor + Round, Round ? 1 : -1); +} + +template <class DigitsT> +UnsignedFloat<DigitsT> UnsignedFloat<DigitsT>::matchExponents(UnsignedFloat X) { + if (isZero() || X.isZero() || Exponent == X.Exponent) + return X; + + int32_t Diff = int32_t(X.Exponent) - int32_t(Exponent); + if (Diff > 0) + increaseExponentToMatch(X, Diff); + else + X.increaseExponentToMatch(*this, -Diff); + return X; +} +template <class DigitsT> +void UnsignedFloat<DigitsT>::increaseExponentToMatch(UnsignedFloat &X, + int32_t ExponentDiff) { + assert(ExponentDiff > 0); + if (ExponentDiff >= 2 * Width) { + *this = getZero(); + return; + } + + // Use up any leading zeros on X, and then shift this. + int32_t ShiftX = std::min(countLeadingZerosWidth(X.Digits), ExponentDiff); + assert(ShiftX < Width); + + int32_t ShiftThis = ExponentDiff - ShiftX; + if (ShiftThis >= Width) { + *this = getZero(); + return; + } + + X.Digits <<= ShiftX; + X.Exponent -= ShiftX; + Digits >>= ShiftThis; + Exponent += ShiftThis; + return; +} + +template <class DigitsT> +UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>:: +operator+=(const UnsignedFloat &X) { + if (isLargest() || X.isZero()) + return *this; + if (isZero() || X.isLargest()) + return *this = X; + + // Normalize exponents. + UnsignedFloat Scaled = matchExponents(X); + + // Check for zero again. + if (isZero()) + return *this = Scaled; + if (Scaled.isZero()) + return *this; + + // Compute sum. 
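// E.g., with hypothetical 8-bit digits: 0xFF + 0x02 wraps to 0x01, so the
// overflow path below restores the carry as the top bit (Digits = 0x80,
// Exponent + 1); the dropped low bit is the expected one-bit precision loss.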
+ DigitsType Sum = Digits + Scaled.Digits; + bool DidOverflow = Sum < Digits; + Digits = Sum; + if (!DidOverflow) + return *this; + + if (Exponent == MaxExponent) + return *this = getLargest(); + + ++Exponent; + Digits = UINT64_C(1) << (Width - 1) | Digits >> 1; + + return *this; +} +template <class DigitsT> +UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>:: +operator-=(const UnsignedFloat &X) { + if (X.isZero()) + return *this; + if (*this <= X) + return *this = getZero(); + + // Normalize exponents. + UnsignedFloat Scaled = matchExponents(X); + assert(Digits >= Scaled.Digits); + + // Compute difference. + if (!Scaled.isZero()) { + Digits -= Scaled.Digits; + return *this; + } + + // Check if X just barely lost its last bit. E.g., for 32-bit: + // + // 1*2^32 - 1*2^0 == 0xffffffff != 1*2^32 + if (*this == UnsignedFloat(1, X.lgFloor() + Width)) { + Digits = DigitsType(0) - 1; + --Exponent; + } + return *this; +} +template <class DigitsT> +UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>:: +operator*=(const UnsignedFloat &X) { + if (isZero()) + return *this; + if (X.isZero()) + return *this = X; + + // Save the exponents. + int32_t Exponents = int32_t(Exponent) + int32_t(X.Exponent); + + // Get the raw product. + *this = getProduct(Digits, X.Digits); + + // Combine with exponents. + return *this <<= Exponents; +} +template <class DigitsT> +UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>:: +operator/=(const UnsignedFloat &X) { + if (isZero()) + return *this; + if (X.isZero()) + return *this = getLargest(); + + // Save the exponents. + int32_t Exponents = int32_t(Exponent) - int32_t(X.Exponent); + + // Get the raw quotient. + *this = getQuotient(Digits, X.Digits); + + // Combine with exponents. + return *this <<= Exponents; +} +template <class DigitsT> +void UnsignedFloat<DigitsT>::shiftLeft(int32_t Shift) { + if (!Shift || isZero()) + return; + assert(Shift != INT32_MIN); + if (Shift < 0) { + shiftRight(-Shift); + return; + } + + // Shift as much as we can in the exponent. + int32_t ExponentShift = std::min(Shift, MaxExponent - Exponent); + Exponent += ExponentShift; + if (ExponentShift == Shift) + return; + + // Check this late, since it's rare. + if (isLargest()) + return; + + // Shift the digits themselves. + Shift -= ExponentShift; + if (Shift > countLeadingZerosWidth(Digits)) { + // Saturate. + *this = getLargest(); + return; + } + + Digits <<= Shift; + return; +} + +template <class DigitsT> +void UnsignedFloat<DigitsT>::shiftRight(int32_t Shift) { + if (!Shift || isZero()) + return; + assert(Shift != INT32_MIN); + if (Shift < 0) { + shiftLeft(-Shift); + return; + } + + // Shift as much as we can in the exponent. + int32_t ExponentShift = std::min(Shift, Exponent - MinExponent); + Exponent -= ExponentShift; + if (ExponentShift == Shift) + return; + + // Shift the digits themselves. + Shift -= ExponentShift; + if (Shift >= Width) { + // Saturate. + *this = getZero(); + return; + } + + Digits >>= Shift; + return; +} + +template <class DigitsT> +int UnsignedFloat<DigitsT>::compare(const UnsignedFloat &X) const { + // Check for zero. + if (isZero()) + return X.isZero() ? 0 : -1; + if (X.isZero()) + return 1; + + // Check for the scale. Use lgFloor to be sure that the exponent difference + // is always lower than 64. + int32_t lgL = lgFloor(), lgR = X.lgFloor(); + if (lgL != lgR) + return lgL < lgR ? -1 : 1; + + // Compare digits. 
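// E.g.: (Digits = 4, Exponent = 0) and (Digits = 1, Exponent = 2) reach this
// point with equal lgFloor; shifting 4 right by the exponent difference of 2
// gives 1, and 1 << 2 restores 4 exactly, so the two compare equal.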
+ if (Exponent < X.Exponent)
+ return UnsignedFloatBase::compare(Digits, X.Digits, X.Exponent - Exponent);
+
+ return -UnsignedFloatBase::compare(X.Digits, Digits, Exponent - X.Exponent);
+}
+
+template <class T> struct isPodLike<UnsignedFloat<T>> {
+ static const bool value = true;
+};
+}
+
+//===----------------------------------------------------------------------===//
+//
+// BlockMass definition.
+//
+// TODO: Make this private to BlockFrequencyInfoImpl or delete.
+//
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+/// \brief Mass of a block.
+///
+/// This class implements a sort of fixed-point fraction always between 0.0 and
+/// 1.0. getMass() == UINT64_MAX indicates a value of 1.0.
+///
+/// Masses can be added and subtracted. Simple saturation arithmetic is used,
+/// so arithmetic operations never overflow or underflow.
+///
+/// Masses can be multiplied. Multiplication treats full mass as 1.0 and uses
+/// an inexpensive floating-point algorithm that's off-by-one (almost, but not
+/// quite, maximum precision).
+///
+/// Masses can be scaled by \a BranchProbability at maximum precision.
+class BlockMass {
+ uint64_t Mass;
+
+public:
+ BlockMass() : Mass(0) {}
+ explicit BlockMass(uint64_t Mass) : Mass(Mass) {}
+
+ static BlockMass getEmpty() { return BlockMass(); }
+ static BlockMass getFull() { return BlockMass(UINT64_MAX); }
+
+ uint64_t getMass() const { return Mass; }
+
+ bool isFull() const { return Mass == UINT64_MAX; }
+ bool isEmpty() const { return !Mass; }
+
+ bool operator!() const { return isEmpty(); }
+
+ /// \brief Add another mass.
+ ///
+ /// Adds another mass, saturating at \a isFull() rather than overflowing.
+ BlockMass &operator+=(const BlockMass &X) {
+ uint64_t Sum = Mass + X.Mass;
+ Mass = Sum < Mass ? UINT64_MAX : Sum;
+ return *this;
+ }
+
+ /// \brief Subtract another mass.
+ ///
+ /// Subtracts another mass, saturating at \a isEmpty() rather than
+ /// underflowing.
+ BlockMass &operator-=(const BlockMass &X) {
+ uint64_t Diff = Mass - X.Mass;
+ Mass = Diff > Mass ? 0 : Diff;
+ return *this;
+ }
+
+ BlockMass &operator*=(const BranchProbability &P) {
+ Mass = P.scale(Mass);
+ return *this;
+ }
+
+ bool operator==(const BlockMass &X) const { return Mass == X.Mass; }
+ bool operator!=(const BlockMass &X) const { return Mass != X.Mass; }
+ bool operator<=(const BlockMass &X) const { return Mass <= X.Mass; }
+ bool operator>=(const BlockMass &X) const { return Mass >= X.Mass; }
+ bool operator<(const BlockMass &X) const { return Mass < X.Mass; }
+ bool operator>(const BlockMass &X) const { return Mass > X.Mass; }
+
+ /// \brief Convert to floating point.
+ ///
+ /// Convert to a float. \a isFull() gives 1.0, while \a isEmpty() gives
+ /// slightly above 0.0.
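// An illustrative sketch of the saturating arithmetic described above (the
// masses are hypothetical; the free operator+ and operator- are defined just
// below):
//
//     BlockMass Big(UINT64_MAX - 1), Small(2);
//     (Big + Small).isFull();  // true: saturates at 1.0 instead of wrapping
//     (Small - Big).isEmpty(); // true: saturates at 0.0 instead of wrapping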
+ UnsignedFloat<uint64_t> toFloat() const;
+
+ void dump() const;
+ raw_ostream &print(raw_ostream &OS) const;
+};
+
+inline BlockMass operator+(const BlockMass &L, const BlockMass &R) {
+ return BlockMass(L) += R;
+}
+inline BlockMass operator-(const BlockMass &L, const BlockMass &R) {
+ return BlockMass(L) -= R;
+}
+inline BlockMass operator*(const BlockMass &L, const BranchProbability &R) {
+ return BlockMass(L) *= R;
+}
+inline BlockMass operator*(const BranchProbability &L, const BlockMass &R) {
+ return BlockMass(R) *= L;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) {
+ return X.print(OS);
+}
+
+template <> struct isPodLike<BlockMass> {
+ static const bool value = true;
+};
+}
+
+//===----------------------------------------------------------------------===//
+//
+// BlockFrequencyInfoImpl definition.
+//
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class BasicBlock;
+class BranchProbabilityInfo;
+class Function;
+class Loop;
+class LoopInfo;
+class MachineBasicBlock;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineLoop;
+class MachineLoopInfo;
+
+namespace bfi_detail {
+struct IrreducibleGraph;
+
+// This is part of a workaround for a GCC 4.7 crash on lambdas.
+template <class BT> struct BlockEdgesAdder;
+}
+
+/// \brief Base class for BlockFrequencyInfoImpl.
+///
+/// BlockFrequencyInfoImplBase has supporting data structures and some
+/// algorithms for BlockFrequencyInfoImpl. Only algorithms that depend on
+/// the block type (or that call such algorithms) are skipped here.
+///
+/// Nevertheless, the majority of the overall algorithm documentation lives
+/// with BlockFrequencyInfoImpl. See there for details.
+class BlockFrequencyInfoImplBase {
+public:
+ typedef UnsignedFloat<uint64_t> Float;
+
+ /// \brief Representative of a block.
+ ///
+ /// This is a simple wrapper around an index into the reverse-post-order
+ /// traversal of the blocks.
+ ///
+ /// Unlike a block pointer, its order has meaning (location in the
+ /// topological sort) and its class is the same regardless of block type.
+ struct BlockNode {
+ typedef uint32_t IndexType;
+ IndexType Index;
+
+ bool operator==(const BlockNode &X) const { return Index == X.Index; }
+ bool operator!=(const BlockNode &X) const { return Index != X.Index; }
+ bool operator<=(const BlockNode &X) const { return Index <= X.Index; }
+ bool operator>=(const BlockNode &X) const { return Index >= X.Index; }
+ bool operator<(const BlockNode &X) const { return Index < X.Index; }
+ bool operator>(const BlockNode &X) const { return Index > X.Index; }
+
+ BlockNode() : Index(UINT32_MAX) {}
+ BlockNode(IndexType Index) : Index(Index) {}
+
+ bool isValid() const { return Index <= getMaxIndex(); }
+ static size_t getMaxIndex() { return UINT32_MAX - 1; }
+ };
+
+ /// \brief Stats about a block itself.
+ struct FrequencyData {
+ Float Floating;
+ uint64_t Integer;
+ };
+
+ /// \brief Data about a loop.
+ ///
+ /// Contains the data necessary to represent a loop as a
+ /// pseudo-node once it's packaged.
+ struct LoopData {
+ typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
+ typedef SmallVector<BlockNode, 4> NodeList;
+ LoopData *Parent; ///< The parent loop.
+ bool IsPackaged; ///< Whether this has been packaged.
+ uint32_t NumHeaders; ///< Number of headers.
+ ExitMap Exits; ///< Successor edges (and weights).
+ NodeList Nodes; ///< Header and the members of the loop.
+ BlockMass BackedgeMass; ///< Mass returned to loop header.
+ BlockMass Mass;
+ Float Scale;
+
+ LoopData(LoopData *Parent, const BlockNode &Header)
+ : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {}
+ template <class It1, class It2>
+ LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
+ It2 LastOther)
+ : Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) {
+ NumHeaders = Nodes.size();
+ Nodes.insert(Nodes.end(), FirstOther, LastOther);
+ }
+ bool isHeader(const BlockNode &Node) const {
+ if (isIrreducible())
+ return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders,
+ Node);
+ return Node == Nodes[0];
+ }
+ BlockNode getHeader() const { return Nodes[0]; }
+ bool isIrreducible() const { return NumHeaders > 1; }
+
+ NodeList::const_iterator members_begin() const {
+ return Nodes.begin() + NumHeaders;
+ }
+ NodeList::const_iterator members_end() const { return Nodes.end(); }
+ iterator_range<NodeList::const_iterator> members() const {
+ return make_range(members_begin(), members_end());
+ }
+ };
+
+ /// \brief Index of loop information.
+ struct WorkingData {
+ BlockNode Node; ///< This node.
+ LoopData *Loop; ///< The loop this block is inside.
+ BlockMass Mass; ///< Mass distribution from the entry block.
+
+ WorkingData(const BlockNode &Node) : Node(Node), Loop(nullptr) {}
+
+ bool isLoopHeader() const { return Loop && Loop->isHeader(Node); }
+ bool isDoubleLoopHeader() const {
+ return isLoopHeader() && Loop->Parent && Loop->Parent->isIrreducible() &&
+ Loop->Parent->isHeader(Node);
+ }
+
+ LoopData *getContainingLoop() const {
+ if (!isLoopHeader())
+ return Loop;
+ if (!isDoubleLoopHeader())
+ return Loop->Parent;
+ return Loop->Parent->Parent;
+ }
+
+ /// \brief Resolve a node to its representative.
+ ///
+ /// Get the node currently representing Node, which could be a containing
+ /// loop.
+ ///
+ /// This function should only be called when distributing mass. As long as
+ /// there are no irreducible edges to Node, it will have complexity
+ /// O(1) in this context.
+ ///
+ /// In general, the complexity is O(L), where L is the number of loop
+ /// headers Node has been packaged into. Since this method is called in
+ /// the context of distributing mass, L will be the number of loop headers
+ /// an early exit edge jumps out of.
+ BlockNode getResolvedNode() const {
+ auto L = getPackagedLoop();
+ return L ? L->getHeader() : Node;
+ }
+ LoopData *getPackagedLoop() const {
+ if (!Loop || !Loop->IsPackaged)
+ return nullptr;
+ auto L = Loop;
+ while (L->Parent && L->Parent->IsPackaged)
+ L = L->Parent;
+ return L;
+ }
+
+ /// \brief Get the appropriate mass for a node.
+ ///
+ /// Get appropriate mass for Node. If Node is a loop-header (whose loop
+ /// has been packaged), returns the mass of its pseudo-node. If it's a
+ /// node inside a packaged loop, it returns the loop's mass.
+ BlockMass &getMass() {
+ if (!isAPackage())
+ return Mass;
+ if (!isADoublePackage())
+ return Loop->Mass;
+ return Loop->Parent->Mass;
+ }
+
+ /// \brief Has ContainingLoop been packaged up?
+ bool isPackaged() const { return getResolvedNode() != Node; }
+ /// \brief Has Loop been packaged up?
+ bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; }
+ /// \brief Has Loop been packaged up twice?
+ bool isADoublePackage() const {
+ return isDoubleLoopHeader() && Loop->Parent->IsPackaged;
+ }
+ };
+
+ /// \brief Unscaled probability weight.
+ /// + /// Probability weight for an edge in the graph (including the + /// successor/target node). + /// + /// All edges in the original function are 32-bit. However, exit edges from + /// loop packages are taken from 64-bit exit masses, so we need 64-bits of + /// space in general. + /// + /// In addition to the raw weight amount, Weight stores the type of the edge + /// in the current context (i.e., the context of the loop being processed). + /// Is this a local edge within the loop, an exit from the loop, or a + /// backedge to the loop header? + struct Weight { + enum DistType { Local, Exit, Backedge }; + DistType Type; + BlockNode TargetNode; + uint64_t Amount; + Weight() : Type(Local), Amount(0) {} + }; + + /// \brief Distribution of unscaled probability weight. + /// + /// Distribution of unscaled probability weight to a set of successors. + /// + /// This class collates the successor edge weights for later processing. + /// + /// \a DidOverflow indicates whether \a Total did overflow while adding to + /// the distribution. It should never overflow twice. + struct Distribution { + typedef SmallVector<Weight, 4> WeightList; + WeightList Weights; ///< Individual successor weights. + uint64_t Total; ///< Sum of all weights. + bool DidOverflow; ///< Whether \a Total did overflow. + + Distribution() : Total(0), DidOverflow(false) {} + void addLocal(const BlockNode &Node, uint64_t Amount) { + add(Node, Amount, Weight::Local); + } + void addExit(const BlockNode &Node, uint64_t Amount) { + add(Node, Amount, Weight::Exit); + } + void addBackedge(const BlockNode &Node, uint64_t Amount) { + add(Node, Amount, Weight::Backedge); + } + + /// \brief Normalize the distribution. + /// + /// Combines multiple edges to the same \a Weight::TargetNode and scales + /// down so that \a Total fits into 32-bits. + /// + /// This is linear in the size of \a Weights. For the vast majority of + /// cases, adjacent edge weights are combined by sorting WeightList and + /// combining adjacent weights. However, for very large edge lists an + /// auxiliary hash table is used. + void normalize(); + + private: + void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type); + }; + + /// \brief Data about each block. This is used downstream. + std::vector<FrequencyData> Freqs; + + /// \brief Loop data: see initializeLoops(). + std::vector<WorkingData> Working; + + /// \brief Indexed information about loops. + std::list<LoopData> Loops; + + /// \brief Add all edges out of a packaged loop to the distribution. + /// + /// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each + /// successor edge. + /// + /// \return \c true unless there's an irreducible backedge. + bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, + Distribution &Dist); + + /// \brief Add an edge to the distribution. + /// + /// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the + /// edge is local/exit/backedge is in the context of LoopHead. Otherwise, + /// every edge should be a local edge (since all the loops are packaged up). + /// + /// \return \c true unless aborted due to an irreducible backedge. + bool addToDist(Distribution &Dist, const LoopData *OuterLoop, + const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight); + + LoopData &getLoopPackage(const BlockNode &Head) { + assert(Head.Index < Working.size()); + assert(Working[Head.Index].isLoopHeader()); + return *Working[Head.Index].Loop; + } + + /// \brief Analyze irreducible SCCs. 
+ ///
+ /// Separate irreducible SCCs from \c G, which is an explicit graph of \c
+ /// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr).
+ /// Insert them into \a Loops before \c Insert.
+ ///
+ /// \return the \c LoopData nodes representing the irreducible SCCs.
+ iterator_range<std::list<LoopData>::iterator>
+ analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop,
+ std::list<LoopData>::iterator Insert);
+
+ /// \brief Update a loop after packaging irreducible SCCs inside of it.
+ ///
+ /// Update \c OuterLoop. Before finding irreducible control flow, it was
+ /// partway through \a computeMassInLoop(), so \a LoopData::Exits and \a
+ /// LoopData::BackedgeMass need to be reset. Also, nodes that were packaged
+ /// up need to be removed from \a OuterLoop::Nodes.
+ void updateLoopWithIrreducible(LoopData &OuterLoop);
+
+ /// \brief Distribute mass according to a distribution.
+ ///
+ /// Distributes the mass in Source according to Dist. If LoopHead.isValid(),
+ /// backedges and exits are stored in its entry in Loops.
+ ///
+ /// Mass is distributed in parallel from two copies of the source mass.
+ void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
+ Distribution &Dist);
+
+ /// \brief Compute the loop scale for a loop.
+ void computeLoopScale(LoopData &Loop);
+
+ /// \brief Package up a loop.
+ void packageLoop(LoopData &Loop);
+
+ /// \brief Unwrap loops.
+ void unwrapLoops();
+
+ /// \brief Finalize frequency metrics.
+ ///
+ /// Calculates final frequencies and cleans up no-longer-needed data
+ /// structures.
+ void finalizeMetrics();
+
+ /// \brief Clear all memory.
+ void clear();
+
+ virtual std::string getBlockName(const BlockNode &Node) const;
+ std::string getLoopName(const LoopData &Loop) const;
+
+ virtual raw_ostream &print(raw_ostream &OS) const { return OS; }
+ void dump() const { print(dbgs()); }
+
+ Float getFloatingBlockFreq(const BlockNode &Node) const;
+
+ BlockFrequency getBlockFreq(const BlockNode &Node) const;
+
+ raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const;
+ raw_ostream &printBlockFreq(raw_ostream &OS,
+ const BlockFrequency &Freq) const;
+
+ uint64_t getEntryFreq() const {
+ assert(!Freqs.empty());
+ return Freqs[0].Integer;
+ }
+ /// \brief Virtual destructor.
+ ///
+ /// Need a virtual destructor to mask the compiler warning about
+ /// getBlockName().
+ virtual ~BlockFrequencyInfoImplBase() {}
+};
+
+namespace bfi_detail {
+template <class BlockT> struct TypeMap {};
+template <> struct TypeMap<BasicBlock> {
+ typedef BasicBlock BlockT;
+ typedef Function FunctionT;
+ typedef BranchProbabilityInfo BranchProbabilityInfoT;
+ typedef Loop LoopT;
+ typedef LoopInfo LoopInfoT;
+};
+template <> struct TypeMap<MachineBasicBlock> {
+ typedef MachineBasicBlock BlockT;
+ typedef MachineFunction FunctionT;
+ typedef MachineBranchProbabilityInfo BranchProbabilityInfoT;
+ typedef MachineLoop LoopT;
+ typedef MachineLoopInfo LoopInfoT;
+};
+
+/// \brief Get the name of a MachineBasicBlock.
+///
+/// Get the name of a MachineBasicBlock. It's templated so that including from
+/// CodeGen is unnecessary (that would be a layering issue).
+///
+/// This is used mainly for debug output. The name is similar to
+/// MachineBasicBlock::getFullName(), but skips the name of the function.
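/// For example (hypothetical block), a machine basic block numbered 3 whose
/// originating IR block is named "entry" prints as "BB3[entry]"; a block with
/// no associated IR block prints as plain "BB3".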
+template <class BlockT> std::string getBlockName(const BlockT *BB) { + assert(BB && "Unexpected nullptr"); + auto MachineName = "BB" + Twine(BB->getNumber()); + if (BB->getBasicBlock()) + return (MachineName + "[" + BB->getName() + "]").str(); + return MachineName.str(); +} +/// \brief Get the name of a BasicBlock. +template <> inline std::string getBlockName(const BasicBlock *BB) { + assert(BB && "Unexpected nullptr"); + return BB->getName().str(); +} + +/// \brief Graph of irreducible control flow. +/// +/// This graph is used for determining the SCCs in a loop (or top-level +/// function) that has irreducible control flow. +/// +/// During the block frequency algorithm, the local graphs are defined in a +/// light-weight way, deferring to the \a BasicBlock or \a MachineBasicBlock +/// graphs for most edges, but getting others from \a LoopData::ExitMap. The +/// latter only has successor information. +/// +/// \a IrreducibleGraph makes this graph explicit. It's in a form that can use +/// \a GraphTraits (so that \a analyzeIrreducible() can use \a scc_iterator), +/// and it explicitly lists predecessors and successors. The initialization +/// that relies on \c MachineBasicBlock is defined in the header. +struct IrreducibleGraph { + typedef BlockFrequencyInfoImplBase BFIBase; + + BFIBase &BFI; + + typedef BFIBase::BlockNode BlockNode; + struct IrrNode { + BlockNode Node; + unsigned NumIn; + std::deque<const IrrNode *> Edges; + IrrNode(const BlockNode &Node) : Node(Node), NumIn(0) {} + + typedef std::deque<const IrrNode *>::const_iterator iterator; + iterator pred_begin() const { return Edges.begin(); } + iterator succ_begin() const { return Edges.begin() + NumIn; } + iterator pred_end() const { return succ_begin(); } + iterator succ_end() const { return Edges.end(); } + }; + BlockNode Start; + const IrrNode *StartIrr; + std::vector<IrrNode> Nodes; + SmallDenseMap<uint32_t, IrrNode *, 4> Lookup; + + /// \brief Construct an explicit graph containing irreducible control flow. + /// + /// Construct an explicit graph of the control flow in \c OuterLoop (or the + /// top-level function, if \c OuterLoop is \c nullptr). Uses \c + /// addBlockEdges to add block successors that have not been packaged into + /// loops. + /// + /// \a BlockFrequencyInfoImpl::computeIrreducibleMass() is the only expected + /// user of this. 
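///
/// \c addBlockEdges may be any callable invocable as
/// addBlockEdges(IrreducibleGraph &, IrrNode &, const BFIBase::LoopData *);
/// in practice it is expected to be an instance of \a
/// bfi_detail::BlockEdgesAdder.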
+  template <class BlockEdgesAdder>
+  IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop,
+                   BlockEdgesAdder addBlockEdges)
+      : BFI(BFI), StartIrr(nullptr) {
+    initialize(OuterLoop, addBlockEdges);
+  }
+
+  template <class BlockEdgesAdder>
+  void initialize(const BFIBase::LoopData *OuterLoop,
+                  BlockEdgesAdder addBlockEdges);
+  void addNodesInLoop(const BFIBase::LoopData &OuterLoop);
+  void addNodesInFunction();
+  void addNode(const BlockNode &Node) {
+    Nodes.emplace_back(Node);
+    BFI.Working[Node.Index].getMass() = BlockMass::getEmpty();
+  }
+  void indexNodes();
+  template <class BlockEdgesAdder>
+  void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop,
+                BlockEdgesAdder addBlockEdges);
+  void addEdge(IrrNode &Irr, const BlockNode &Succ,
+               const BFIBase::LoopData *OuterLoop);
+};
+template <class BlockEdgesAdder>
+void IrreducibleGraph::initialize(const BFIBase::LoopData *OuterLoop,
+                                  BlockEdgesAdder addBlockEdges) {
+  if (OuterLoop) {
+    addNodesInLoop(*OuterLoop);
+    for (auto N : OuterLoop->Nodes)
+      addEdges(N, OuterLoop, addBlockEdges);
+  } else {
+    addNodesInFunction();
+    for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
+      addEdges(Index, OuterLoop, addBlockEdges);
+  }
+  StartIrr = Lookup[Start.Index];
+}
+template <class BlockEdgesAdder>
+void IrreducibleGraph::addEdges(const BlockNode &Node,
+                                const BFIBase::LoopData *OuterLoop,
+                                BlockEdgesAdder addBlockEdges) {
+  auto L = Lookup.find(Node.Index);
+  if (L == Lookup.end())
+    return;
+  IrrNode &Irr = *L->second;
+  const auto &Working = BFI.Working[Node.Index];
+
+  if (Working.isAPackage())
+    for (const auto &I : Working.Loop->Exits)
+      addEdge(Irr, I.first, OuterLoop);
+  else
+    addBlockEdges(*this, Irr, OuterLoop);
+}
+}
+
+/// \brief Shared implementation for block frequency analysis.
+///
+/// This is a shared implementation of BlockFrequencyInfo and
+/// MachineBlockFrequencyInfo, and calculates the relative frequencies of
+/// blocks.
+///
+/// LoopInfo defines a loop as a "non-trivial" SCC dominated by a single block,
+/// which is called the header. A given loop, L, can have sub-loops, which are
+/// loops within the subgraph of L that exclude its header. (A "trivial" SCC
+/// consists of a single block that does not have a self-edge.)
+///
+/// In addition to loops, this algorithm has limited support for irreducible
+/// SCCs, which are SCCs with multiple entry blocks. Irreducible SCCs are
+/// discovered on the fly, and modelled as loops with multiple headers.
+///
+/// The headers of an irreducible sub-SCC consist of its entry blocks and all
+/// nodes that are targets of a backedge within it (excluding backedges within
+/// true sub-loops). Block frequency calculations act as if a block is
+/// inserted that intercepts all the edges to the headers. All backedges and
+/// entries point to this block. Its successors are the headers, which split
+/// the frequency evenly.
+///
+/// This algorithm leverages BlockMass and UnsignedFloat to maintain precision,
+/// separates mass distribution from loop scaling, and dithers to eliminate
+/// probability mass loss.
+///
+/// The implementation is split between BlockFrequencyInfoImpl, which knows the
+/// type of graph being modelled (BasicBlock vs. MachineBasicBlock), and
+/// BlockFrequencyInfoImplBase, which doesn't. The base class uses \a
+/// BlockNode, a wrapper around a uint32_t. BlockNode is numbered from 0 in
+/// reverse post-order.
This gives two advantages: it's easy to compare the +/// relative ordering of two nodes, and maps keyed on BlockT can be represented +/// by vectors. +/// +/// This algorithm is O(V+E), unless there is irreducible control flow, in +/// which case it's O(V*E) in the worst case. +/// +/// These are the main stages: +/// +/// 0. Reverse post-order traversal (\a initializeRPOT()). +/// +/// Run a single post-order traversal and save it (in reverse) in RPOT. +/// All other stages make use of this ordering. Save a lookup from BlockT +/// to BlockNode (the index into RPOT) in Nodes. +/// +/// 1. Loop initialization (\a initializeLoops()). +/// +/// Translate LoopInfo/MachineLoopInfo into a form suitable for the rest of +/// the algorithm. In particular, store the immediate members of each loop +/// in reverse post-order. +/// +/// 2. Calculate mass and scale in loops (\a computeMassInLoops()). +/// +/// For each loop (bottom-up), distribute mass through the DAG resulting +/// from ignoring backedges and treating sub-loops as a single pseudo-node. +/// Track the backedge mass distributed to the loop header, and use it to +/// calculate the loop scale (number of loop iterations). Immediate +/// members that represent sub-loops will already have been visited and +/// packaged into a pseudo-node. +/// +/// Distributing mass in a loop is a reverse-post-order traversal through +/// the loop. Start by assigning full mass to the Loop header. For each +/// node in the loop: +/// +/// - Fetch and categorize the weight distribution for its successors. +/// If this is a packaged-subloop, the weight distribution is stored +/// in \a LoopData::Exits. Otherwise, fetch it from +/// BranchProbabilityInfo. +/// +/// - Each successor is categorized as \a Weight::Local, a local edge +/// within the current loop, \a Weight::Backedge, a backedge to the +/// loop header, or \a Weight::Exit, any successor outside the loop. +/// The weight, the successor, and its category are stored in \a +/// Distribution. There can be multiple edges to each successor. +/// +/// - If there's a backedge to a non-header, there's an irreducible SCC. +/// The usual flow is temporarily aborted. \a +/// computeIrreducibleMass() finds the irreducible SCCs within the +/// loop, packages them up, and restarts the flow. +/// +/// - Normalize the distribution: scale weights down so that their sum +/// is 32-bits, and coalesce multiple edges to the same node. +/// +/// - Distribute the mass accordingly, dithering to minimize mass loss, +/// as described in \a distributeMass(). +/// +/// Finally, calculate the loop scale from the accumulated backedge mass. +/// +/// 3. Distribute mass in the function (\a computeMassInFunction()). +/// +/// Finally, distribute mass through the DAG resulting from packaging all +/// loops in the function. This uses the same algorithm as distributing +/// mass in a loop, except that there are no exit or backedge edges. +/// +/// 4. Unpackage loops (\a unwrapLoops()). +/// +/// Initialize each block's frequency to a floating point representation of +/// its mass. +/// +/// Visit loops top-down, scaling the frequencies of its immediate members +/// by the loop's pseudo-node's frequency. +/// +/// 5. Convert frequencies to a 64-bit range (\a finalizeMetrics()). +/// +/// Using the min and max frequencies as a guide, translate floating point +/// frequencies to an appropriate range in uint64_t. +/// +/// It has some known flaws. 
+/// +/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting +/// BlockFrequency's 64-bit integer precision. +/// +/// - The model of irreducible control flow is a rough approximation. +/// +/// Modelling irreducible control flow exactly involves setting up and +/// solving a group of infinite geometric series. Such precision is +/// unlikely to be worthwhile, since most of our algorithms give up on +/// irreducible control flow anyway. +/// +/// Nevertheless, we might find that we need to get closer. Here's a sort +/// of TODO list for the model with diminishing returns, to be completed as +/// necessary. +/// +/// - The headers for the \a LoopData representing an irreducible SCC +/// include non-entry blocks. When these extra blocks exist, they +/// indicate a self-contained irreducible sub-SCC. We could treat them +/// as sub-loops, rather than arbitrarily shoving the problematic +/// blocks into the headers of the main irreducible SCC. +/// +/// - Backedge frequencies are assumed to be evenly split between the +/// headers of a given irreducible SCC. Instead, we could track the +/// backedge mass separately for each header, and adjust their relative +/// frequencies. +/// +/// - Entry frequencies are assumed to be evenly split between the +/// headers of a given irreducible SCC, which is the only option if we +/// need to compute mass in the SCC before its parent loop. Instead, +/// we could partially compute mass in the parent loop, and stop when +/// we get to the SCC. Here, we have the correct ratio of entry +/// masses, which we can use to adjust their relative frequencies. +/// Compute mass in the SCC, and then continue propagation in the +/// parent. +/// +/// - We can propagate mass iteratively through the SCC, for some fixed +/// number of iterations. Each iteration starts by assigning the entry +/// blocks their backedge mass from the prior iteration. The final +/// mass for each block (and each exit, and the total backedge mass +/// used for computing loop scale) is the sum of all iterations. +/// (Running this until fixed point would "solve" the geometric +/// series by simulation.) +template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { + typedef typename bfi_detail::TypeMap<BT>::BlockT BlockT; + typedef typename bfi_detail::TypeMap<BT>::FunctionT FunctionT; + typedef typename bfi_detail::TypeMap<BT>::BranchProbabilityInfoT + BranchProbabilityInfoT; + typedef typename bfi_detail::TypeMap<BT>::LoopT LoopT; + typedef typename bfi_detail::TypeMap<BT>::LoopInfoT LoopInfoT; + + // This is part of a workaround for a GCC 4.7 crash on lambdas. + friend struct bfi_detail::BlockEdgesAdder<BT>; + + typedef GraphTraits<const BlockT *> Successor; + typedef GraphTraits<Inverse<const BlockT *>> Predecessor; + + const BranchProbabilityInfoT *BPI; + const LoopInfoT *LI; + const FunctionT *F; + + // All blocks in reverse postorder. 
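+  // A note inferred from the numbering scheme described above: a block's
+  // position in RPOT doubles as its BlockNode index, so per-node state such
+  // as Working and Freqs can live in plain vectors indexed by RPOT position
+  // instead of maps keyed on BlockT.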
+  std::vector<const BlockT *> RPOT;
+  DenseMap<const BlockT *, BlockNode> Nodes;
+
+  typedef typename std::vector<const BlockT *>::const_iterator rpot_iterator;
+
+  rpot_iterator rpot_begin() const { return RPOT.begin(); }
+  rpot_iterator rpot_end() const { return RPOT.end(); }
+
+  size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); }
+
+  BlockNode getNode(const rpot_iterator &I) const {
+    return BlockNode(getIndex(I));
+  }
+  BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB); }
+
+  const BlockT *getBlock(const BlockNode &Node) const {
+    assert(Node.Index < RPOT.size());
+    return RPOT[Node.Index];
+  }
+
+  /// \brief Run (and save) a post-order traversal.
+  ///
+  /// Saves a reverse post-order traversal of all the nodes in \a F.
+  void initializeRPOT();
+
+  /// \brief Initialize loop data.
+  ///
+  /// Build up \a Loops using \a LoopInfo. \a LoopInfo gives us a mapping from
+  /// each block to the deepest loop it's in, but we need the inverse. For each
+  /// loop, we store in reverse post-order its "immediate" members, defined as
+  /// the header, the headers of immediate sub-loops, and all other blocks in
+  /// the loop that are not in sub-loops.
+  void initializeLoops();
+
+  /// \brief Propagate to a block's successors.
+  ///
+  /// In the context of distributing mass through \c OuterLoop, divide the mass
+  /// currently assigned to \c Node between its successors.
+  ///
+  /// \return \c true unless there's an irreducible backedge.
+  bool propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node);
+
+  /// \brief Compute mass in a particular loop.
+  ///
+  /// Assign mass to \c Loop's header, and then for each block in \c Loop in
+  /// reverse post-order, distribute mass to its successors. Only visits nodes
+  /// that have not been packaged into sub-loops.
+  ///
+  /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop.
+  /// \return \c true unless there's an irreducible backedge.
+  bool computeMassInLoop(LoopData &Loop);
+
+  /// \brief Try to compute mass in the top-level function.
+  ///
+  /// Assign mass to the entry block, and then for each block in reverse
+  /// post-order, distribute mass to its successors. Skips nodes that have
+  /// been packaged into loops.
+  ///
+  /// \pre \a computeMassInLoops() has been called.
+  /// \return \c true unless there's an irreducible backedge.
+  bool tryToComputeMassInFunction();
+
+  /// \brief Compute mass in (and package up) irreducible SCCs.
+  ///
+  /// Find the irreducible SCCs in \c OuterLoop, add them to \a Loops (in front
+  /// of \c Insert), and call \a computeMassInLoop() on each of them.
+  ///
+  /// If \c OuterLoop is \c nullptr, it refers to the top-level function.
+  ///
+  /// \pre \a computeMassInLoop() has been called for each subloop of \c
+  /// OuterLoop.
+  /// \pre \c Insert points at the last loop successfully processed by \a
+  /// computeMassInLoop().
+  /// \pre \c OuterLoop has irreducible SCCs.
+  void computeIrreducibleMass(LoopData *OuterLoop,
+                              std::list<LoopData>::iterator Insert);
+
+  /// \brief Compute mass in all loops.
+  ///
+  /// For each loop bottom-up, call \a computeMassInLoop().
+  ///
+  /// \a computeMassInLoop() aborts (and returns \c false) on loops that
+  /// contain irreducible sub-SCCs. Use \a computeIrreducibleMass() and then
+  /// re-enter \a computeMassInLoop().
+  ///
+  /// \post \a computeMassInLoop() has returned \c true for every loop.
+  void computeMassInLoops();
+
+  /// \brief Compute mass in the top-level function.
+  ///
+  /// Uses \a tryToComputeMassInFunction() and \a computeIrreducibleMass() to
+  /// compute mass in the top-level function.
+  ///
+  /// \post \a tryToComputeMassInFunction() has returned \c true.
+  void computeMassInFunction();
+
+  std::string getBlockName(const BlockNode &Node) const override {
+    return bfi_detail::getBlockName(getBlock(Node));
+  }
+
+public:
+  const FunctionT *getFunction() const { return F; }
+
+  void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI,
+                  const LoopInfoT *LI);
+  BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {}
+
+  using BlockFrequencyInfoImplBase::getEntryFreq;
+  BlockFrequency getBlockFreq(const BlockT *BB) const {
+    return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
+  }
+  Float getFloatingBlockFreq(const BlockT *BB) const {
+    return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
+  }
+
+  /// \brief Print the frequencies for the current function.
+  ///
+  /// Prints the frequencies for the blocks in the current function.
+  ///
+  /// Blocks are printed in the natural iteration order of the function, rather
+  /// than reverse post-order. This provides two advantages: writing -analyze
+  /// tests is easier (since blocks come out in source order), and even
+  /// unreachable blocks are printed.
+  ///
+  /// \a BlockFrequencyInfoImplBase::print() only knows reverse post-order, so
+  /// we need to override it here.
+  raw_ostream &print(raw_ostream &OS) const override;
+  using BlockFrequencyInfoImplBase::dump;
+
+  using BlockFrequencyInfoImplBase::printBlockFreq;
+  raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const {
+    return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB));
+  }
+};
+
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
+                                            const BranchProbabilityInfoT *BPI,
+                                            const LoopInfoT *LI) {
+  // Save the parameters.
+  this->BPI = BPI;
+  this->LI = LI;
+  this->F = F;
+
+  // Clean up left-over data structures.
+  BlockFrequencyInfoImplBase::clear();
+  RPOT.clear();
+  Nodes.clear();
+
+  // Initialize.
+  DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n================="
+               << std::string(F->getName().size(), '=') << "\n");
+  initializeRPOT();
+  initializeLoops();
+
+  // Visit loops in post-order to find the local mass distribution, and then do
+  // the full function.
+  computeMassInLoops();
+  computeMassInFunction();
+  unwrapLoops();
+  finalizeMetrics();
+}
+
+template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() {
+  const BlockT *Entry = F->begin();
+  RPOT.reserve(F->size());
+  std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT));
+  std::reverse(RPOT.begin(), RPOT.end());
+
+  assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() &&
+         "More nodes in function than Block Frequency Info supports");
+
+  DEBUG(dbgs() << "reverse-post-order-traversal\n");
+  for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
+    BlockNode Node = getNode(I);
+    DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node) << "\n");
+    Nodes[*I] = Node;
+  }
+
+  Working.reserve(RPOT.size());
+  for (size_t Index = 0; Index < RPOT.size(); ++Index)
+    Working.emplace_back(Index);
+  Freqs.resize(RPOT.size());
+}
+
+template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() {
+  DEBUG(dbgs() << "loop-detection\n");
+  if (LI->empty())
+    return;
+
+  // Visit loops top down and assign them an index.
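+  // Sketch of the scheme below: a FIFO worklist pairs each loop with the
+  // LoopData of its parent (nullptr for top-level loops), so a parent is
+  // always popped and given its own LoopData entry before any of its
+  // sub-loops are visited.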
+ std::deque<std::pair<const LoopT *, LoopData *>> Q; + for (const LoopT *L : *LI) + Q.emplace_back(L, nullptr); + while (!Q.empty()) { + const LoopT *Loop = Q.front().first; + LoopData *Parent = Q.front().second; + Q.pop_front(); + + BlockNode Header = getNode(Loop->getHeader()); + assert(Header.isValid()); + + Loops.emplace_back(Parent, Header); + Working[Header.Index].Loop = &Loops.back(); + DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n"); + + for (const LoopT *L : *Loop) + Q.emplace_back(L, &Loops.back()); + } + + // Visit nodes in reverse post-order and add them to their deepest containing + // loop. + for (size_t Index = 0; Index < RPOT.size(); ++Index) { + // Loop headers have already been mostly mapped. + if (Working[Index].isLoopHeader()) { + LoopData *ContainingLoop = Working[Index].getContainingLoop(); + if (ContainingLoop) + ContainingLoop->Nodes.push_back(Index); + continue; + } + + const LoopT *Loop = LI->getLoopFor(RPOT[Index]); + if (!Loop) + continue; + + // Add this node to its containing loop's member list. + BlockNode Header = getNode(Loop->getHeader()); + assert(Header.isValid()); + const auto &HeaderData = Working[Header.Index]; + assert(HeaderData.isLoopHeader()); + + Working[Index].Loop = HeaderData.Loop; + HeaderData.Loop->Nodes.push_back(Index); + DEBUG(dbgs() << " - loop = " << getBlockName(Header) + << ": member = " << getBlockName(Index) << "\n"); + } +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() { + // Visit loops with the deepest first, and the top-level loops last. + for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) { + if (computeMassInLoop(*L)) + continue; + auto Next = std::next(L); + computeIrreducibleMass(&*L, L.base()); + L = std::prev(Next); + if (computeMassInLoop(*L)) + continue; + llvm_unreachable("unhandled irreducible control flow"); + } +} + +template <class BT> +bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) { + // Compute mass in loop. + DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n"); + + if (Loop.isIrreducible()) { + BlockMass Remaining = BlockMass::getFull(); + for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { + auto &Mass = Working[Loop.Nodes[H].Index].getMass(); + Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H); + Remaining -= Mass; + } + for (const BlockNode &M : Loop.Nodes) + if (!propagateMassToSuccessors(&Loop, M)) + llvm_unreachable("unhandled irreducible control flow"); + } else { + Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); + if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) + llvm_unreachable("irreducible control flow to loop header!?"); + for (const BlockNode &M : Loop.members()) + if (!propagateMassToSuccessors(&Loop, M)) + // Irreducible backedge. + return false; + } + + computeLoopScale(Loop); + packageLoop(Loop); + return true; +} + +template <class BT> +bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() { + // Compute mass in function. + DEBUG(dbgs() << "compute-mass-in-function\n"); + assert(!Working.empty() && "no blocks in function"); + assert(!Working[0].isLoopHeader() && "entry block is a loop header"); + + Working[0].getMass() = BlockMass::getFull(); + for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) { + // Check for nodes that have been packaged. 
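+    // Packaged nodes already had their mass distributed when their containing
+    // loop was packaged up; they are represented here by the loop's
+    // pseudo-node and only receive final frequencies later, in unwrapLoops().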
+ BlockNode Node = getNode(I); + if (Working[Node.Index].isPackaged()) + continue; + + if (!propagateMassToSuccessors(nullptr, Node)) + return false; + } + return true; +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() { + if (tryToComputeMassInFunction()) + return; + computeIrreducibleMass(nullptr, Loops.begin()); + if (tryToComputeMassInFunction()) + return; + llvm_unreachable("unhandled irreducible control flow"); +} + +/// \note This should be a lambda, but that crashes GCC 4.7. +namespace bfi_detail { +template <class BT> struct BlockEdgesAdder { + typedef BT BlockT; + typedef BlockFrequencyInfoImplBase::LoopData LoopData; + typedef GraphTraits<const BlockT *> Successor; + + const BlockFrequencyInfoImpl<BT> &BFI; + explicit BlockEdgesAdder(const BlockFrequencyInfoImpl<BT> &BFI) + : BFI(BFI) {} + void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr, + const LoopData *OuterLoop) { + const BlockT *BB = BFI.RPOT[Irr.Node.Index]; + for (auto I = Successor::child_begin(BB), E = Successor::child_end(BB); + I != E; ++I) + G.addEdge(Irr, BFI.getNode(*I), OuterLoop); + } +}; +} +template <class BT> +void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass( + LoopData *OuterLoop, std::list<LoopData>::iterator Insert) { + DEBUG(dbgs() << "analyze-irreducible-in-"; + if (OuterLoop) dbgs() << "loop: " << getLoopName(*OuterLoop) << "\n"; + else dbgs() << "function\n"); + + using namespace bfi_detail; + // Ideally, addBlockEdges() would be declared here as a lambda, but that + // crashes GCC 4.7. + BlockEdgesAdder<BT> addBlockEdges(*this); + IrreducibleGraph G(*this, OuterLoop, addBlockEdges); + + for (auto &L : analyzeIrreducible(G, OuterLoop, Insert)) + computeMassInLoop(L); + + if (!OuterLoop) + return; + updateLoopWithIrreducible(*OuterLoop); +} + +template <class BT> +bool +BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop, + const BlockNode &Node) { + DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n"); + // Calculate probability for successors. + Distribution Dist; + if (auto *Loop = Working[Node.Index].getPackagedLoop()) { + assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop"); + if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist)) + // Irreducible backedge. + return false; + } else { + const BlockT *BB = getBlock(Node); + for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB); + SI != SE; ++SI) + // Do not dereference SI, or getEdgeWeight() is linear in the number of + // successors. + if (!addToDist(Dist, OuterLoop, Node, getNode(*SI), + BPI->getEdgeWeight(BB, SI))) + // Irreducible backedge. + return false; + } + + // Distribute mass to successors, saving exit and backedge data in the + // loop header. + distributeMass(Node, OuterLoop, Dist); + return true; +} + +template <class BT> +raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { + if (!F) + return OS; + OS << "block-frequency-info: " << F->getName() << "\n"; + for (const BlockT &BB : *F) + OS << " - " << bfi_detail::getBlockName(&BB) + << ": float = " << getFloatingBlockFreq(&BB) + << ", int = " << getBlockFreq(&BB).getFrequency() << "\n"; + + // Add an extra newline for readability. 
+  OS << "\n";
+  return OS;
+}
+}
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 4a6a280..4414c84 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -47,7 +47,7 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   bool runOnFunction(Function &F) override;
-  void print(raw_ostream &OS, const Module *M = 0) const override;
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;

   /// \brief Get an edge's probability, relative to other out-edges of the Src.
   ///
diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h
index 02e3b45..7f92eda 100644
--- a/include/llvm/Analysis/CFG.h
+++ b/include/llvm/Analysis/CFG.h
@@ -65,8 +65,8 @@ bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
 /// on branchy code but not loops, and LI is most useful on code with loops but
 /// does not help on branchy code outside loops.
 bool isPotentiallyReachable(const Instruction *From, const Instruction *To,
-                            const DominatorTree *DT = 0,
-                            const LoopInfo *LI = 0);
+                            const DominatorTree *DT = nullptr,
+                            const LoopInfo *LI = nullptr);

 /// \brief Determine whether block 'To' is reachable from 'From', returning
 /// true if uncertain.
@@ -75,8 +75,8 @@ bool isPotentiallyReachable(const Instruction *From, const Instruction *To,
 /// Returns false only if we can prove that once 'From' has been reached then
 /// 'To' can not be executed. Conservatively returns true.
 bool isPotentiallyReachable(const BasicBlock *From, const BasicBlock *To,
-                            const DominatorTree *DT = 0,
-                            const LoopInfo *LI = 0);
+                            const DominatorTree *DT = nullptr,
+                            const LoopInfo *LI = nullptr);

 } // End llvm namespace
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
new file mode 100644
index 0000000..09101ae
--- /dev/null
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -0,0 +1,591 @@
+//===- CGSCCPassManager.h - Call graph pass management ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header provides classes for managing passes over SCCs of the call
+/// graph. These passes form an important component of LLVM's interprocedural
+/// optimizations. Because they operate on the SCCs of the call graph, and they
+/// traverse the graph in post order, they can effectively do pair-wise
+/// interprocedural optimizations for all call edges in the program. At each
+/// call site edge, the callee has already been optimized as much as is
+/// possible. This in turn allows very accurate analysis of it for IPO.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CGSCC_PASS_MANAGER_H
+#define LLVM_ANALYSIS_CGSCC_PASS_MANAGER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+
+namespace llvm {
+
+class CGSCCAnalysisManager;
+
+class CGSCCPassManager {
+public:
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
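+  // Only moves are provided; copying is disallowed below (via
+  // LLVM_DELETED_FUNCTION) so that a pass manager uniquely owns its passes.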
+  CGSCCPassManager() {}
+  CGSCCPassManager(CGSCCPassManager &&Arg) : Passes(std::move(Arg.Passes)) {}
+  CGSCCPassManager &operator=(CGSCCPassManager &&RHS) {
+    Passes = std::move(RHS.Passes);
+    return *this;
+  }
+
+  /// \brief Run all of the CGSCC passes in this pass manager over an SCC.
+  PreservedAnalyses run(LazyCallGraph::SCC *C,
+                        CGSCCAnalysisManager *AM = nullptr);
+
+  template <typename CGSCCPassT> void addPass(CGSCCPassT Pass) {
+    Passes.emplace_back(new CGSCCPassModel<CGSCCPassT>(std::move(Pass)));
+  }
+
+  static StringRef name() { return "CGSCCPassManager"; }
+
+private:
+  // Pull in the concept type and model template specialized for SCCs.
+  typedef detail::PassConcept<LazyCallGraph::SCC *, CGSCCAnalysisManager>
+      CGSCCPassConcept;
+  template <typename PassT>
+  struct CGSCCPassModel
+      : detail::PassModel<LazyCallGraph::SCC *, CGSCCAnalysisManager, PassT> {
+    CGSCCPassModel(PassT Pass)
+        : detail::PassModel<LazyCallGraph::SCC *, CGSCCAnalysisManager, PassT>(
+              std::move(Pass)) {}
+  };
+
+  CGSCCPassManager(const CGSCCPassManager &) LLVM_DELETED_FUNCTION;
+  CGSCCPassManager &operator=(const CGSCCPassManager &) LLVM_DELETED_FUNCTION;
+
+  std::vector<std::unique_ptr<CGSCCPassConcept>> Passes;
+};
+
+/// \brief A CGSCC analysis manager to coordinate and cache analyses run over
+/// the SCCs of a module.
+class CGSCCAnalysisManager : public detail::AnalysisManagerBase<
+                                 CGSCCAnalysisManager, LazyCallGraph::SCC *> {
+  friend class detail::AnalysisManagerBase<CGSCCAnalysisManager,
+                                           LazyCallGraph::SCC *>;
+  typedef detail::AnalysisManagerBase<CGSCCAnalysisManager,
+                                      LazyCallGraph::SCC *> BaseT;
+  typedef BaseT::ResultConceptT ResultConceptT;
+  typedef BaseT::PassConceptT PassConceptT;
+
+public:
+  // Most public APIs are inherited from the CRTP base class.
+
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  CGSCCAnalysisManager() {}
+  CGSCCAnalysisManager(CGSCCAnalysisManager &&Arg)
+      : BaseT(std::move(static_cast<BaseT &>(Arg))),
+        CGSCCAnalysisResults(std::move(Arg.CGSCCAnalysisResults)) {}
+  CGSCCAnalysisManager &operator=(CGSCCAnalysisManager &&RHS) {
+    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+    CGSCCAnalysisResults = std::move(RHS.CGSCCAnalysisResults);
+    return *this;
+  }
+
+  /// \brief Returns true if the analysis manager has an empty results cache.
+  bool empty() const;
+
+  /// \brief Clear the CGSCC analysis result cache.
+  ///
+  /// This routine allows cleaning up when the set of SCCs itself has
+  /// potentially changed, and thus we can't even look up a result and
+  /// invalidate it directly. Notably, this does *not* call invalidate
+  /// functions as there is nothing to be done for them.
+  void clear();
+
+private:
+  CGSCCAnalysisManager(const CGSCCAnalysisManager &) LLVM_DELETED_FUNCTION;
+  CGSCCAnalysisManager &
+  operator=(const CGSCCAnalysisManager &) LLVM_DELETED_FUNCTION;
+
+  /// \brief Get a CGSCC pass result, running the pass if necessary.
+  ResultConceptT &getResultImpl(void *PassID, LazyCallGraph::SCC *C);
+
+  /// \brief Get a cached CGSCC pass result or return null.
+  ResultConceptT *getCachedResultImpl(void *PassID,
+                                      LazyCallGraph::SCC *C) const;
+
+  /// \brief Invalidate a CGSCC pass result.
+  void invalidateImpl(void *PassID, LazyCallGraph::SCC *C);
+
+  /// \brief Invalidate the results for an SCC.
+  void invalidateImpl(LazyCallGraph::SCC *C, const PreservedAnalyses &PA);
+
+  /// \brief List of CGSCC analysis pass IDs and associated concept pointers.
+  ///
+  /// Requires iterators to be valid across appending new entries and arbitrary
+  /// erases. Provides both the pass ID and concept pointer such that it is
+  /// half of a bijection and provides storage for the actual result concept.
+  typedef std::list<
+      std::pair<void *, std::unique_ptr<detail::AnalysisResultConcept<
+                            LazyCallGraph::SCC *>>>> CGSCCAnalysisResultListT;
+
+  /// \brief Map type from SCC pointer to our custom list type.
+  typedef DenseMap<LazyCallGraph::SCC *, CGSCCAnalysisResultListT>
+      CGSCCAnalysisResultListMapT;
+
+  /// \brief Map from SCC to a list of CGSCC analysis results.
+  ///
+  /// Provides linear time removal of all analysis results for an SCC and
+  /// the ultimate storage for a particular cached analysis result.
+  CGSCCAnalysisResultListMapT CGSCCAnalysisResultLists;
+
+  /// \brief Map type from a pair of analysis ID and SCC pointer to an
+  /// iterator into a particular result list.
+  typedef DenseMap<std::pair<void *, LazyCallGraph::SCC *>,
+                   CGSCCAnalysisResultListT::iterator> CGSCCAnalysisResultMapT;
+
+  /// \brief Map from an analysis ID and SCC to a particular cached
+  /// analysis result.
+  CGSCCAnalysisResultMapT CGSCCAnalysisResults;
+};
+
+/// \brief A module analysis which acts as a proxy for a CGSCC analysis
+/// manager.
+///
+/// This primarily proxies invalidation information from the module analysis
+/// manager and module pass manager to a CGSCC analysis manager. You should
+/// never use a CGSCC analysis manager from within (transitively) a module
+/// pass manager unless your parent module pass has received a proxy result
+/// object for it.
+class CGSCCAnalysisManagerModuleProxy {
+public:
+  class Result {
+  public:
+    explicit Result(CGSCCAnalysisManager &CGAM) : CGAM(&CGAM) {}
+    // We have to explicitly define all the special member functions because
+    // MSVC refuses to generate them.
+    Result(const Result &Arg) : CGAM(Arg.CGAM) {}
+    Result(Result &&Arg) : CGAM(std::move(Arg.CGAM)) {}
+    Result &operator=(Result RHS) {
+      std::swap(CGAM, RHS.CGAM);
+      return *this;
+    }
+    ~Result();
+
+    /// \brief Accessor for the \c CGSCCAnalysisManager.
+    CGSCCAnalysisManager &getManager() { return *CGAM; }
+
+    /// \brief Handler for invalidation of the module.
+    ///
+    /// If this analysis itself is preserved, then we assume that the call
+    /// graph of the module hasn't changed and thus we don't need to invalidate
+    /// *all* cached data associated with a \c SCC* in the \c
+    /// CGSCCAnalysisManager.
+    ///
+    /// Regardless of whether this analysis is marked as preserved, all of the
+    /// analyses in the \c CGSCCAnalysisManager are potentially invalidated
+    /// based on the set of preserved analyses.
+    bool invalidate(Module *M, const PreservedAnalyses &PA);
+
+  private:
+    CGSCCAnalysisManager *CGAM;
+  };
+
+  static void *ID() { return (void *)&PassID; }
+
+  explicit CGSCCAnalysisManagerModuleProxy(CGSCCAnalysisManager &CGAM)
+      : CGAM(&CGAM) {}
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  CGSCCAnalysisManagerModuleProxy(
+      const CGSCCAnalysisManagerModuleProxy &Arg)
+      : CGAM(Arg.CGAM) {}
+  CGSCCAnalysisManagerModuleProxy(CGSCCAnalysisManagerModuleProxy &&Arg)
+      : CGAM(std::move(Arg.CGAM)) {}
+  CGSCCAnalysisManagerModuleProxy &
+  operator=(CGSCCAnalysisManagerModuleProxy RHS) {
+    std::swap(CGAM, RHS.CGAM);
+    return *this;
+  }
+
+  /// \brief Run the analysis pass and create our proxy result object.
+  ///
+  /// This doesn't do any interesting work; it is primarily used to insert our
+  /// proxy result object into the module analysis cache so that we can proxy
+  /// invalidation to the CGSCC analysis manager.
+  ///
+  /// In debug builds, it will also assert that the analysis manager is empty
+  /// as no queries should arrive at the CGSCC analysis manager prior to
+  /// this analysis being requested.
+  Result run(Module *M);
+
+private:
+  static char PassID;
+
+  CGSCCAnalysisManager *CGAM;
+};
+
+/// \brief A CGSCC analysis which acts as a proxy for a module analysis
+/// manager.
+///
+/// This primarily provides an accessor to a parent module analysis manager to
+/// CGSCC passes. Only the const interface of the module analysis manager is
+/// provided to indicate that once inside of a CGSCC analysis pass you
+/// cannot request a module analysis to actually run. Instead, the user must
+/// rely on the \c getCachedResult API.
+///
+/// This proxy *doesn't* manage the invalidation in any way. That is handled by
+/// the recursive return path of each layer of the pass manager and the
+/// returned PreservedAnalysis set.
+class ModuleAnalysisManagerCGSCCProxy {
+public:
+  /// \brief Result proxy object for \c ModuleAnalysisManagerCGSCCProxy.
+  class Result {
+  public:
+    explicit Result(const ModuleAnalysisManager &MAM) : MAM(&MAM) {}
+    // We have to explicitly define all the special member functions because
+    // MSVC refuses to generate them.
+    Result(const Result &Arg) : MAM(Arg.MAM) {}
+    Result(Result &&Arg) : MAM(std::move(Arg.MAM)) {}
+    Result &operator=(Result RHS) {
+      std::swap(MAM, RHS.MAM);
+      return *this;
+    }
+
+    const ModuleAnalysisManager &getManager() const { return *MAM; }
+
+    /// \brief Handle invalidation by ignoring it; this pass is immutable.
+    bool invalidate(LazyCallGraph::SCC *) { return false; }
+
+  private:
+    const ModuleAnalysisManager *MAM;
+  };
+
+  static void *ID() { return (void *)&PassID; }
+
+  ModuleAnalysisManagerCGSCCProxy(const ModuleAnalysisManager &MAM)
+      : MAM(&MAM) {}
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  ModuleAnalysisManagerCGSCCProxy(
+      const ModuleAnalysisManagerCGSCCProxy &Arg)
+      : MAM(Arg.MAM) {}
+  ModuleAnalysisManagerCGSCCProxy(ModuleAnalysisManagerCGSCCProxy &&Arg)
+      : MAM(std::move(Arg.MAM)) {}
+  ModuleAnalysisManagerCGSCCProxy &
+  operator=(ModuleAnalysisManagerCGSCCProxy RHS) {
+    std::swap(MAM, RHS.MAM);
+    return *this;
+  }
+
+  /// \brief Run the analysis pass and create our proxy result object.
+  /// Nothing to see here; it just forwards the \c MAM reference into the
+  /// result.
+  Result run(LazyCallGraph::SCC *) { return Result(*MAM); }
+
+private:
+  static char PassID;
+
+  const ModuleAnalysisManager *MAM;
+};
+
+/// \brief The core module pass which does a post-order walk of the SCCs and
+/// runs a CGSCC pass over each one.
+///
+/// Designed to allow composition of a CGSCCPass(Manager) and
+/// a ModulePassManager. Note that this pass must be run with a module analysis
+/// manager as it uses the LazyCallGraph analysis. It will also run the
+/// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC
+/// pass over the module to enable a \c FunctionAnalysisManager to be used
+/// within this run safely.
+template <typename CGSCCPassT> class ModuleToPostOrderCGSCCPassAdaptor {
+public:
+  explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass)
+      : Pass(std::move(Pass)) {}
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  ModuleToPostOrderCGSCCPassAdaptor(
+      const ModuleToPostOrderCGSCCPassAdaptor &Arg)
+      : Pass(Arg.Pass) {}
+  ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg)
+      : Pass(std::move(Arg.Pass)) {}
+  friend void swap(ModuleToPostOrderCGSCCPassAdaptor &LHS,
+                   ModuleToPostOrderCGSCCPassAdaptor &RHS) {
+    using std::swap;
+    swap(LHS.Pass, RHS.Pass);
+  }
+  ModuleToPostOrderCGSCCPassAdaptor &
+  operator=(ModuleToPostOrderCGSCCPassAdaptor RHS) {
+    swap(*this, RHS);
+    return *this;
+  }
+
+  /// \brief Runs the CGSCC pass across every SCC in the module.
+  PreservedAnalyses run(Module *M, ModuleAnalysisManager *AM) {
+    assert(AM && "We need analyses to compute the call graph!");
+
+    // Set up the CGSCC analysis manager from its proxy.
+    CGSCCAnalysisManager &CGAM =
+        AM->getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
+
+    // Get the call graph for this module.
+    LazyCallGraph &CG = AM->getResult<LazyCallGraphAnalysis>(M);
+
+    PreservedAnalyses PA = PreservedAnalyses::all();
+    for (LazyCallGraph::SCC &C : CG.postorder_sccs()) {
+      PreservedAnalyses PassPA = Pass.run(&C, &CGAM);
+
+      // We know that the CGSCC pass couldn't have invalidated any other
+      // SCC's analyses (that's the contract of a CGSCC pass), so
+      // directly handle the CGSCC analysis manager's invalidation here.
+      // FIXME: This isn't quite correct. We need to handle the case where the
+      // pass updated the CG, particularly some child of the current SCC, and
+      // invalidate its analyses.
+      CGAM.invalidate(&C, PassPA);
+
+      // Then intersect the preserved set so that invalidation of module
+      // analyses will eventually occur when the module pass completes.
+      PA.intersect(std::move(PassPA));
+    }
+
+    // By definition we preserve the proxy. This precludes *any* invalidation
+    // of CGSCC analyses by the proxy, but that's OK because we've taken
+    // care to invalidate analyses in the CGSCC analysis manager
+    // incrementally above.
+    PA.preserve<CGSCCAnalysisManagerModuleProxy>();
+    return PA;
+  }
+
+  static StringRef name() { return "ModuleToPostOrderCGSCCPassAdaptor"; }
+
+private:
+  CGSCCPassT Pass;
+};
+
+/// \brief A function to deduce a CGSCC pass type and wrap it in the
+/// templated adaptor.
+template <typename CGSCCPassT>
+ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>
+createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) {
+  return std::move(
+      ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass)));
+}
+
+/// \brief A CGSCC analysis which acts as a proxy for a function analysis
+/// manager.
+///
+/// This primarily proxies invalidation information from the CGSCC analysis
+/// manager and CGSCC pass manager to a function analysis manager. You should
+/// never use a function analysis manager from within (transitively) a CGSCC
+/// pass manager unless your parent CGSCC pass has received a proxy result
+/// object for it.
+class FunctionAnalysisManagerCGSCCProxy {
+public:
+  class Result {
+  public:
+    explicit Result(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
+    // We have to explicitly define all the special member functions because
+    // MSVC refuses to generate them.
+    Result(const Result &Arg) : FAM(Arg.FAM) {}
+    Result(Result &&Arg) : FAM(std::move(Arg.FAM)) {}
+    Result &operator=(Result RHS) {
+      std::swap(FAM, RHS.FAM);
+      return *this;
+    }
+    ~Result();
+
+    /// \brief Accessor for the \c FunctionAnalysisManager.
+    FunctionAnalysisManager &getManager() { return *FAM; }
+
+    /// \brief Handler for invalidation of the SCC.
+    ///
+    /// If this analysis itself is preserved, then we assume that the set of \c
+    /// Function objects in the \c SCC hasn't changed and thus we don't need
+    /// to invalidate *all* cached data associated with a \c Function* in the \c
+    /// FunctionAnalysisManager.
+    ///
+    /// Regardless of whether this analysis is marked as preserved, all of the
+    /// analyses in the \c FunctionAnalysisManager are potentially invalidated
+    /// based on the set of preserved analyses.
+    bool invalidate(LazyCallGraph::SCC *C, const PreservedAnalyses &PA);
+
+  private:
+    FunctionAnalysisManager *FAM;
+  };
+
+  static void *ID() { return (void *)&PassID; }
+
+  explicit FunctionAnalysisManagerCGSCCProxy(FunctionAnalysisManager &FAM)
+      : FAM(&FAM) {}
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  FunctionAnalysisManagerCGSCCProxy(
+      const FunctionAnalysisManagerCGSCCProxy &Arg)
+      : FAM(Arg.FAM) {}
+  FunctionAnalysisManagerCGSCCProxy(FunctionAnalysisManagerCGSCCProxy &&Arg)
+      : FAM(std::move(Arg.FAM)) {}
+  FunctionAnalysisManagerCGSCCProxy &
+  operator=(FunctionAnalysisManagerCGSCCProxy RHS) {
+    std::swap(FAM, RHS.FAM);
+    return *this;
+  }
+
+  /// \brief Run the analysis pass and create our proxy result object.
+  ///
+  /// This doesn't do any interesting work; it is primarily used to insert our
+  /// proxy result object into the module analysis cache so that we can proxy
+  /// invalidation to the function analysis manager.
+  ///
+  /// In debug builds, it will also assert that the analysis manager is empty
+  /// as no queries should arrive at the function analysis manager prior to
+  /// this analysis being requested.
+  Result run(LazyCallGraph::SCC *C);
+
+private:
+  static char PassID;
+
+  FunctionAnalysisManager *FAM;
+};
+
+/// \brief A function analysis which acts as a proxy for a CGSCC analysis
+/// manager.
+///
+/// This primarily provides an accessor to a parent CGSCC analysis manager to
+/// function passes. Only the const interface of the CGSCC analysis manager is
+/// provided to indicate that once inside of a function analysis pass you
+/// cannot request a CGSCC analysis to actually run. Instead, the user must
+/// rely on the \c getCachedResult API.
+///
+/// This proxy *doesn't* manage the invalidation in any way. That is handled by
+/// the recursive return path of each layer of the pass manager and the
+/// returned PreservedAnalysis set.
+class CGSCCAnalysisManagerFunctionProxy {
+public:
+  /// \brief Result proxy object for \c CGSCCAnalysisManagerFunctionProxy.
+  class Result {
+  public:
+    explicit Result(const CGSCCAnalysisManager &CGAM) : CGAM(&CGAM) {}
+    // We have to explicitly define all the special member functions because
+    // MSVC refuses to generate them.
+    Result(const Result &Arg) : CGAM(Arg.CGAM) {}
+    Result(Result &&Arg) : CGAM(std::move(Arg.CGAM)) {}
+    Result &operator=(Result RHS) {
+      std::swap(CGAM, RHS.CGAM);
+      return *this;
+    }
+
+    const CGSCCAnalysisManager &getManager() const { return *CGAM; }
+
+    /// \brief Handle invalidation by ignoring it; this pass is immutable.
+    bool invalidate(Function *) { return false; }
+
+  private:
+    const CGSCCAnalysisManager *CGAM;
+  };
+
+  static void *ID() { return (void *)&PassID; }
+
+  CGSCCAnalysisManagerFunctionProxy(const CGSCCAnalysisManager &CGAM)
+      : CGAM(&CGAM) {}
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  CGSCCAnalysisManagerFunctionProxy(
+      const CGSCCAnalysisManagerFunctionProxy &Arg)
+      : CGAM(Arg.CGAM) {}
+  CGSCCAnalysisManagerFunctionProxy(CGSCCAnalysisManagerFunctionProxy &&Arg)
+      : CGAM(std::move(Arg.CGAM)) {}
+  CGSCCAnalysisManagerFunctionProxy &
+  operator=(CGSCCAnalysisManagerFunctionProxy RHS) {
+    std::swap(CGAM, RHS.CGAM);
+    return *this;
+  }
+
+  /// \brief Run the analysis pass and create our proxy result object.
+  /// Nothing to see here; it just forwards the \c CGAM reference into the
+  /// result.
+  Result run(Function *) { return Result(*CGAM); }
+
+private:
+  static char PassID;
+
+  const CGSCCAnalysisManager *CGAM;
+};
+
+/// \brief Adaptor that maps from an SCC to its functions.
+///
+/// Designed to allow composition of a FunctionPass(Manager) and
+/// a CGSCCPassManager. Note that if this pass is constructed with a pointer
+/// to a \c CGSCCAnalysisManager it will run the
+/// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function
+/// pass over the SCC to enable a \c FunctionAnalysisManager to be used
+/// within this run safely.
+template <typename FunctionPassT> class CGSCCToFunctionPassAdaptor {
+public:
+  explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass)
+      : Pass(std::move(Pass)) {}
+  // We have to explicitly define all the special member functions because MSVC
+  // refuses to generate them.
+  CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg)
+      : Pass(Arg.Pass) {}
+  CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg)
+      : Pass(std::move(Arg.Pass)) {}
+  friend void swap(CGSCCToFunctionPassAdaptor &LHS,
+                   CGSCCToFunctionPassAdaptor &RHS) {
+    using std::swap;
+    swap(LHS.Pass, RHS.Pass);
+  }
+  CGSCCToFunctionPassAdaptor &operator=(CGSCCToFunctionPassAdaptor RHS) {
+    swap(*this, RHS);
+    return *this;
+  }
+
+  /// \brief Runs the function pass across every function in the SCC.
+  PreservedAnalyses run(LazyCallGraph::SCC *C, CGSCCAnalysisManager *AM) {
+    FunctionAnalysisManager *FAM = nullptr;
+    if (AM)
+      // Set up the function analysis manager from its proxy.
+      FAM = &AM->getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();
+
+    PreservedAnalyses PA = PreservedAnalyses::all();
+    for (LazyCallGraph::Node *N : *C) {
+      PreservedAnalyses PassPA = Pass.run(&N->getFunction(), FAM);
+
+      // We know that the function pass couldn't have invalidated any other
+      // function's analyses (that's the contract of a function pass), so
+      // directly handle the function analysis manager's invalidation here.
+      if (FAM)
+        FAM->invalidate(&N->getFunction(), PassPA);
+
+      // Then intersect the preserved set so that invalidation of module
+      // analyses will eventually occur when the module pass completes.
+      PA.intersect(std::move(PassPA));
+    }
+
+    // By definition we preserve the proxy. This precludes *any* invalidation
+    // of function analyses by the proxy, but that's OK because we've taken
+    // care to invalidate analyses in the function analysis manager
+    // incrementally above.
+    // FIXME: We need to update the call graph here to account for any deleted
+    // edges!
+    PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+    return PA;
+  }
+
+  static StringRef name() { return "CGSCCToFunctionPassAdaptor"; }
+
+private:
+  FunctionPassT Pass;
+};
+
+/// \brief A function to deduce a function pass type and wrap it in the
+/// templated adaptor.
+template <typename FunctionPassT>
+CGSCCToFunctionPassAdaptor<FunctionPassT>
+createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) {
+  return std::move(CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)));
+}
+
+}
+
+#endif
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 0018a56..09d45ca 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -36,15 +36,16 @@ namespace llvm {
 /// Note that this fails if not all of the operands are constant. Otherwise,
 /// this function can only fail when attempting to fold instructions like loads
 /// and stores, which have no constant expression form.
-Constant *ConstantFoldInstruction(Instruction *I, const DataLayout *TD = 0,
-                                  const TargetLibraryInfo *TLI = 0);
+Constant *ConstantFoldInstruction(Instruction *I,
+                                  const DataLayout *TD = nullptr,
+                                  const TargetLibraryInfo *TLI = nullptr);

 /// ConstantFoldConstantExpression - Attempt to fold the constant expression
 /// using the specified DataLayout. If successful, the constant result is
 /// returned, if not, null is returned.
 Constant *ConstantFoldConstantExpression(const ConstantExpr *CE,
-                                         const DataLayout *TD = 0,
-                                         const TargetLibraryInfo *TLI = 0);
+                                         const DataLayout *TD = nullptr,
+                                         const TargetLibraryInfo *TLI =nullptr);

 /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
 /// specified operands. If successful, the constant result is returned, if not,
@@ -54,8 +55,8 @@ Constant *ConstantFoldConstantExpression(const ConstantExpr *CE,
 ///
 Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
                                    ArrayRef<Constant *> Ops,
-                                   const DataLayout *TD = 0,
-                                   const TargetLibraryInfo *TLI = 0);
+                                   const DataLayout *TD = nullptr,
+                                   const TargetLibraryInfo *TLI = nullptr);

 /// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
 /// instruction (icmp/fcmp) with the specified operands. If it fails, it
@@ -63,8 +64,8 @@ Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
 ///
 Constant *ConstantFoldCompareInstOperands(unsigned Predicate,
                                           Constant *LHS, Constant *RHS,
-                                          const DataLayout *TD = 0,
-                                          const TargetLibraryInfo *TLI = 0);
+                                          const DataLayout *TD = nullptr,
+                                          const TargetLibraryInfo *TLI=nullptr);

 /// ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue
 /// instruction with the specified operands and indices. The constant result is
@@ -75,7 +76,8 @@ Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
 /// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
 /// produce if it is constant and determinable. If this is not determinable,
 /// return null.
-Constant *ConstantFoldLoadFromConstPtr(Constant *C, const DataLayout *TD = 0);
+Constant *ConstantFoldLoadFromConstPtr(Constant *C,
+                                       const DataLayout *TD = nullptr);

 /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
 /// getelementptr constantexpr, return the constant value being addressed by the
@@ -96,7 +98,7 @@ bool canConstantFoldCallTo(const Function *F);

 /// ConstantFoldCall - Attempt to constant fold a call to the specified function
 /// with the specified arguments, returning null if unsuccessful.
Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, - const TargetLibraryInfo *TLI = 0); + const TargetLibraryInfo *TLI = nullptr); } #endif diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h index ff3392a..53c832c 100644 --- a/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -16,6 +16,7 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Pass.h" +#include "llvm/Support/FileSystem.h" namespace llvm { diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index a142828..279755e 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -73,8 +73,8 @@ namespace llvm { Instruction *Destination) : Src(Source), Dst(Destination), - NextPredecessor(NULL), - NextSuccessor(NULL) {} + NextPredecessor(nullptr), + NextSuccessor(nullptr) {} virtual ~Dependence() {} /// Dependence::DVEntry - Each level in the distance/direction vector @@ -96,7 +96,7 @@ namespace llvm { bool Splitable : 1; // Splitting the loop will break dependence. const SCEV *Distance; // NULL implies no distance available. DVEntry() : Direction(ALL), Scalar(true), PeelFirst(false), - PeelLast(false), Splitable(false), Distance(NULL) { } + PeelLast(false), Splitable(false), Distance(nullptr) { } }; /// getSrc - Returns the source instruction for this dependence. @@ -154,7 +154,7 @@ namespace llvm { /// getDistance - Returns the distance (or NULL) associated with a /// particular level. - virtual const SCEV *getDistance(unsigned Level) const { return NULL; } + virtual const SCEV *getDistance(unsigned Level) const { return nullptr; } /// isPeelFirst - Returns true if peeling the first iteration from /// this loop will break this dependence. @@ -910,7 +910,8 @@ namespace llvm { const Constraint &CurConstraint) const; bool tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl<Subscript> &Pair) const; + SmallVectorImpl<Subscript> &Pair, + const SCEV *ElementSize) const; public: static char ID; // Class identification, replacement for typeinfo @@ -921,7 +922,7 @@ namespace llvm { bool runOnFunction(Function &F) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; - void print(raw_ostream &, const Module * = 0) const override; + void print(raw_ostream &, const Module * = nullptr) const override; }; // class DependenceAnalysis /// createDependenceAnalysisPass - This creates an instance of the diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h index 4dcea2d..0fbaa13 100644 --- a/include/llvm/Analysis/DominanceFrontier.h +++ b/include/llvm/Analysis/DominanceFrontier.h @@ -142,7 +142,7 @@ public: /// print - Convert to human readable form /// - void print(raw_ostream &OS, const Module* = 0) const override; + void print(raw_ostream &OS, const Module* = nullptr) const override; /// dump - Dump the dominance frontier to dbgs(). void dump() const; diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index c6bb494..6038872 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -169,7 +169,7 @@ public: return Processed.count(Inst); } - void print(raw_ostream &OS, const Module* = 0) const override; + void print(raw_ostream &OS, const Module* = nullptr) const override; /// dump - This method is used for debugging. 
void dump() const; diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index 775d0df..2367c0b 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -48,160 +48,166 @@ namespace llvm { /// SimplifyAddInst - Given operands for an Add, see if we can /// fold the result. If not, this returns null. Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifySubInst - Given operands for a Sub, see if we can /// fold the result. If not, this returns null. Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// Given operands for an FAdd, see if we can fold the result. If not, this /// returns null. Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// Given operands for an FSub, see if we can fold the result. If not, this /// returns null. Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// Given operands for an FMul, see if we can fold the result. If not, this /// returns null. Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. - Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifySDivInst - Given operands for an SDiv, see if we can /// fold the result. If not, this returns null. - Value *SimplifySDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifySDivInst(Value *LHS, Value *RHS, + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyUDivInst - Given operands for a UDiv, see if we can /// fold the result. If not, this returns null. - Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyUDivInst(Value *LHS, Value *RHS, + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyFDivInst - Given operands for an FDiv, see if we can /// fold the result. If not, this returns null. 
- Value *SimplifyFDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyFDivInst(Value *LHS, Value *RHS, + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifySRemInst - Given operands for an SRem, see if we can /// fold the result. If not, this returns null. - Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifySRemInst(Value *LHS, Value *RHS, + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyURemInst - Given operands for a URem, see if we can /// fold the result. If not, this returns null. - Value *SimplifyURemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyURemInst(Value *LHS, Value *RHS, + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyFRemInst - Given operands for an FRem, see if we can /// fold the result. If not, this returns null. - Value *SimplifyFRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyFRemInst(Value *LHS, Value *RHS, + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyShlInst - Given operands for a Shl, see if we can /// fold the result. If not, this returns null. Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyLShrInst - Given operands for a LShr, see if we can /// fold the result. If not, this returns null. Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyAShrInst - Given operands for a AShr, see if we can /// fold the result. If not, this returns null. Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. - Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyOrInst - Given operands for an Or, see if we can /// fold the result. If not, this returns null. 
- Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyXorInst - Given operands for a Xor, see if we can /// fold the result. If not, this returns null. - Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. - Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we /// can fold the result. If not, this returns null. Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyTruncInst - Given operands for an TruncInst, see if we can fold /// the result. If not, this returns null. - Value *SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); //=== Helper functions for higher up the class hierarchy. @@ -209,40 +215,40 @@ namespace llvm { /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. If not, this returns null. 
Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. If not, this returns null. Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// \brief Given a function and iterators over arguments, see if we can fold /// the result. /// /// If this call could not be simplified returns null. Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + User::op_iterator ArgEnd, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// \brief Given a function and set of arguments, see if we can fold the /// result. /// /// If this call could not be simplified returns null. Value *SimplifyCall(Value *V, ArrayRef<Value *> Args, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. - Value *SimplifyInstruction(Instruction *I, const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + Value *SimplifyInstruction(Instruction *I, const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// \brief Replace all uses of 'I' with 'SimpleV' and simplify the uses @@ -254,9 +260,9 @@ namespace llvm { /// /// The function returns true if any simplifications were performed. bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); /// \brief Recursively attempt to simplify an instruction. /// @@ -265,9 +271,9 @@ namespace llvm { /// of the users impacted. It returns true if any simplifications were /// performed. 
bool recursivelySimplifyInstruction(Instruction *I, - const DataLayout *TD = 0, - const TargetLibraryInfo *TLI = 0, - const DominatorTree *DT = 0); + const DataLayout *TD = nullptr, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr); } // end namespace llvm #endif diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h index 05248bd..274be2b 100644 --- a/include/llvm/Analysis/IntervalPartition.h +++ b/include/llvm/Analysis/IntervalPartition.h @@ -48,7 +48,7 @@ class IntervalPartition : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - IntervalPartition() : FunctionPass(ID), RootInterval(0) { + IntervalPartition() : FunctionPass(ID), RootInterval(nullptr) { initializeIntervalPartitionPass(*PassRegistry::getPassRegistry()); } @@ -62,7 +62,7 @@ public: IntervalPartition(IntervalPartition &I, bool); // print - Show contents in human readable format... - void print(raw_ostream &O, const Module* = 0) const override; + void print(raw_ostream &O, const Module* = nullptr) const override; // getRootInterval() - Return the root interval that contains the starting // block of the function. @@ -77,7 +77,7 @@ public: // getBlockInterval - Return the interval that a basic block exists in. inline Interval *getBlockInterval(BasicBlock *BB) { IntervalMapTy::iterator I = IntervalMap.find(BB); - return I != IntervalMap.end() ? I->second : 0; + return I != IntervalMap.end() ? I->second : nullptr; } // getAnalysisUsage - Implement the Pass API diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index 74b0c8e..70a4df5 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -38,8 +38,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -100,6 +103,7 @@ class raw_ostream; class LazyCallGraph { public: class Node; + class SCC; typedef SmallVector<PointerUnion<Function *, Node *>, 4> NodeVectorT; typedef SmallVectorImpl<PointerUnion<Function *, Node *>> NodeVectorImplT; @@ -109,67 +113,271 @@ public: /// be scanned for "calls" or uses of functions and its child information /// will be constructed. All of these results are accumulated and cached in /// the graph. - class iterator : public std::iterator<std::bidirectional_iterator_tag, Node *, - ptrdiff_t, Node *, Node *> { + class iterator + : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator, + std::forward_iterator_tag, Node> { friend class LazyCallGraph; friend class LazyCallGraph::Node; - typedef std::iterator<std::bidirectional_iterator_tag, Node *, ptrdiff_t, - Node *, Node *> BaseT; - /// \brief Nonce type to select the constructor for the end iterator. - struct IsAtEndT {}; - - LazyCallGraph &G; - NodeVectorImplT::iterator NI; + LazyCallGraph *G; + NodeVectorImplT::iterator E; - // Build the begin iterator for a node. - explicit iterator(LazyCallGraph &G, NodeVectorImplT &Nodes) - : G(G), NI(Nodes.begin()) {} - - // Build the end iterator for a node. This is selected purely by overload. 
- iterator(LazyCallGraph &G, NodeVectorImplT &Nodes, IsAtEndT /*Nonce*/) - : G(G), NI(Nodes.end()) {} + // Build the iterator for a specific position in a node list. + iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI, + NodeVectorImplT::iterator E) + : iterator_adaptor_base(NI), G(&G), E(E) { + while (I != E && I->isNull()) + ++I; + } public: - iterator(const iterator &Arg) : G(Arg.G), NI(Arg.NI) {} - iterator(iterator &&Arg) : G(Arg.G), NI(std::move(Arg.NI)) {} - iterator &operator=(iterator Arg) { - std::swap(Arg, *this); + iterator() {} + + using iterator_adaptor_base::operator++; + iterator &operator++() { + do { + ++I; + } while (I != E && I->isNull()); return *this; } - bool operator==(const iterator &Arg) { return NI == Arg.NI; } - bool operator!=(const iterator &Arg) { return !operator==(Arg); } - reference operator*() const { - if (NI->is<Node *>()) - return NI->get<Node *>(); + if (I->is<Node *>()) + return *I->get<Node *>(); - Function *F = NI->get<Function *>(); - Node *ChildN = G.get(*F); - *NI = ChildN; + Function *F = I->get<Function *>(); + Node &ChildN = G->get(*F); + *I = &ChildN; return ChildN; } - pointer operator->() const { return operator*(); } + }; - iterator &operator++() { - ++NI; - return *this; + /// \brief A node in the call graph. + /// + /// This represents a single node. Its primary roles are to cache the list of + /// callees, de-duplicate and provide fast testing of whether a function is + /// a callee, and facilitate iteration of child nodes in the graph. + class Node { + friend class LazyCallGraph; + friend class LazyCallGraph::SCC; + + LazyCallGraph *G; + Function &F; + + // We provide for the DFS numbering and Tarjan walk lowlink numbers to be + // stored directly within the node. + int DFSNumber; + int LowLink; + + mutable NodeVectorT Callees; + DenseMap<Function *, size_t> CalleeIndexMap; + + /// \brief Basic constructor implements the scanning of F into Callees and + /// CalleeIndexMap. + Node(LazyCallGraph &G, Function &F); + + /// \brief Internal helper to insert a callee. + void insertEdgeInternal(Function &Callee); + + /// \brief Internal helper to insert a callee. + void insertEdgeInternal(Node &CalleeN); + + /// \brief Internal helper to remove a callee from this node. + void removeEdgeInternal(Function &Callee); + + public: + typedef LazyCallGraph::iterator iterator; + + Function &getFunction() const { + return F; + }; + + iterator begin() const { + return iterator(*G, Callees.begin(), Callees.end()); } - iterator operator++(int) { - iterator prev = *this; - ++*this; - return prev; + iterator end() const { return iterator(*G, Callees.end(), Callees.end()); } + + /// Equality is defined as address equality. + bool operator==(const Node &N) const { return this == &N; } + bool operator!=(const Node &N) const { return !operator==(N); } + }; + + /// \brief An SCC of the call graph. + /// + /// This represents a Strongly Connected Component of the call graph as + /// a collection of call graph nodes. While the order of nodes in the SCC is + /// stable, it is not any particular order.
+ class SCC { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + LazyCallGraph *G; + SmallPtrSet<SCC *, 1> ParentSCCs; + SmallVector<Node *, 1> Nodes; + + SCC(LazyCallGraph &G) : G(&G) {} + + void insert(Node &N); + + void + internalDFS(SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack, + SmallVectorImpl<Node *> &PendingSCCStack, Node *N, + SmallVectorImpl<SCC *> &ResultSCCs); + + public: + typedef SmallVectorImpl<Node *>::const_iterator iterator; + typedef pointee_iterator<SmallPtrSet<SCC *, 1>::const_iterator> parent_iterator; + + iterator begin() const { return Nodes.begin(); } + iterator end() const { return Nodes.end(); } + + parent_iterator parent_begin() const { return ParentSCCs.begin(); } + parent_iterator parent_end() const { return ParentSCCs.end(); } + + iterator_range<parent_iterator> parents() const { + return iterator_range<parent_iterator>(parent_begin(), parent_end()); } - iterator &operator--() { - --NI; - return *this; + /// \brief Test if this SCC is a parent of \a C. + bool isParentOf(const SCC &C) const { return C.isChildOf(*this); } + + /// \brief Test if this SCC is an ancestor of \a C. + bool isAncestorOf(const SCC &C) const { return C.isDescendantOf(*this); } + + /// \brief Test if this SCC is a child of \a C. + bool isChildOf(const SCC &C) const { + return ParentSCCs.count(const_cast<SCC *>(&C)); } - iterator operator--(int) { - iterator next = *this; - --*this; - return next; + + /// \brief Test if this SCC is a descendant of \a C. + bool isDescendantOf(const SCC &C) const; + + ///@{ + /// \name Mutation API + /// + /// These methods provide the core API for updating the call graph in the + /// presence of (potentially still in-flight) DFS-found SCCs. + /// + /// Note that these methods sometimes have complex runtimes, so be careful + /// how you call them. + + /// \brief Insert an edge from one node in this SCC to another in this SCC. + /// + /// By the definition of an SCC, this does not change the nature or make-up + /// of any SCCs. + void insertIntraSCCEdge(Node &CallerN, Node &CalleeN); + + /// \brief Insert an edge whose tail is in this SCC and head is in some + /// child SCC. + /// + /// There must be an existing path from the caller to the callee. This + /// operation is inexpensive and does not change the set of SCCs in the + /// graph. + void insertOutgoingEdge(Node &CallerN, Node &CalleeN); + + /// \brief Insert an edge whose tail is in a descendant SCC and head is in + /// this SCC. + /// + /// There must be an existing path from the callee to the caller in this + /// case. NB! This has the potential to be a very expensive function. It + /// inherently forms a cycle in the prior SCC DAG and we have to merge SCCs + /// to resolve that cycle. But finding all of the SCCs which participate in + /// the cycle can in the worst case require traversing every SCC in the + /// graph. Every attempt is made to avoid that, but passes must still + /// exercise caution calling this routine repeatedly. + /// + /// FIXME: We could possibly optimize this quite a bit for cases where the + /// caller and callee are very nearby in the graph. See comments in the + /// implementation for details, but that use case might impact users. + SmallVector<SCC *, 1> insertIncomingEdge(Node &CallerN, Node &CalleeN); + + /// \brief Remove an edge whose source is in this SCC and target is *not*. + /// + /// This removes an inter-SCC edge.
All inter-SCC edges originating from + /// this SCC have been fully explored by any in-flight DFS SCC formation, + /// so this is always safe to call once you have the source SCC. + /// + /// This operation does not change the set of SCCs or the members of the + /// SCCs and so is very inexpensive. It may change the connectivity graph + /// of the SCCs though, so be careful calling this while iterating over + /// them. + void removeInterSCCEdge(Node &CallerN, Node &CalleeN); + + /// \brief Remove an edge which is entirely within this SCC. + /// + /// Both the \a Caller and the \a Callee must be within this SCC. Removing + /// such an edge may break cycles that form this SCC and thus this + /// operation may change the SCC graph significantly. In particular, this + /// operation will re-form new SCCs based on the remaining connectivity of + /// the graph. The following invariants are guaranteed to hold after + /// calling this method: + /// + /// 1) This SCC is still an SCC in the graph. + /// 2) This SCC will be the parent of any new SCCs. Thus, this SCC is + /// preserved as the root of any new SCC directed graph formed. + /// 3) No SCC other than this SCC has its member set changed (this is + /// inherent in the definition of removing such an edge). + /// 4) All of the parent links of the SCC graph will be updated to reflect + /// the new SCC structure. + /// 5) All SCCs formed out of this SCC, excluding this SCC, will be + /// returned in a vector. + /// 6) The order of the SCCs in the vector will be a valid postorder + /// traversal of the new SCCs. + /// + /// These invariants are very important to ensure that we can build + /// optimization pipelines on top of the CGSCC pass manager which + /// intelligently update the SCC graph without invalidating other parts of + /// the SCC graph. + /// + /// The runtime complexity of this method is, in the worst case, O(V+E) + /// where V is the number of nodes in this SCC and E is the number of edges + /// leaving the nodes in this SCC. Note that E includes both edges within + /// this SCC and edges from this SCC to child SCCs. Some effort has been + /// made to minimize the overhead of common cases such as self-edges and + /// edge removals which result in a spanning tree with no more cycles. + SmallVector<SCC *, 1> removeIntraSCCEdge(Node &CallerN, Node &CalleeN); + + ///@} + }; + + /// \brief A post-order depth-first SCC iterator over the call graph. + /// + /// This iterator triggers the Tarjan DFS-based formation of the SCC DAG for + /// the call graph, walking it lazily in depth-first post-order. That is, it + /// always visits SCCs for a callee prior to visiting the SCC for a caller + /// (when they are in different SCCs). + class postorder_scc_iterator + : public iterator_facade_base<postorder_scc_iterator, + std::forward_iterator_tag, SCC> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + /// \brief Nonce type to select the constructor for the end iterator. + struct IsAtEndT {}; + + LazyCallGraph *G; + SCC *C; + + // Build the begin iterator for a node. + postorder_scc_iterator(LazyCallGraph &G) : G(&G) { + C = G.getNextSCCInPostOrder(); + } + + // Build the end iterator for a node. This is selected purely by overload.
+ postorder_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) + : G(&G), C(nullptr) {} + + public: + bool operator==(const postorder_scc_iterator &Arg) const { + return G == Arg.G && C == Arg.C; + } + + reference operator*() const { return *C; } + + using iterator_facade_base::operator++; + postorder_scc_iterator &operator++() { + C = G->getNextSCCInPostOrder(); + return *this; } }; @@ -180,44 +388,75 @@ public: /// requested during traversal. LazyCallGraph(Module &M); - /// \brief Copy constructor. - /// - /// This does a deep copy of the graph. It does no verification that the - /// graph remains valid for the module. It is also relatively expensive. - LazyCallGraph(const LazyCallGraph &G); - - /// \brief Move constructor. - /// - /// This is a deep move. It leaves G in an undefined but destroyable state. - /// Any other operation on G is likely to fail. LazyCallGraph(LazyCallGraph &&G); + LazyCallGraph &operator=(LazyCallGraph &&RHS); + + iterator begin() { + return iterator(*this, EntryNodes.begin(), EntryNodes.end()); + } + iterator end() { return iterator(*this, EntryNodes.end(), EntryNodes.end()); } - /// \brief Copy and move assignment. - LazyCallGraph &operator=(LazyCallGraph RHS) { - std::swap(*this, RHS); - return *this; + postorder_scc_iterator postorder_scc_begin() { + return postorder_scc_iterator(*this); + } + postorder_scc_iterator postorder_scc_end() { + return postorder_scc_iterator(*this, postorder_scc_iterator::IsAtEndT()); } - iterator begin() { return iterator(*this, EntryNodes); } - iterator end() { return iterator(*this, EntryNodes, iterator::IsAtEndT()); } + iterator_range<postorder_scc_iterator> postorder_sccs() { + return iterator_range<postorder_scc_iterator>(postorder_scc_begin(), + postorder_scc_end()); + } /// \brief Lookup a function in the graph which has already been scanned and /// added. Node *lookup(const Function &F) const { return NodeMap.lookup(&F); } + /// \brief Lookup a function's SCC in the graph. + /// + /// \returns null if the function hasn't been assigned an SCC via the SCC + /// iterator walk. + SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); } + /// \brief Get a graph node for a given function, scanning it to populate the /// graph data as necessary. - Node *get(Function &F) { + Node &get(Function &F) { Node *&N = NodeMap[&F]; if (N) - return N; + return *N; return insertInto(F, N); } -private: - Module &M; + ///@{ + /// \name Pre-SCC Mutation API + /// + /// These methods are only valid to call prior to forming any SCCs for this + /// call graph. They can be used to update the core node-graph during + /// a node-based inorder traversal that precedes any SCC-based traversal. + /// + /// Once you begin manipulating a call graph's SCCs, you must perform all + /// mutation of the graph via the SCC methods. + + /// \brief Update the call graph after inserting a new edge. + void insertEdge(Node &Caller, Function &Callee); + + /// \brief Update the call graph after inserting a new edge. + void insertEdge(Function &Caller, Function &Callee) { + return insertEdge(get(Caller), Callee); + } + + /// \brief Update the call graph after deleting an edge. + void removeEdge(Node &Caller, Function &Callee); + + /// \brief Update the call graph after deleting an edge. + void removeEdge(Function &Caller, Function &Callee) { + return removeEdge(get(Caller), Callee); + } + + ///@} +private: /// \brief Allocator that holds all the call graph nodes. SpecificBumpPtrAllocator<Node> BPA; @@ -230,56 +469,46 @@ private: /// escape at the module scope. 
NodeVectorT EntryNodes; - /// \brief Set of the entry nodes to the graph. - SmallPtrSet<Function *, 4> EntryNodeSet; - - /// \brief Helper to insert a new function, with an already looked-up entry in - /// the NodeMap. - Node *insertInto(Function &F, Node *&MappedN); + /// \brief Map of the entry nodes in the graph to their indices in + /// \c EntryNodes. + DenseMap<Function *, size_t> EntryIndexMap; - /// \brief Helper to copy a node from another graph into this one. - Node *copyInto(const Node &OtherN); + /// \brief Allocator that holds all the call graph SCCs. + SpecificBumpPtrAllocator<SCC> SCCBPA; - /// \brief Helper to move a node from another graph into this one. - Node *moveInto(Node &&OtherN); -}; + /// \brief Maps Function -> SCC for fast lookup. + DenseMap<Node *, SCC *> SCCMap; -/// \brief A node in the call graph. -/// -/// This represents a single node. It's primary roles are to cache the list of -/// callees, de-duplicate and provide fast testing of whether a function is -/// a callee, and facilitate iteration of child nodes in the graph. -class LazyCallGraph::Node { - friend class LazyCallGraph; + /// \brief The leaf SCCs of the graph. + /// + /// These are all of the SCCs which have no children. + SmallVector<SCC *, 4> LeafSCCs; - LazyCallGraph &G; - Function &F; - mutable NodeVectorT Callees; - SmallPtrSet<Function *, 4> CalleeSet; + /// \brief Stack of nodes in the DFS walk. + SmallVector<std::pair<Node *, iterator>, 4> DFSStack; - /// \brief Basic constructor implements the scanning of F into Callees and - /// CalleeSet. - Node(LazyCallGraph &G, Function &F); + /// \brief Set of entry nodes not-yet-processed into SCCs. + SmallVector<Function *, 4> SCCEntryNodes; - /// \brief Constructor used when copying a node from one graph to another. - Node(LazyCallGraph &G, const Node &OtherN); + /// \brief Stack of nodes the DFS has walked but not yet put into a SCC. + SmallVector<Node *, 4> PendingSCCStack; - /// \brief Constructor used when moving a node from one graph to another. - Node(LazyCallGraph &G, Node &&OtherN); + /// \brief Counter for the next DFS number to assign. + int NextDFSNumber; -public: - typedef LazyCallGraph::iterator iterator; + /// \brief Helper to insert a new function, with an already looked-up entry in + /// the NodeMap. + Node &insertInto(Function &F, Node *&MappedN); - Function &getFunction() const { - return F; - }; + /// \brief Helper to update pointers back to the graph object during moves. + void updateGraphPtrs(); - iterator begin() const { return iterator(G, Callees); } - iterator end() const { return iterator(G, Callees, iterator::IsAtEndT()); } + /// \brief Helper to form a new SCC out of the top of a DFSStack-like + /// structure. + SCC *formSCC(Node *RootN, SmallVectorImpl<Node *> &NodeStack); - /// Equality is defined as address equality. - bool operator==(const Node &N) const { return this == &N; } - bool operator!=(const Node &N) const { return !operator==(N); } + /// \brief Retrieve the next node in the post-order SCC walk of the call graph. + SCC *getNextSCCInPostOrder(); }; // Provide GraphTraits specializations for call graphs. 
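To make the shape of the reworked LazyCallGraph API above concrete, here is a minimal usage sketch (not part of the patch; walkSCCs is a hypothetical helper) showing how a client could build the graph for a module and visit its SCCs in post-order, i.e. each SCC only after the SCCs it calls into:

#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical driver over the new API: postorder_sccs() lazily triggers the
// Tarjan DFS walk, and iterating an SCC yields its member Node pointers.
void walkSCCs(llvm::Module &M) {
  llvm::LazyCallGraph CG(M);
  for (llvm::LazyCallGraph::SCC &C : CG.postorder_sccs())
    for (llvm::LazyCallGraph::Node *N : C)
      llvm::outs() << N->getFunction().getName() << "\n";
}

Note that once this walk has begun, edges may only be updated through the SCC mutation methods documented above; the graph-level insertEdge/removeEdge pair is only valid during the pre-SCC phase.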
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index a4cb806..2fe7386 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -33,10 +33,10 @@ class LazyValueInfo : public FunctionPass { void operator=(const LazyValueInfo&) LLVM_DELETED_FUNCTION; public: static char ID; - LazyValueInfo() : FunctionPass(ID), PImpl(0) { + LazyValueInfo() : FunctionPass(ID), PImpl(nullptr) { initializeLazyValueInfoPass(*PassRegistry::getPassRegistry()); } - ~LazyValueInfo() { assert(PImpl == 0 && "releaseMemory not called"); } + ~LazyValueInfo() { assert(!PImpl && "releaseMemory not called"); } /// Tristate - This is used to return true/false/dunno results. enum Tristate { diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h index 481015e..4c03c92 100644 --- a/include/llvm/Analysis/LibCallAliasAnalysis.h +++ b/include/llvm/Analysis/LibCallAliasAnalysis.h @@ -27,7 +27,7 @@ namespace llvm { LibCallInfo *LCI; - explicit LibCallAliasAnalysis(LibCallInfo *LC = 0) + explicit LibCallAliasAnalysis(LibCallInfo *LC = nullptr) : FunctionPass(ID), LCI(LC) { initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry()); } diff --git a/include/llvm/Analysis/LibCallSemantics.h b/include/llvm/Analysis/LibCallSemantics.h index 0f0bc23..8bd747f 100644 --- a/include/llvm/Analysis/LibCallSemantics.h +++ b/include/llvm/Analysis/LibCallSemantics.h @@ -130,7 +130,7 @@ namespace llvm { mutable const LibCallLocationInfo *Locations; mutable unsigned NumLocations; public: - LibCallInfo() : Impl(0), Locations(0), NumLocations(0) {} + LibCallInfo() : Impl(nullptr), Locations(nullptr), NumLocations(0) {} virtual ~LibCallInfo(); //===------------------------------------------------------------------===// diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index ebcb762..25c5928 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -27,7 +27,8 @@ class MDNode; /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const DataLayout *TD = 0); + unsigned Align, + const DataLayout *TD = nullptr); /// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at /// the instruction before ScanFrom) checking to see if we have the value at @@ -49,8 +50,8 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = 6, - AliasAnalysis *AA = 0, - MDNode **TBAATag = 0); + AliasAnalysis *AA = nullptr, + MDNode **TBAATag = nullptr); } diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index aeeea3c..bef03e9 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -79,7 +79,7 @@ class LoopBase { operator=(const LoopBase<BlockT, LoopT> &) LLVM_DELETED_FUNCTION; public: /// Loop ctor - This creates an empty loop. 
- LoopBase() : ParentLoop(0) {} + LoopBase() : ParentLoop(nullptr) {} ~LoopBase() { for (size_t i = 0, e = SubLoops.size(); i != e; ++i) delete SubLoops[i]; @@ -106,7 +106,7 @@ public: /// bool contains(const LoopT *L) const { if (L == this) return true; - if (L == 0) return false; + if (!L) return false; return contains(L->getParentLoop()); } @@ -265,7 +265,7 @@ public: /// updates the loop depth of the new child. /// void addChildLoop(LoopT *NewChild) { - assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); + assert(!NewChild->ParentLoop && "NewChild already has a parent!"); NewChild->ParentLoop = static_cast<LoopT *>(this); SubLoops.push_back(NewChild); } @@ -278,7 +278,7 @@ public: LoopT *Child = *I; assert(Child->ParentLoop == this && "Child is not a child of this loop!"); SubLoops.erase(SubLoops.begin()+(I-begin())); - Child->ParentLoop = 0; + Child->ParentLoop = nullptr; return Child; } @@ -333,7 +333,7 @@ public: protected: friend class LoopInfoBase<BlockT, LoopT>; - explicit LoopBase(BlockT *BB) : ParentLoop(0) { + explicit LoopBase(BlockT *BB) : ParentLoop(nullptr) { Blocks.push_back(BB); DenseBlockSet.insert(BB); } @@ -372,7 +372,7 @@ public: /// If null, the terminator of the loop preheader is used. /// bool makeLoopInvariant(Value *V, bool &Changed, - Instruction *InsertPt = 0) const; + Instruction *InsertPt = nullptr) const; /// makeLoopInvariant - If the given instruction is inside of the /// loop and it can be hoisted, do so to make it trivially loop-invariant. @@ -384,7 +384,7 @@ public: /// If null, the terminator of the loop preheader is used. /// bool makeLoopInvariant(Instruction *I, bool &Changed, - Instruction *InsertPt = 0) const; + Instruction *InsertPt = nullptr) const; /// getCanonicalInductionVariable - Check to see if the loop has a canonical /// induction variable: an integer recurrence that starts at 0 and increments @@ -453,6 +453,31 @@ public: void dump() const; + /// \brief Return the debug location of the start of this loop. + /// This looks for a BB terminating instruction with a known debug + /// location by looking at the preheader and header blocks. If it + /// cannot find a terminating instruction with location information, + /// it returns an unknown location. + DebugLoc getStartLoc() const { + DebugLoc StartLoc; + BasicBlock *HeadBB; + + // Try the pre-header first. + if ((HeadBB = getLoopPreheader()) != nullptr) { + StartLoc = HeadBB->getTerminator()->getDebugLoc(); + if (!StartLoc.isUnknown()) + return StartLoc; + } + + // If we have no pre-header or there are no instructions with debug + // info in it, try the header. 
+ HeadBB = getHeader(); + if (HeadBB) + StartLoc = HeadBB->getTerminator()->getDebugLoc(); + + return StartLoc; + } + private: friend class LoopInfoBase<BasicBlock, Loop>; explicit Loop(BasicBlock *BB) : LoopBase<BasicBlock, Loop>(BB) {} @@ -531,7 +556,7 @@ public: LoopT *removeLoop(iterator I) { assert(I != end() && "Cannot remove end iterator!"); LoopT *L = *I; - assert(L->getParentLoop() == 0 && "Not a top-level loop!"); + assert(!L->getParentLoop() && "Not a top-level loop!"); TopLevelLoops.erase(TopLevelLoops.begin() + (I-begin())); return L; } @@ -555,14 +580,14 @@ public: std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop); assert(I != TopLevelLoops.end() && "Old loop not at top level!"); *I = NewLoop; - assert(NewLoop->ParentLoop == 0 && OldLoop->ParentLoop == 0 && + assert(!NewLoop->ParentLoop && !OldLoop->ParentLoop && "Loops already embedded into a subloop!"); } /// addTopLevelLoop - This adds the specified loop to the collection of /// top-level loops. void addTopLevelLoop(LoopT *New) { - assert(New->getParentLoop() == 0 && "Loop already in subloop!"); + assert(!New->getParentLoop() && "Loop already in subloop!"); TopLevelLoops.push_back(New); } @@ -583,7 +608,7 @@ public: static bool isNotAlreadyContainedIn(const LoopT *SubLoop, const LoopT *ParentLoop) { - if (SubLoop == 0) return true; + if (!SubLoop) return true; if (SubLoop == ParentLoop) return false; return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop); } @@ -660,7 +685,7 @@ public: void releaseMemory() override { LI.releaseMemory(); } - void print(raw_ostream &O, const Module* M = 0) const override; + void print(raw_ostream &O, const Module* M = nullptr) const override; void getAnalysisUsage(AnalysisUsage &AU) const override; diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index dd2dc28..948be0f 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -53,7 +53,7 @@ BlockT *LoopBase<BlockT, LoopT>::getExitingBlock() const { getExitingBlocks(ExitingBlocks); if (ExitingBlocks.size() == 1) return ExitingBlocks[0]; - return 0; + return nullptr; } /// getExitBlocks - Return all of the successor blocks of this loop. These @@ -80,7 +80,7 @@ BlockT *LoopBase<BlockT, LoopT>::getExitBlock() const { getExitBlocks(ExitBlocks); if (ExitBlocks.size() == 1) return ExitBlocks[0]; - return 0; + return nullptr; } /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). @@ -108,14 +108,14 @@ template<class BlockT, class LoopT> BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const { // Keep track of nodes outside the loop branching to the header... BlockT *Out = getLoopPredecessor(); - if (!Out) return 0; + if (!Out) return nullptr; // Make sure there is only one exit out of the preheader. typedef GraphTraits<BlockT*> BlockTraits; typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out); ++SI; if (SI != BlockTraits::child_end(Out)) - return 0; // Multiple exits from the block, must not be a preheader. + return nullptr; // Multiple exits from the block, must not be a preheader. // The predecessor has exactly one successor, so it is a preheader. return Out; @@ -129,7 +129,7 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const { template<class BlockT, class LoopT> BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const { // Keep track of nodes outside the loop branching to the header... - BlockT *Out = 0; + BlockT *Out = nullptr; // Loop over the predecessors of the header node... 
BlockT *Header = getHeader(); @@ -140,7 +140,7 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const { typename InvBlockTraits::NodeType *N = *PI; if (!contains(N)) { // If the block is not in the loop... if (Out && Out != N) - return 0; // Multiple predecessors outside the loop + return nullptr; // Multiple predecessors outside the loop Out = N; } } @@ -160,11 +160,11 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const { InvBlockTraits::child_begin(Header); typename InvBlockTraits::ChildIteratorType PE = InvBlockTraits::child_end(Header); - BlockT *Latch = 0; + BlockT *Latch = nullptr; for (; PI != PE; ++PI) { typename InvBlockTraits::NodeType *N = *PI; if (contains(N)) { - if (Latch) return 0; + if (Latch) return nullptr; Latch = N; } } @@ -188,7 +188,7 @@ addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) { assert((Blocks.empty() || LIB[getHeader()] == this) && "Incorrect LI specified for this loop!"); assert(NewBB && "Cannot add a null basic block to the loop!"); - assert(LIB[NewBB] == 0 && "BasicBlock already in the loop!"); + assert(!LIB[NewBB] && "BasicBlock already in the loop!"); LoopT *L = static_cast<LoopT *>(this); @@ -210,12 +210,12 @@ template<class BlockT, class LoopT> void LoopBase<BlockT, LoopT>:: replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild) { assert(OldChild->ParentLoop == this && "This loop is already broken!"); - assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); + assert(!NewChild->ParentLoop && "NewChild already has a parent!"); typename std::vector<LoopT *>::iterator I = std::find(SubLoops.begin(), SubLoops.end(), OldChild); assert(I != SubLoops.end() && "OldChild not in loop!"); *I = NewChild; - OldChild->ParentLoop = 0; + OldChild->ParentLoop = nullptr; NewChild->ParentLoop = static_cast<LoopT *>(this); } @@ -270,11 +270,10 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const { // though it is permitted if the predecessor is not itself actually // reachable. 
BlockT *EntryBB = BB->getParent()->begin(); - for (df_iterator<BlockT *> NI = df_begin(EntryBB), - NE = df_end(EntryBB); NI != NE; ++NI) - for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) - assert(*NI != OutsideLoopPreds[i] && - "Loop has multiple entry points!"); + for (BlockT *CB : depth_first(EntryBB)) + for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) + assert(CB != OutsideLoopPreds[i] && + "Loop has multiple entry points!"); } assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index ff4bc22..d414680 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -233,7 +233,7 @@ class ObjectSizeOffsetEvaluator bool RoundToAlign; SizeOffsetEvalType unknown() { - return std::make_pair((Value*)0, (Value*)0); + return std::make_pair(nullptr, nullptr); } SizeOffsetEvalType compute_(Value *V); diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 123d435..1c4441b 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -97,7 +97,7 @@ namespace llvm { PairTy Value; explicit MemDepResult(PairTy V) : Value(V) {} public: - MemDepResult() : Value(0, Invalid) {} + MemDepResult() : Value(nullptr, Invalid) {} /// get methods: These are static ctor methods for creating various /// MemDepResult kinds. @@ -155,7 +155,7 @@ namespace llvm { /// getInst() - If this is a normal dependency, return the instruction that /// is depended on. Otherwise, return null. Instruction *getInst() const { - if (Value.getInt() == Other) return NULL; + if (Value.getInt() == Other) return nullptr; return Value.getPointer(); } @@ -285,7 +285,8 @@ namespace llvm { /// pointer. May be null if there are no tags or conflicting tags. const MDNode *TBAATag; - NonLocalPointerInfo() : Size(AliasAnalysis::UnknownSize), TBAATag(0) {} + NonLocalPointerInfo() + : Size(AliasAnalysis::UnknownSize), TBAATag(nullptr) {} }; /// CachedNonLocalPointerInfo - This map stores the cached results of doing @@ -401,7 +402,7 @@ namespace llvm { bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, - Instruction *QueryInst = 0); + Instruction *QueryInst = nullptr); /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h index 6d70edd..69f5907 100644 --- a/include/llvm/Analysis/PHITransAddr.h +++ b/include/llvm/Analysis/PHITransAddr.h @@ -45,7 +45,8 @@ class PHITransAddr { /// InstInputs - The inputs for our symbolic address. SmallVector<Instruction*, 4> InstInputs; public: - PHITransAddr(Value *addr, const DataLayout *DL) : Addr(addr), DL(DL), TLI(0) { + PHITransAddr(Value *addr, const DataLayout *DL) + : Addr(addr), DL(DL), TLI(nullptr) { // If the address is an instruction, the whole thing is considered an input. if (Instruction *I = dyn_cast<Instruction>(Addr)) InstInputs.push_back(I); diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h index 572d5d7..6e61fc3 100644 --- a/include/llvm/Analysis/PtrUseVisitor.h +++ b/include/llvm/Analysis/PtrUseVisitor.h @@ -48,13 +48,13 @@ public: /// analysis and whether the visit completed or aborted early. 
class PtrInfo { public: - PtrInfo() : AbortedInfo(0, false), EscapedInfo(0, false) {} + PtrInfo() : AbortedInfo(nullptr, false), EscapedInfo(nullptr, false) {} /// \brief Reset the pointer info, clearing all state. void reset() { - AbortedInfo.setPointer(0); + AbortedInfo.setPointer(nullptr); AbortedInfo.setInt(false); - EscapedInfo.setPointer(0); + EscapedInfo.setPointer(nullptr); EscapedInfo.setInt(false); } @@ -76,14 +76,14 @@ public: /// \brief Mark the visit as aborted. Intended for use in a void return. /// \param I The instruction which caused the visit to abort, if available. - void setAborted(Instruction *I = 0) { + void setAborted(Instruction *I = nullptr) { AbortedInfo.setInt(true); AbortedInfo.setPointer(I); } /// \brief Mark the pointer as escaped. Intended for use in a void return. /// \param I The instruction which escapes the pointer, if available. - void setEscaped(Instruction *I = 0) { + void setEscaped(Instruction *I = nullptr) { EscapedInfo.setInt(true); EscapedInfo.setPointer(I); } @@ -92,7 +92,7 @@ public: /// for use in a void return. /// \param I The instruction which both escapes the pointer and aborts the /// visit, if available. - void setEscapedAndAborted(Instruction *I = 0) { + void setEscapedAndAborted(Instruction *I = nullptr) { setEscaped(I); setAborted(I); } diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index 4d55408..82a788d 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -33,6 +33,7 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Support/Allocator.h" #include <map> +#include <memory> namespace llvm { @@ -213,7 +214,7 @@ class Region : public RegionNode { // (The entry BasicBlock is part of RegionNode) BasicBlock *exit; - typedef std::vector<Region*> RegionSet; + typedef std::vector<std::unique_ptr<Region>> RegionSet; // The subregions of this region. RegionSet children; @@ -246,7 +247,7 @@ public: /// @param Parent The surrounding region or NULL if this is a top level /// region. Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RI, - DominatorTree *DT, Region *Parent = 0); + DominatorTree *DT, Region *Parent = nullptr); /// Delete the Region and all its subregions. ~Region(); @@ -311,7 +312,7 @@ public: /// @brief Check if a Region is the TopLevel region. /// /// The toplevel region represents the whole function. - bool isTopLevelRegion() const { return exit == NULL; } + bool isTopLevelRegion() const { return exit == nullptr; } /// @brief Return a new (non-canonical) region, that is obtained by joining /// this region with its predecessors. @@ -515,7 +516,7 @@ public: } // Construct the end iterator. 
- block_iterator_wrapper() : super(df_end<pointer>((BasicBlock *)0)) {} + block_iterator_wrapper() : super(df_end<pointer>((BasicBlock *)nullptr)) {} /*implicit*/ block_iterator_wrapper(super I) : super(I) {} diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 06489d8..0570826 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -210,7 +210,7 @@ namespace llvm { void deleted() override; void allUsesReplacedWith(Value *New) override; public: - SCEVCallbackVH(Value *V, ScalarEvolution *SE = 0); + SCEVCallbackVH(Value *V, ScalarEvolution *SE = nullptr); }; friend class SCEVCallbackVH; @@ -291,7 +291,7 @@ namespace llvm { const SCEV *ExactNotTaken; PointerIntPair<ExitNotTakenInfo*, 1> NextExit; - ExitNotTakenInfo() : ExitingBlock(0), ExactNotTaken(0) {} + ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} /// isCompleteList - Return true if all loop exits are computable. bool isCompleteList() const { @@ -321,7 +321,7 @@ namespace llvm { const SCEV *Max; public: - BackedgeTakenInfo() : Max(0) {} + BackedgeTakenInfo() : Max(nullptr) {} /// Initialize BackedgeTakenInfo from a list of exact exit counts. BackedgeTakenInfo( @@ -894,10 +894,19 @@ namespace llvm { /// indirect operand. bool hasOperand(const SCEV *S, const SCEV *Op) const; + /// Return the size of an element read or written by Inst. + const SCEV *getElementSize(Instruction *Inst); + + /// Compute the array dimensions Sizes from the set of Terms extracted from + /// the memory access function of this SCEVAddRecExpr. + void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize) const; + bool runOnFunction(Function &F) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &AU) const override; - void print(raw_ostream &OS, const Module* = 0) const override; + void print(raw_ostream &OS, const Module* = nullptr) const override; void verifyAnalysis() const override; private: diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 9162735..b9bef97 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -92,7 +92,7 @@ namespace llvm { public: /// SCEVExpander - Construct a SCEVExpander in "canonical" mode. explicit SCEVExpander(ScalarEvolution &se, const char *name) - : SE(se), IVName(name), IVIncInsertLoop(0), IVIncInsertPos(0), + : SE(se), IVName(name), IVIncInsertLoop(nullptr), IVIncInsertPos(nullptr), CanonicalMode(true), LSRMode(false), Builder(se.getContext(), TargetFolder(se.DL)) { #ifndef NDEBUG @@ -131,7 +131,7 @@ namespace llvm { /// representative. Return the number of phis eliminated. unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl<WeakVH> &DeadInsts, - const TargetTransformInfo *TTI = NULL); + const TargetTransformInfo *TTI = nullptr); /// expandCodeFor - Insert code to directly compute the specified SCEV /// expression into the program. The inserted code is inserted into the @@ -219,7 +219,7 @@ namespace llvm { /// expression into the program. The inserted code is inserted into the /// SCEVExpander's current insertion point. If a type is specified, the /// result will be expanded to have that type, with a cast if necessary. 
- Value *expandCodeFor(const SCEV *SH, Type *Ty = 0); + Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr); /// getRelevantLoop - Determine the most "relevant" loop for the given SCEV. const Loop *getRelevantLoop(const SCEV *); diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index ed8c133..01b034f 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -14,6 +14,7 @@ #ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H #define LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H +#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/ErrorHandling.h" @@ -151,8 +152,12 @@ namespace llvm { } typedef const SCEV *const *op_iterator; + typedef iterator_range<op_iterator> op_range; op_iterator op_begin() const { return Operands; } op_iterator op_end() const { return Operands + NumOperands; } + op_range operands() const { + return make_range(op_begin(), op_end()); + } Type *getType() const { return getOperand(0)->getType(); } @@ -352,12 +357,83 @@ namespace llvm { return S->getSCEVType() == scAddRecExpr; } - /// Splits the SCEV into two vectors of SCEVs representing the subscripts - /// and sizes of an array access. Returns the remainder of the - /// delinearization that is the offset start of the array. - const SCEV *delinearize(ScalarEvolution &SE, - SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes) const; + /// Collect parametric terms occurring in step expressions. + void collectParametricTerms(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Terms) const; + + /// Return in Subscripts the access functions for each dimension in Sizes. + void computeAccessFunctions(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) const; + + /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the + /// subscripts and sizes of an array access. + /// + /// The delinearization is a 3 step process: the first two steps compute the + /// sizes of each subscript and the third step computes the access functions + /// for the delinearized array: + /// + /// 1. Find the terms in the step functions + /// 2. Compute the array size + /// 3. Compute the access function: divide the SCEV by the array size + /// starting with the innermost dimensions found in step 2. The Quotient + /// is the SCEV to be divided in the next step of the recursion. The + /// Remainder is the subscript of the innermost dimension. Loop over all + /// array dimensions computed in step 2. + /// + /// To compute a uniform array size for several memory accesses to the same + /// object, one can collect in step 1 all the step terms for all the memory + /// accesses, and compute in step 2 a unique array shape. This guarantees + /// that the array shape will be the same across all memory accesses. + /// + /// FIXME: We could derive the result of steps 1 and 2 from a description of + /// the array shape given in metadata. + /// + /// Example: + /// + /// A[][n][m] + /// + /// for i + /// for j + /// for k + /// A[j+k][2i][5i] = + /// + /// The initial SCEV: + /// + /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] + /// + /// 1. Find the different terms in the step functions: + /// -> [2*m, 5, n*m, n*m] + /// + /// 2. 
Compute the array size: sort and unique them + /// -> [n*m, 2*m, 5] + /// find the GCD of all the terms = 1 + /// divide by the GCD and erase constant terms + /// -> [n*m, 2*m] + /// GCD = m + /// divide by GCD -> [n, 2] + /// remove constant terms + /// -> [n] + /// size of the array is A[unknown][n][m] + /// + /// 3. Compute the access function + /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m + /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k + /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k + /// The remainder is the subscript of the innermost array dimension: [5i]. + /// + /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n + /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k + /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k + /// The Remainder is the subscript of the next array dimension: [2i]. + /// + /// The subscript of the outermost dimension is the Quotient: [j+k]. + /// + /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. + void delinearize(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize) const; }; //===--------------------------------------------------------------------===// diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index 76c8ccf..65ff2f6 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -82,7 +82,7 @@ public: /// constant value, return it. Otherwise return null. The returned value /// must be in the same LLVM type as Val. virtual Constant *GetConstant(LatticeVal LV, Value *Val, SparseSolver &SS) { - return 0; + return nullptr; } /// ComputeArgument - Given a formal argument value, compute and return a diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 2ac6ffa..79fe1dc 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -105,7 +105,7 @@ public: /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy = 0) const; + Type *OpTy = nullptr) const; /// \brief Estimate the cost of a GEP operation when lowered. /// @@ -356,7 +356,7 @@ public: /// The index and subtype parameters are used by the subvector insertion and /// extraction shuffle kinds. virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, - Type *SubTp = 0) const; + Type *SubTp = nullptr) const; /// \return The expected cost of cast instructions, such as bitcast, trunc, /// zext, etc. @@ -369,7 +369,7 @@ public: /// \returns The expected cost of compare and select instructions. virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = 0) const; + Type *CondTy = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 0392f98..ce78967 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -27,24 +27,22 @@ namespace llvm { class MDNode; class TargetLibraryInfo; - /// ComputeMaskedBits - Determine which of the bits specified in Mask are - /// known to be either zero or one and return them in the KnownZero/KnownOne - /// bit sets. 
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 0392f98..ce78967 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -27,24 +27,22 @@ namespace llvm {
   class MDNode;
   class TargetLibraryInfo;

-  /// ComputeMaskedBits - Determine which of the bits specified in Mask are
-  /// known to be either zero or one and return them in the KnownZero/KnownOne
-  /// bit sets. This code only analyzes bits in Mask, in order to short-circuit
-  /// processing.
+  /// Determine which bits of V are known to be either zero or one and return
+  /// them in the KnownZero/KnownOne bit sets.
   ///
   /// This function is defined on values with integer type, values with pointer
   /// type (but only if TD is non-null), and vectors of integers. In the case
-  /// where V is a vector, the mask, known zero, and known one values are the
+  /// where V is a vector, the known zero and known one values are the
   /// same width as the vector element, and the bit is set only if it is true
   /// for all of the elements in the vector.
-  void ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
-                         const DataLayout *TD = 0, unsigned Depth = 0);
-  void computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero);
+  void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+                        const DataLayout *TD = nullptr, unsigned Depth = 0);
+  void computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero);

   /// ComputeSignBit - Determine whether the sign bit is known to be zero or
-  /// one. Convenience wrapper around ComputeMaskedBits.
+  /// one. Convenience wrapper around computeKnownBits.
   void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
-                      const DataLayout *TD = 0, unsigned Depth = 0);
+                      const DataLayout *TD = nullptr, unsigned Depth = 0);

   /// isKnownToBeAPowerOfTwo - Return true if the given value is known to have
   /// exactly one bit set when defined. For vectors return true if every
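For callers, only the name changes; the calling convention is identical to the old ComputeMaskedBits minus the mask. A minimal sketch, assuming V is a placeholder Value * of integer type and DL a possibly-null const DataLayout *:

    // Sketch only: query the known-bits state of a value.
    // Assumed headers: llvm/Analysis/ValueTracking.h, llvm/ADT/APInt.h.
    unsigned BitWidth = V->getType()->getScalarSizeInBits();
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(V, KnownZero, KnownOne, DL);
    if ((KnownZero | KnownOne).isAllOnesValue()) {
      // Every bit is known, so V folds to the constant held in KnownOne.
    }

Note that KnownZero and KnownOne must be constructed at the bit width of V (or of its element type, for vectors) before the call.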
@@ -57,7 +55,8 @@ namespace llvm {
   /// when defined. For vectors return true if every element is known to be
   /// non-zero when defined. Supports values with integer or pointer type and
   /// vectors of integers.
-  bool isKnownNonZero(Value *V, const DataLayout *TD = 0, unsigned Depth = 0);
+  bool isKnownNonZero(Value *V, const DataLayout *TD = nullptr,
+                      unsigned Depth = 0);

   /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
   /// this predicate to simplify operations downstream. Mask is known to be
@@ -69,7 +68,7 @@ namespace llvm {
   /// same width as the vector element, and the bit is set only if it is true
   /// for all of the elements in the vector.
   bool MaskedValueIsZero(Value *V, const APInt &Mask,
-                         const DataLayout *TD = 0, unsigned Depth = 0);
+                         const DataLayout *TD = nullptr, unsigned Depth = 0);

   /// ComputeNumSignBits - Return the number of times the sign bit of the
@@ -80,7 +79,7 @@ namespace llvm {
   ///
   /// 'Op' must have a scalar integer type.
   ///
-  unsigned ComputeNumSignBits(Value *Op, const DataLayout *TD = 0,
+  unsigned ComputeNumSignBits(Value *Op, const DataLayout *TD = nullptr,
                               unsigned Depth = 0);

   /// ComputeMultiple - This function computes the integer multiple of Base that
@@ -112,7 +111,7 @@ namespace llvm {
   /// insertvalues when a part of a nested struct is extracted.
   Value *FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
-                           Instruction *InsertBefore = 0);
+                           Instruction *InsertBefore = nullptr);

   /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
   /// it can be expressed as a base pointer plus a constant offset. Return the
@@ -143,10 +142,10 @@ namespace llvm {
   /// being addressed. Note that the returned value has pointer type if the
   /// specified value does. If the MaxLookup value is non-zero, it limits the
   /// number of instructions to be stripped off.
-  Value *GetUnderlyingObject(Value *V, const DataLayout *TD = 0,
+  Value *GetUnderlyingObject(Value *V, const DataLayout *TD = nullptr,
                              unsigned MaxLookup = 6);
   static inline const Value *
-  GetUnderlyingObject(const Value *V, const DataLayout *TD = 0,
+  GetUnderlyingObject(const Value *V, const DataLayout *TD = nullptr,
                       unsigned MaxLookup = 6) {
     return GetUnderlyingObject(const_cast<Value *>(V), TD, MaxLookup);
   }
@@ -156,7 +155,7 @@ namespace llvm {
   /// multiple objects.
   void GetUnderlyingObjects(Value *V,
                             SmallVectorImpl<Value *> &Objects,
-                            const DataLayout *TD = 0,
+                            const DataLayout *TD = nullptr,
                             unsigned MaxLookup = 6);

   /// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
@@ -182,12 +181,12 @@ namespace llvm {
   /// However, this method can return true for instructions that read memory;
   /// for such instructions, moving them may change the resulting value.
   bool isSafeToSpeculativelyExecute(const Value *V,
-                                    const DataLayout *TD = 0);
+                                    const DataLayout *TD = nullptr);

   /// isKnownNonNull - Return true if this pointer couldn't possibly be null by
   /// its definition. This returns true for allocas, non-extern-weak globals
   /// and byval arguments.
-  bool isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI = 0);
+  bool isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI = nullptr);

 } // end namespace llvm
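As elsewhere in this patch, call sites only gain the option of omitting the DataLayout argument. A minimal sketch combining the two predicates above, with Ptr a placeholder for the pointer operand of a memory access and DL a possibly-null const DataLayout *:

    // Sketch only: walk through GEPs and casts (up to the default MaxLookup
    // of 6 steps) to find the object a pointer is based on.
    // Assumed headers: llvm/Analysis/ValueTracking.h, llvm/IR/Instructions.h.
    Value *Obj = GetUnderlyingObject(Ptr, DL);
    if (isa<AllocaInst>(Obj) || isKnownNonNull(Obj)) {
      // Obj is a local allocation, or at least known not to be null.
    }

If the walk gives up after MaxLookup steps, the returned value is simply the last pointer reached, so callers should not assume Obj is an allocation site.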