commit ebe69fe11e48d322045d5949c83283927a0d790b
tree   c92f1907a6b8006628a4b01615f38264d29834ea
parent b7d2e72b02a4cb8034f32f8247a2558d2434e121
author    Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
committer Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
Update aosp/master LLVM for rebase to r230699.
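This rebase is mostly mechanical churn from upstream API renames: TargetLibraryInfo moved from llvm/Target to llvm/Analysis and is now reached through TargetLibraryInfoWrapperPass, LoopInfo is reached through LoopInfoWrapperPass, and AssumptionTracker is replaced by AssumptionCache via AssumptionCacheTracker. A minimal sketch of how a downstream legacy pass might adapt (MyLegacyPass is hypothetical and not part of this patch):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    // Hypothetical downstream pass, shown only to illustrate the new APIs.
    struct MyLegacyPass : public FunctionPass {
      static char ID;
      MyLegacyPass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        // The analyses are no longer passes themselves; require the wrappers.
        AU.addRequired<TargetLibraryInfoWrapperPass>(); // was TargetLibraryInfo
        AU.addRequired<LoopInfoWrapperPass>();          // was LoopInfo
        AU.addRequired<AssumptionCacheTracker>();       // was AssumptionTracker
        AU.setPreservesAll();
      }

      bool runOnFunction(Function &F) override {
        // Unwrap each analysis from its wrapper pass.
        const TargetLibraryInfo &TLI =
            getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
        LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
        AssumptionCache &AC =
            getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
        (void)TLI; (void)LI; (void)AC; // use the analyses here
        return false;
      }
    };
    char MyLegacyPass::ID = 0;
    }

The same wrapper-pass pattern recurs throughout the diff below (BasicAliasAnalysis, BlockFrequencyInfo, BranchProbabilityInfo, Delinearization, DependenceAnalysis, CostModel).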
Change-Id: I2b5be30509658cb8266be782de0ab24f9099f9b9
Diffstat (limited to 'lib/Analysis')
51 files changed, 4764 insertions(+), 2018 deletions(-)
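One behavioral detail worth noting before the diff (visible in the AssumptionCache and CodeMetrics hunks below): AssumptionCache::assumptions() hands out weak value handles that go null when an @llvm.assume call is deleted, so every consumer now null-checks each handle before casting. A minimal sketch of that consumption pattern (printAssumedConditions is a hypothetical helper, not part of this patch):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Hypothetical consumer; mirrors the pattern CodeMetrics uses below.
    static void printAssumedConditions(AssumptionCache &AC) {
      for (auto &AssumeVH : AC.assumptions()) {
        // Each entry is a WeakVH that goes null if its @llvm.assume was erased.
        if (!AssumeVH)
          continue;
        auto *Assume = cast<CallInst>(AssumeVH);
        // Operand 0 of @llvm.assume is the i1 condition being assumed.
        errs() << *Assume->getArgOperand(0) << "\n";
      }
    }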
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 5171a45..4e95aa0 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
@@ -37,7 +38,6 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 
 // Register the AliasAnalysis interface, providing a nice name to refer to.
@@ -465,7 +465,8 @@ AliasAnalysis::~AliasAnalysis() {}
 void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
   DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
   DL = DLP ? &DLP->getDataLayout() : nullptr;
-  TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
+  auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+  TLI = TLIP ? &TLIP->getTLI() : nullptr;
   AA = &P->getAnalysis<AliasAnalysis>();
 }
 
@@ -483,21 +484,22 @@ uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
 }
 
 /// canBasicBlockModify - Return true if it is possible for execution of the
-/// specified basic block to modify the value pointed to by Ptr.
+/// specified basic block to modify the location Loc.
 ///
 bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
                                         const Location &Loc) {
-  return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+  return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
 }
 
-/// canInstructionRangeModify - Return true if it is possible for the execution
-/// of the specified instructions to modify the value pointed to by Ptr. The
-/// instructions to consider are all of the instructions in the range of [I1,I2]
-/// INCLUSIVE. I1 and I2 must be in the same basic block.
-///
-bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+/// canInstructionRangeModRef - Return true if it is possible for the
+/// execution of the specified instructions to mod\ref (according to the
+/// mode) the location Loc. The instructions to consider are all
+/// of the instructions in the range of [I1,I2] INCLUSIVE.
+/// I1 and I2 must be in the same basic block.
+bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
                                               const Instruction &I2,
-                                              const Location &Loc) {
+                                              const Location &Loc,
+                                              const ModRefResult Mode) {
   assert(I1.getParent() == I2.getParent() &&
          "Instructions not in same basic block!");
   BasicBlock::const_iterator I = &I1;
@@ -505,7 +507,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
   ++E;  // Convert from inclusive to exclusive range.
   for (; I != E; ++I) // Check every instruction in range
-    if (getModRefInfo(I, Loc) & Mod)
+    if (getModRefInfo(I, Loc) & Mode)
       return true;
   return false;
 }
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index f64bf0e..1bfb06d 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -53,8 +53,9 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeLazyValueInfoPass(Registry);
   initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
-  initializeLoopInfoPass(Registry);
+  initializeLoopInfoWrapperPassPass(Registry);
   initializeMemDepPrinterPass(Registry);
+  initializeMemDerefPrinterPass(Registry);
   initializeMemoryDependenceAnalysisPass(Registry);
   initializeModuleDebugInfoPrinterPass(Registry);
   initializePostDominatorTreePass(Registry);
@@ -65,7 +66,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeRegionOnlyPrinterPass(Registry);
   initializeScalarEvolutionPass(Registry);
   initializeScalarEvolutionAliasAnalysisPass(Registry);
-  initializeTargetTransformInfoAnalysisGroup(Registry);
+  initializeTargetTransformInfoWrapperPassPass(Registry);
   initializeTypeBasedAliasAnalysisPass(Registry);
   initializeScopedNoAliasAAPass(Registry);
 }
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 8770fa7..e17b870 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -7,7 +7,7 @@ analysis_SRC_FILES := \
   AliasDebugger.cpp \
   AliasSetTracker.cpp \
   Analysis.cpp \
-  AssumptionTracker.cpp \
+  AssumptionCache.cpp \
   BasicAliasAnalysis.cpp \
   BlockFrequencyInfo.cpp \
   BlockFrequencyInfoImpl.cpp \
@@ -24,7 +24,6 @@ analysis_SRC_FILES := \
   DependenceAnalysis.cpp \
   DomPrinter.cpp \
   DominanceFrontier.cpp \
-  FunctionTargetTransformInfo.cpp \
   IVUsers.cpp \
   InstCount.cpp \
  InstructionSimplify.cpp \
@@ -37,9 +36,11 @@ analysis_SRC_FILES := \
   LibCallSemantics.cpp \
   Lint.cpp \
   Loads.cpp \
+  LoopAccessAnalysis.cpp \
   LoopInfo.cpp \
   LoopPass.cpp \
   MemDepPrinter.cpp \
+  MemDerefPrinter.cpp \
   MemoryBuiltins.cpp \
   MemoryDependenceAnalysis.cpp \
   ModuleDebugInfoPrinter.cpp \
@@ -56,6 +57,7 @@ analysis_SRC_FILES := \
   ScalarEvolutionNormalization.cpp \
   ScopedNoAliasAA.cpp \
   SparsePropagation.cpp \
+  TargetLibraryInfo.cpp \
   TargetTransformInfo.cpp \
   Trace.cpp \
   TypeBasedAliasAnalysis.cpp \
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
new file mode 100644
index 0000000..f468a43
--- /dev/null
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -0,0 +1,140 @@
+//===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that keeps track of @llvm.assume intrinsics in
+// the functions of a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void AssumptionCache::scanFunction() {
+  assert(!Scanned && "Tried to scan the function twice!");
+  assert(AssumeHandles.empty() && "Already have assumes when scanning!");
+
+  // Go through all instructions in all blocks, add all calls to @llvm.assume
+  // to this cache.
+  for (BasicBlock &B : F)
+    for (Instruction &II : B)
+      if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+        AssumeHandles.push_back(&II);
+
+  // Mark the scan as complete.
+  Scanned = true;
+}
+
+void AssumptionCache::registerAssumption(CallInst *CI) {
+  assert(match(CI, m_Intrinsic<Intrinsic::assume>()) &&
+         "Registered call does not call @llvm.assume");
+
+  // If we haven't scanned the function yet, just drop this assumption. It will
+  // be found when we scan later.
+  if (!Scanned)
+    return;
+
+  AssumeHandles.push_back(CI);
+
+#ifndef NDEBUG
+  assert(CI->getParent() &&
+         "Cannot register @llvm.assume call not in a basic block");
+  assert(&F == CI->getParent()->getParent() &&
+         "Cannot register @llvm.assume call not in this function");
+
+  // We expect the number of assumptions to be small, so in an asserts build
+  // check that we don't accumulate duplicates and that all assumptions point
+  // to the same function.
+  SmallPtrSet<Value *, 16> AssumptionSet;
+  for (auto &VH : AssumeHandles) {
+    if (!VH)
+      continue;
+
+    assert(&F == cast<Instruction>(VH)->getParent()->getParent() &&
+           "Cached assumption not inside this function!");
+    assert(match(cast<CallInst>(VH), m_Intrinsic<Intrinsic::assume>()) &&
+           "Cached something other than a call to @llvm.assume!");
+    assert(AssumptionSet.insert(VH).second &&
+           "Cache contains multiple copies of a call!");
+  }
+#endif
+}
+
+char AssumptionAnalysis::PassID;
+
+PreservedAnalyses AssumptionPrinterPass::run(Function &F,
+                                             AnalysisManager<Function> *AM) {
+  AssumptionCache &AC = AM->getResult<AssumptionAnalysis>(F);
+
+  OS << "Cached assumptions for function: " << F.getName() << "\n";
+  for (auto &VH : AC.assumptions())
+    if (VH)
+      OS << "  " << *cast<CallInst>(VH)->getArgOperand(0) << "\n";
+
+  return PreservedAnalyses::all();
+}
+
+void AssumptionCacheTracker::FunctionCallbackVH::deleted() {
+  auto I = ACT->AssumptionCaches.find_as(cast<Function>(getValPtr()));
+  if (I != ACT->AssumptionCaches.end())
+    ACT->AssumptionCaches.erase(I);
+  // 'this' now dangles!
+}
+
+AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
+  // We probe the function map twice to try and avoid creating a value handle
+  // around the function in common cases. This makes insertion a bit slower,
+  // but if we have to insert we're going to scan the whole function so that
+  // shouldn't matter.
+  auto I = AssumptionCaches.find_as(&F);
+  if (I != AssumptionCaches.end())
+    return *I->second;
+
+  // Ok, build a new cache by scanning the function, insert it and the value
+  // handle into our map, and return the newly populated cache.
+  auto IP = AssumptionCaches.insert(std::make_pair(
+      FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F)));
+  assert(IP.second && "Scanning function already in the map?");
+  return *IP.first->second;
+}
+
+void AssumptionCacheTracker::verifyAnalysis() const {
+#ifndef NDEBUG
+  SmallPtrSet<const CallInst *, 4> AssumptionSet;
+  for (const auto &I : AssumptionCaches) {
+    for (auto &VH : I.second->assumptions())
+      if (VH)
+        AssumptionSet.insert(cast<CallInst>(VH));
+
+    for (const BasicBlock &B : cast<Function>(*I.first))
+      for (const Instruction &II : B)
+        if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+          assert(AssumptionSet.count(cast<CallInst>(&II)) &&
+                 "Assumption in scanned function not in cache");
+  }
+#endif
+}
+
+AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
+  initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
+}
+
+AssumptionCacheTracker::~AssumptionCacheTracker() {}
+
+INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker",
+                "Assumption Cache Tracker", false, true)
+char AssumptionCacheTracker::ID = 0;
diff --git a/lib/Analysis/AssumptionTracker.cpp b/lib/Analysis/AssumptionTracker.cpp
deleted file mode 100644
index 775ce1d..0000000
--- a/lib/Analysis/AssumptionTracker.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-//===- AssumptionTracker.cpp - Track @llvm.assume -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that keeps track of @llvm.assume intrinsics in
-// the functions of a module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/AssumptionTracker.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-void AssumptionTracker::FunctionCallbackVH::deleted() {
-  AT->forgetCachedAssumptions(cast<Function>(getValPtr()));
-  // 'this' now dangles!
-}
-
-void AssumptionTracker::forgetCachedAssumptions(Function *F) {
-  auto I = CachedAssumeCalls.find_as(F);
-  if (I != CachedAssumeCalls.end())
-    CachedAssumeCalls.erase(I);
-}
-
-void AssumptionTracker::CallCallbackVH::deleted() {
-  assert(F && "delete callback called on dummy handle");
-  FunctionCallsMap::iterator I = AT->CachedAssumeCalls.find_as(F);
-  assert(I != AT->CachedAssumeCalls.end() &&
-         "Function cleared from the map without removing the values?");
-
-  I->second->erase(*this);
-  // 'this' now dangles!
-}
-
-AssumptionTracker::FunctionCallsMap::iterator
-AssumptionTracker::scanFunction(Function *F) {
-  auto IP = CachedAssumeCalls.insert(std::make_pair(
-      FunctionCallbackVH(F, this), llvm::make_unique<CallHandleSet>()));
-  assert(IP.second && "Scanning function already in the map?");
-
-  FunctionCallsMap::iterator I = IP.first;
-
-  // Go through all instructions in all blocks, add all calls to @llvm.assume
-  // to our cache.
-  for (BasicBlock &B : *F)
-    for (Instruction &II : B)
-      if (match(&II, m_Intrinsic<Intrinsic::assume>()))
-        I->second->insert(CallCallbackVH(&II, this));
-
-  return I;
-}
-
-void AssumptionTracker::verifyAnalysis() const {
-#ifndef NDEBUG
-  for (const auto &I : CachedAssumeCalls) {
-    for (const BasicBlock &B : cast<Function>(*I.first))
-      for (const Instruction &II : B) {
-        if (match(&II, m_Intrinsic<Intrinsic::assume>())) {
-          assert(I.second->find_as(&II) != I.second->end() &&
-                 "Assumption in scanned function not in cache");
-        }
-      }
-  }
-#endif
-}
-
-void AssumptionTracker::registerAssumption(CallInst *CI) {
-  assert(match(CI, m_Intrinsic<Intrinsic::assume>()) &&
-         "Registered call does not call @llvm.assume");
-  assert(CI->getParent() &&
-         "Cannot register @llvm.assume call not in a basic block");
-
-  Function *F = CI->getParent()->getParent();
-  assert(F && "Cannot register @llvm.assume call not in a function");
-
-  FunctionCallsMap::iterator I = CachedAssumeCalls.find_as(F);
-  if (I == CachedAssumeCalls.end()) {
-    // If this function has not already been scanned, then don't do anything
-    // here. This intrinsic will be found, if it still exists, if the list of
-    // assumptions in this function is requested at some later point. This
-    // maintains the following invariant: if a function is present in the
-    // cache, then its list of assumption intrinsic calls is complete.
-    return;
-  }
-
-  I->second->insert(CallCallbackVH(CI, this));
-}
-
-AssumptionTracker::AssumptionTracker() : ImmutablePass(ID) {
-  initializeAssumptionTrackerPass(*PassRegistry::getPassRegistry());
-}
-
-AssumptionTracker::~AssumptionTracker() {}
-
-INITIALIZE_PASS(AssumptionTracker, "assumption-tracker", "Assumption Tracker",
-                false, true)
-char AssumptionTracker::ID = 0;
-
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 9aba0d3..46ca6ee 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -17,12 +17,13 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -38,7 +39,6 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
@@ -196,8 +196,7 @@ namespace {
 static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
                                   ExtensionKind &Extension,
                                   const DataLayout &DL, unsigned Depth,
-                                  AssumptionTracker *AT,
-                                  DominatorTree *DT) {
+                                  AssumptionCache *AC, DominatorTree *DT) {
   assert(V->getType()->isIntegerTy() && "Not an integer value");
 
   // Limit our recursion depth.
@@ -222,24 +221,24 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
     case Instruction::Or:
       // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
       // analyze it.
-      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0,
-                             AT, BOp, DT))
+      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+                             BOp, DT))
        break;
      // FALL THROUGH.
     case Instruction::Add:
       V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                              DL, Depth+1, AT, DT);
+                              DL, Depth + 1, AC, DT);
       Offset += RHSC->getValue();
       return V;
     case Instruction::Mul:
       V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                              DL, Depth+1, AT, DT);
+                              DL, Depth + 1, AC, DT);
       Offset *= RHSC->getValue();
       Scale *= RHSC->getValue();
       return V;
     case Instruction::Shl:
       V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                              DL, Depth+1, AT, DT);
+                              DL, Depth + 1, AC, DT);
       Offset <<= RHSC->getValue().getLimitedValue();
       Scale <<= RHSC->getValue().getLimitedValue();
       return V;
@@ -259,8 +258,8 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
     Offset = Offset.trunc(SmallWidth);
     Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
 
-    Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
-                                        DL, Depth+1, AT, DT);
+    Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
+                                        Depth + 1, AC, DT);
     Scale = Scale.zext(OldWidth);
 
     // We have to sign-extend even if Extension == EK_ZeroExt as we can't
@@ -294,7 +293,7 @@ static const Value *
 DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
                        SmallVectorImpl<VariableGEPIndex> &VarIndices,
                        bool &MaxLookupReached, const DataLayout *DL,
-                       AssumptionTracker *AT, DominatorTree *DT) {
+                       AssumptionCache *AC, DominatorTree *DT) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = MaxLookupSearchDepth;
   MaxLookupReached = false;
@@ -325,7 +324,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       // If it's not a GEP, hand it off to SimplifyInstruction to see if it
       // can come up with something. This matches what GetUnderlyingObject does.
       if (const Instruction *I = dyn_cast<Instruction>(V))
-        // TODO: Get a DominatorTree and AssumptionTracker and use them here
+        // TODO: Get a DominatorTree and AssumptionCache and use them here
         // (these are both now available in this function, but this should be
         // updated when GetUnderlyingObject is updated). TLI should be
         // provided also.
@@ -387,7 +386,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
       Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
-                                  *DL, 0, AT, DT);
+                                  *DL, 0, AC, DT);
 
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -468,8 +467,8 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AliasAnalysis>();
-      AU.addRequired<AssumptionTracker>();
-      AU.addRequired<TargetLibraryInfo>();
+      AU.addRequired<AssumptionCacheTracker>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
     }
 
     AliasResult alias(const Location &LocA, const Location &LocB) override {
@@ -591,8 +590,8 @@ char BasicAliasAnalysis::ID = 0;
 INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                    "Basic Alias Analysis (stateless AA impl)",
                    false, true, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                    "Basic Alias Analysis (stateless AA impl)",
                    false, true, false)
@@ -719,7 +718,8 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
   if (F->onlyReadsMemory())
     Min = OnlyReadsMemory;
 
-  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+  const TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   if (isMemsetPattern16(F, TLI))
     Min = OnlyAccessesArgumentPointees;
 
@@ -731,7 +731,8 @@ AliasAnalysis::Location
 BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
                                    ModRefResult &Mask) {
   Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
-  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+  const TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
   if (II != nullptr)
     switch (II->getIntrinsicID()) {
@@ -889,6 +890,99 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
   return AliasAnalysis::getModRefInfo(CS1, CS2);
 }
 
+/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
+/// operators, both having the exact same pointer operand.
+static AliasAnalysis::AliasResult
+aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
+                         const GEPOperator *GEP2, uint64_t V2Size,
+                         const DataLayout &DL) {
+
+  assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
+         "Expected GEPs with the same pointer operand");
+
+  // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
+  // such that the struct field accesses provably cannot alias.
+  // We also need at least two indices (the pointer, and the struct field).
+  if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
+      GEP1->getNumIndices() < 2)
+    return AliasAnalysis::MayAlias;
+
+  // If we don't know the size of the accesses through both GEPs, we can't
+  // determine whether the struct fields accessed can't alias.
+  if (V1Size == AliasAnalysis::UnknownSize ||
+      V2Size == AliasAnalysis::UnknownSize)
+    return AliasAnalysis::MayAlias;
+
+  ConstantInt *C1 =
+      dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
+  ConstantInt *C2 =
+      dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
+
+  // If the last (struct) indices aren't constants, we can't say anything.
+  // If they're identical, the other indices might be also be dynamically
+  // equal, so the GEPs can alias.
+  if (!C1 || !C2 || C1 == C2)
+    return AliasAnalysis::MayAlias;
+
+  // Find the last-indexed type of the GEP, i.e., the type you'd get if
+  // you stripped the last index.
+  // On the way, look at each indexed type. If there's something other
+  // than an array, different indices can lead to different final types.
+  SmallVector<Value *, 8> IntermediateIndices;
+
+  // Insert the first index; we don't need to check the type indexed
+  // through it as it only drops the pointer indirection.
+  assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
+  IntermediateIndices.push_back(GEP1->getOperand(1));
+
+  // Insert all the remaining indices but the last one.
+  // Also, check that they all index through arrays.
+  for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
+    if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
+            GEP1->getPointerOperandType(), IntermediateIndices)))
+      return AliasAnalysis::MayAlias;
+    IntermediateIndices.push_back(GEP1->getOperand(i + 1));
+  }
+
+  StructType *LastIndexedStruct =
+      dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
+          GEP1->getPointerOperandType(), IntermediateIndices));
+
+  if (!LastIndexedStruct)
+    return AliasAnalysis::MayAlias;
+
+  // We know that:
+  // - both GEPs begin indexing from the exact same pointer;
+  // - the last indices in both GEPs are constants, indexing into a struct;
+  // - said indices are different, hence, the pointed-to fields are different;
+  // - both GEPs only index through arrays prior to that.
+  //
+  // This lets us determine that the struct that GEP1 indexes into and the
+  // struct that GEP2 indexes into must either precisely overlap or be
+  // completely disjoint. Because they cannot partially overlap, indexing into
+  // different non-overlapping fields of the struct will never alias.
+
+  // Therefore, the only remaining thing needed to show that both GEPs can't
+  // alias is that the fields are not overlapping.
+  const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
+  const uint64_t StructSize = SL->getSizeInBytes();
+  const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
+  const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
+
+  auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
+                                      uint64_t V2Off, uint64_t V2Size) {
+    return V1Off < V2Off && V1Off + V1Size <= V2Off &&
+           ((V2Off + V2Size <= StructSize) ||
+            (V2Off + V2Size - StructSize <= V1Off));
+  };
+
+  if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
+      EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
+    return AliasAnalysis::NoAlias;
+
+  return AliasAnalysis::MayAlias;
+}
+
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
 /// against another pointer. We know that V1 is a GEP, but we don't know
 /// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -905,7 +999,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
   bool GEP1MaxLookupReached;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
 
-  AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+  // We have to get two AssumptionCaches here because GEP1 and V2 may be from
+  // different functions.
+  // FIXME: This really doesn't make any sense. We get a dominator tree below
+  // that can only refer to a single function. But this function (aliasGEP) is
+  // a method on an immutable pass that can be called when there *isn't*
+  // a single function. The old pass management layer makes this "work", but
+  // this isn't really a clean solution.
+  AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>();
+  AssumptionCache *AC1 = nullptr, *AC2 = nullptr;
+  if (auto *GEP1I = dyn_cast<Instruction>(GEP1))
+    AC1 = &ACT.getAssumptionCache(
+        const_cast<Function &>(*GEP1I->getParent()->getParent()));
+  if (auto *I2 = dyn_cast<Instruction>(V2))
+    AC2 = &ACT.getAssumptionCache(
+        const_cast<Function &>(*I2->getParent()->getParent()));
+
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -932,11 +1041,11 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
-      DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                             GEP2MaxLookupReached, DL, AT, DT);
+        DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+                               GEP2MaxLookupReached, DL, AC2, DT);
     const Value *GEP1BasePtr =
-      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                             GEP1MaxLookupReached, DL, AT, DT);
+        DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+                               GEP1MaxLookupReached, DL, AC1, DT);
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -964,15 +1073,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     // exactly, see if the computed offset from the common pointer tells us
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
-      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                             GEP1MaxLookupReached, DL, AT, DT);
+        DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+                               GEP1MaxLookupReached, DL, AC1, DT);
 
     int64_t GEP2BaseOffset;
     bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
-      DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                             GEP2MaxLookupReached, DL, AT, DT);
+        DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+                               GEP2MaxLookupReached, DL, AC2, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
@@ -981,6 +1090,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
+
+    // If we know the two GEPs are based off of the exact same pointer (and not
+    // just the same underlying object), see if that tells us anything about
+    // the resulting pointers.
+    if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+      // If we couldn't find anything interesting, don't abandon just yet.
+      if (R != MayAlias)
+        return R;
+    }
+
     // If the max search depth is reached the result is undefined
     if (GEP2MaxLookupReached || GEP1MaxLookupReached)
       return MayAlias;
@@ -1010,8 +1130,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       return R;
 
     const Value *GEP1BasePtr =
-      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                             GEP1MaxLookupReached, DL, AT, DT);
+        DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+                               GEP1MaxLookupReached, DL, AC1, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
@@ -1080,10 +1200,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       const Value *V = GEP1VariableIndices[i].V;
 
       bool SignKnownZero, SignKnownOne;
-      ComputeSignBit(
-        const_cast<Value *>(V),
-        SignKnownZero, SignKnownOne,
-        DL, 0, AT, nullptr, DT);
+      ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+                     0, AC1, nullptr, DT);
 
       // Zero-extension widens the variable, and so forces the sign
       // bit to zero.
@@ -1422,7 +1540,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+  LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
 
   // Make sure that the visited phis cannot reach the Value. This ensures that
   // the Values cannot come from different iterations of a potential cycle the
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 8ed8e3e..37f2fae 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -108,7 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
 INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
                       "Block Frequency Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
                     "Block Frequency Analysis", true, true)
 
@@ -123,13 +123,13 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {}
 
 void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<BranchProbabilityInfo>();
-  AU.addRequired<LoopInfo>();
+  AU.addRequired<LoopInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
 bool BlockFrequencyInfo::runOnFunction(Function &F) {
   BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
-  LoopInfo &LI = getAnalysis<LoopInfo>();
+  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   if (!BFI)
     BFI.reset(new ImplType);
   BFI->doFunction(&F, &BPI, &LI);
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 06b8acd..278073c 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Support/raw_ostream.h"
+#include <numeric>
 
 using namespace llvm;
 using namespace llvm::bfi_detail;
@@ -122,8 +123,12 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
   }
   assert(W.Type == OtherW.Type);
   assert(W.TargetNode == OtherW.TargetNode);
-  assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
-  W.Amount += OtherW.Amount;
+  assert(OtherW.Amount && "Expected non-zero weight");
+  if (W.Amount > W.Amount + OtherW.Amount)
+    // Saturate on overflow.
+    W.Amount = UINT64_MAX;
+  else
+    W.Amount += OtherW.Amount;
 }
 
 static void combineWeightsBySorting(WeightList &Weights) {
   // Sort so edges to the same node are adjacent.
@@ -206,11 +211,19 @@ void Distribution::normalize() {
     Shift = 33 - countLeadingZeros(Total);
 
   // Early exit if nothing needs to be scaled.
-  if (!Shift)
+  if (!Shift) {
+    // If we didn't overflow then combineWeights() shouldn't have changed the
+    // sum of the weights, but let's double-check.
+    assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
+                                    [](uint64_t Sum, const Weight &W) {
+                                      return Sum + W.Amount;
+                                    }) &&
+           "Expected total to be correct");
     return;
+  }
 
   // Recompute the total through accumulation (rather than shifting it) so that
-  // it's accurate after shifting.
+  // it's accurate after shifting and any changes combineWeights() made above.
   Total = 0;
 
   // Sum the weights to each node and shift right if necessary.
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index bbd8750..8cd6ea4 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
 
 INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
                       "Branch Probability Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
                     "Branch Probability Analysis", false, true)
 
@@ -196,7 +196,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
   SmallVector<uint32_t, 2> Weights;
   Weights.reserve(TI->getNumSuccessors());
   for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
-    ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i));
+    ConstantInt *Weight =
+        mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
     if (!Weight)
       return false;
     Weights.push_back(
@@ -483,7 +484,7 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
 }
 
 void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<LoopInfo>();
+  AU.addRequired<LoopInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
@@ -491,7 +492,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
   DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
                << " ----\n\n");
   LastF = &F; // Store the last function we ran on for printing.
-  LI = &getAnalysis<LoopInfo>();
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 
   assert(PostDominatedByUnreachable.empty());
   assert(PostDominatedByColdCall.empty());
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 25e7bc0..8ecd70b 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
 void llvm::FindFunctionBackedges(const Function &F,
       SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
   const BasicBlock *BB = &F.getEntryBlock();
-  if (succ_begin(BB) == succ_end(BB))
+  if (succ_empty(BB))
     return;
 
   SmallPtrSet<const BasicBlock*, 8> Visited;
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
index 5f1b3d3..82fbfe0 100644
--- a/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -29,20 +29,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "StratifiedSets.h"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
@@ -51,6 +52,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "cfl-aa"
+
 // Try to go from a Value* to a Function*. Never returns nullptr.
 static Optional<Function *> parentFunctionOfValue(Value *);
@@ -227,10 +230,14 @@ public:
     // Comparisons between global variables and other constants should be
     // handled by BasicAA.
     if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
-      return MayAlias;
+      return AliasAnalysis::alias(LocA, LocB);
     }
 
-    return query(LocA, LocB);
+    AliasResult QueryResult = query(LocA, LocB);
+    if (QueryResult == MayAlias)
+      return AliasAnalysis::alias(LocA, LocB);
+
+    return QueryResult;
   }
 
   void initializePass() override { InitializeAliasAnalysis(this); }
@@ -295,8 +302,11 @@ public:
   }
 
   void visitSelectInst(SelectInst &Inst) {
-    auto *Condition = Inst.getCondition();
-    Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone));
+    // Condition is not processed here (The actual statement producing
+    // the condition result is processed elsewhere). For select, the
+    // condition is evaluated, but not loaded, stored, or assigned
+    // simply as a result of being the condition of a select.
+
     auto *TrueVal = Inst.getTrueValue();
     Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone));
     auto *FalseVal = Inst.getFalseValue();
@@ -768,13 +778,16 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
     return AttrGlobalIndex;
 
   if (auto *Arg = dyn_cast<Argument>(Val))
-    if (!Arg->hasNoAliasAttr())
+    // Only pointer arguments should have the argument attribute,
+    // because things can't escape through scalars without us seeing a
+    // cast, and thus, interaction with them doesn't matter.
+    if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy())
       return argNumberToAttrIndex(Arg->getArgNo());
   return NoneType();
 }
 
 static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
-  if (ArgNum > AttrMaxNumArgs)
+  if (ArgNum >= AttrMaxNumArgs)
     return AttrAllIndex;
   return ArgNum + AttrFirstArgIndex;
 }
@@ -964,8 +977,10 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
   auto MaybeFnA = parentFunctionOfValue(ValA);
   auto MaybeFnB = parentFunctionOfValue(ValB);
   if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
-    llvm_unreachable("Don't know how to extract the parent function "
-                     "from values A or B");
+    // The only times this is known to happen are when globals + InlineAsm
+    // are involved
+    DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
+    return AliasAnalysis::MayAlias;
   }
 
   if (MaybeFnA.hasValue()) {
@@ -991,23 +1006,31 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
 
   auto SetA = *MaybeA;
   auto SetB = *MaybeB;
-
-  if (SetA.Index == SetB.Index)
-    return AliasAnalysis::PartialAlias;
-
   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+
   // Stratified set attributes are used as markets to signify whether a member
-  // of a StratifiedSet (or a member of a set above the current set) has
+  // of a StratifiedSet (or a member of a set above the current set) has
   // interacted with either arguments or globals. "Interacted with" meaning
-  // its value may be different depending on the value of an argument or
+  // its value may be different depending on the value of an argument or
   // global. The thought behind this is that, because arguments and globals
   // may alias each other, if AttrsA and AttrsB have touched args/globals,
-  // we must conservatively say that they alias. However, if at least one of
-  // the sets has no values that could legally be altered by changing the value
+  // we must conservatively say that they alias. However, if at least one of
+  // the sets has no values that could legally be altered by changing the value
   // of an argument or global, then we don't have to be as conservative.
   if (AttrsA.any() && AttrsB.any())
     return AliasAnalysis::MayAlias;
 
+  // We currently unify things even if the accesses to them may not be in
+  // bounds, so we can't return partial alias here because we don't
+  // know whether the pointer is really within the object or not.
+  // IE Given an out of bounds GEP and an alloca'd pointer, we may
+  // unify the two. We can't return partial alias for this case.
+  // Since we do not currently track enough information to
+  // differentiate
+
+  if (SetA.Index == SetB.Index)
+    return AliasAnalysis::MayAlias;
+
   return AliasAnalysis::NoAlias;
 }
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index 5d1d8a9..4a03002 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -13,105 +13,10 @@
 
 using namespace llvm;
 
-static cl::opt<bool>
-DebugPM("debug-cgscc-pass-manager", cl::Hidden,
-        cl::desc("Print CGSCC pass management debugging information"));
-
-PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C,
-                                        CGSCCAnalysisManager *AM) {
-  PreservedAnalyses PA = PreservedAnalyses::all();
-
-  if (DebugPM)
-    dbgs() << "Starting CGSCC pass manager run.\n";
-
-  for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
-    if (DebugPM)
-      dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n";
-
-    PreservedAnalyses PassPA = Passes[Idx]->run(C, AM);
-    if (AM)
-      AM->invalidate(C, PassPA);
-    PA.intersect(std::move(PassPA));
-  }
-
-  if (DebugPM)
-    dbgs() << "Finished CGSCC pass manager run.\n";
-
-  return PA;
-}
-
-bool CGSCCAnalysisManager::empty() const {
-  assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() &&
-         "The storage and index of analysis results disagree on how many there "
-         "are!");
-  return CGSCCAnalysisResults.empty();
-}
-
-void CGSCCAnalysisManager::clear() {
-  CGSCCAnalysisResults.clear();
-  CGSCCAnalysisResultLists.clear();
-}
-
-CGSCCAnalysisManager::ResultConceptT &
-CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) {
-  CGSCCAnalysisResultMapT::iterator RI;
-  bool Inserted;
-  std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair(
-      std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator()));
-
-  // If we don't have a cached result for this function, look up the pass and
-  // run it to produce a result, which we then add to the cache.
-  if (Inserted) {
-    CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C];
-    ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this));
-    RI->second = std::prev(ResultList.end());
-  }
-
-  return *RI->second->second;
-}
-
-CGSCCAnalysisManager::ResultConceptT *
-CGSCCAnalysisManager::getCachedResultImpl(void *PassID,
-                                          LazyCallGraph::SCC *C) const {
-  CGSCCAnalysisResultMapT::const_iterator RI =
-      CGSCCAnalysisResults.find(std::make_pair(PassID, C));
-  return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second;
-}
-
-void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) {
-  CGSCCAnalysisResultMapT::iterator RI =
-      CGSCCAnalysisResults.find(std::make_pair(PassID, C));
-  if (RI == CGSCCAnalysisResults.end())
-    return;
-
-  CGSCCAnalysisResultLists[C].erase(RI->second);
-}
-
-void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C,
-                                          const PreservedAnalyses &PA) {
-  // Clear all the invalidated results associated specifically with this
-  // function.
-  SmallVector<void *, 8> InvalidatedPassIDs;
-  CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C];
-  for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(),
-                                          E = ResultsList.end();
-       I != E;)
-    if (I->second->invalidate(C, PA)) {
-      InvalidatedPassIDs.push_back(I->first);
-      I = ResultsList.erase(I);
-    } else {
-      ++I;
-    }
-  while (!InvalidatedPassIDs.empty())
-    CGSCCAnalysisResults.erase(
-        std::make_pair(InvalidatedPassIDs.pop_back_val(), C));
-  CGSCCAnalysisResultLists.erase(C);
-}
-
 char CGSCCAnalysisManagerModuleProxy::PassID;
 
 CGSCCAnalysisManagerModuleProxy::Result
-CGSCCAnalysisManagerModuleProxy::run(Module *M) {
+CGSCCAnalysisManagerModuleProxy::run(Module &M) {
   assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!");
   return Result(*CGAM);
 }
@@ -123,7 +28,7 @@ CGSCCAnalysisManagerModuleProxy::Result::~Result() {
 }
 
 bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
-    Module *M, const PreservedAnalyses &PA) {
+    Module &M, const PreservedAnalyses &PA) {
   // If this proxy isn't marked as preserved, then we can't even invalidate
   // individual CGSCC analyses, there may be an invalid set of SCC objects in
   // the cache making it impossible to incrementally preserve them.
@@ -140,7 +45,7 @@ char ModuleAnalysisManagerCGSCCProxy::PassID;
 char FunctionAnalysisManagerCGSCCProxy::PassID;
 
 FunctionAnalysisManagerCGSCCProxy::Result
-FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) {
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C) {
   assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!");
   return Result(*FAM);
 }
@@ -152,7 +57,7 @@ FunctionAnalysisManagerCGSCCProxy::Result::~Result() {
 }
 
 bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
-    LazyCallGraph::SCC *C, const PreservedAnalyses &PA) {
+    LazyCallGraph::SCC &C, const PreservedAnalyses &PA) {
   // If this proxy isn't marked as preserved, then we can't even invalidate
   // individual function analyses, there may be an invalid set of Function
   // objects in the cache making it impossible to incrementally preserve them.
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 4e9664f..d840037 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMAnalysis
   AliasDebugger.cpp
   AliasSetTracker.cpp
   Analysis.cpp
-  AssumptionTracker.cpp
+  AssumptionCache.cpp
   BasicAliasAnalysis.cpp
   BlockFrequencyInfo.cpp
   BlockFrequencyInfoImpl.cpp
@@ -22,7 +22,6 @@ add_llvm_library(LLVMAnalysis
   DependenceAnalysis.cpp
   DomPrinter.cpp
   DominanceFrontier.cpp
-  FunctionTargetTransformInfo.cpp
   IVUsers.cpp
   InstCount.cpp
   InstructionSimplify.cpp
@@ -35,9 +34,11 @@ add_llvm_library(LLVMAnalysis
   LibCallSemantics.cpp
   Lint.cpp
   Loads.cpp
+  LoopAccessAnalysis.cpp
   LoopInfo.cpp
   LoopPass.cpp
   MemDepPrinter.cpp
+  MemDerefPrinter.cpp
   MemoryBuiltins.cpp
   MemoryDependenceAnalysis.cpp
   ModuleDebugInfoPrinter.cpp
@@ -53,11 +54,15 @@ add_llvm_library(LLVMAnalysis
   ScalarEvolutionExpander.cpp
   ScalarEvolutionNormalization.cpp
   SparsePropagation.cpp
+  TargetLibraryInfo.cpp
   TargetTransformInfo.cpp
   Trace.cpp
   TypeBasedAliasAnalysis.cpp
   ScopedNoAliasAA.cpp
   ValueTracking.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis
   )
 
 add_dependencies(LLVMAnalysis intrinsics_gen)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index a271729..5a54754 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -19,8 +19,8 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index f29e4a2..fa5683c 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -66,11 +66,16 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
 }
 
 // Find all ephemeral values.
-void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
-                                         SmallPtrSetImpl<const Value*> &EphValues) {
+void CodeMetrics::collectEphemeralValues(
+    const Loop *L, AssumptionCache *AC,
+    SmallPtrSetImpl<const Value *> &EphValues) {
   SmallVector<const Value *, 16> WorkSet;
 
-  for (auto &I : AT->assumptions(L->getHeader()->getParent())) {
+  for (auto &AssumeVH : AC->assumptions()) {
+    if (!AssumeVH)
+      continue;
+    Instruction *I = cast<Instruction>(AssumeVH);
+
     // Filter out call sites outside of the loop so we don't to a function's
     // worth of work for each of its loops (and, in the common case, ephemeral
     // values in the loop are likely due to @llvm.assume calls in the loop).
@@ -83,12 +88,19 @@ void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
   completeEphemeralValues(WorkSet, EphValues);
 }
 
-void CodeMetrics::collectEphemeralValues(const Function *F, AssumptionTracker *AT,
-                                         SmallPtrSetImpl<const Value*> &EphValues) {
+void CodeMetrics::collectEphemeralValues(
+    const Function *F, AssumptionCache *AC,
+    SmallPtrSetImpl<const Value *> &EphValues) {
   SmallVector<const Value *, 16> WorkSet;
 
-  for (auto &I : AT->assumptions(const_cast<Function*>(F)))
+  for (auto &AssumeVH : AC->assumptions()) {
+    if (!AssumeVH)
+      continue;
+    Instruction *I = cast<Instruction>(AssumeVH);
+    assert(I->getParent()->getParent() == F &&
+           "Found assumption for the wrong function!");
     WorkSet.push_back(I);
+  }
 
   completeEphemeralValues(WorkSet, EphValues);
 }
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index fd8f2ae..fcafb41 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/config.h"
 #include "llvm/IR/Constants.h"
@@ -33,7 +34,6 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include <cerrno>
 #include <cmath>
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 1b74f8c..b529c1a 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -83,7 +83,8 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool CostModelAnalysis::runOnFunction(Function &F) {
   this->F = &F;
-  TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+  TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
   return false;
 }
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 9334ceb..d603b7b 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -59,14 +59,14 @@ public:
 
 void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
-  AU.addRequired<LoopInfo>();
+  AU.addRequired<LoopInfoWrapperPass>();
   AU.addRequired<ScalarEvolution>();
 }
 
 bool Delinearization::runOnFunction(Function &F) {
   this->F = &F;
   SE = &getAnalysis<ScalarEvolution>();
-  LI = &getAnalysis<LoopInfo>();
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   return false;
 }
@@ -141,7 +141,7 @@ char Delinearization::ID = 0;
 static const char delinearization_name[] = "Delinearization";
 INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
                       true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
 
 FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 092df5c..fda664b 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -114,7 +114,7 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
 
 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
                       "Dependence Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(DependenceAnalysis, "da",
@@ -132,7 +132,7 @@ bool DependenceAnalysis::runOnFunction(Function &F) {
   this->F = &F;
   AA = &getAnalysis<AliasAnalysis>();
   SE = &getAnalysis<ScalarEvolution>();
-  LI = &getAnalysis<LoopInfo>();
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   return false;
 }
 
@@ -145,7 +145,7 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequiredTransitive<AliasAnalysis>();
   AU.addRequiredTransitive<ScalarEvolution>();
-  AU.addRequiredTransitive<LoopInfo>();
+  AU.addRequiredTransitive<LoopInfoWrapperPass>();
 }
diff --git a/lib/Analysis/FunctionTargetTransformInfo.cpp b/lib/Analysis/FunctionTargetTransformInfo.cpp
deleted file mode 100644
index a686bec..0000000
--- a/lib/Analysis/FunctionTargetTransformInfo.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===- llvm/Analysis/FunctionTargetTransformInfo.h --------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass wraps a TargetTransformInfo in a FunctionPass so that it can
-// forward along the current Function so that we can make target specific
-// decisions based on the particular subtarget specified for each Function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/FunctionTargetTransformInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "function-tti"
-static const char ftti_name[] = "Function TargetTransformInfo";
-INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-char FunctionTargetTransformInfo::ID = 0;
-
-namespace llvm {
-FunctionPass *createFunctionTargetTransformInfoPass() {
-  return new FunctionTargetTransformInfo();
-}
-}
-
-FunctionTargetTransformInfo::FunctionTargetTransformInfo()
-    : FunctionPass(ID), Fn(nullptr), TTI(nullptr) {
-  initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry());
-}
-
-void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<TargetTransformInfo>();
-}
-
-void FunctionTargetTransformInfo::releaseMemory() {}
-
-bool FunctionTargetTransformInfo::runOnFunction(Function &F) {
-  Fn = &F;
-  TTI = &getAnalysis<TargetTransformInfo>();
-  return false;
-}
diff --git a/lib/Analysis/IPA/Android.mk b/lib/Analysis/IPA/Android.mk
index d56d931..2e5e571 100644
--- a/lib/Analysis/IPA/Android.mk
+++ b/lib/Analysis/IPA/Android.mk
@@ -4,7 +4,6 @@ analysis_ipa_SRC_FILES := \
   CallGraph.cpp \
   CallGraphSCCPass.cpp \
   CallPrinter.cpp \
-  FindUsedTypes.cpp \
   GlobalsModRef.cpp \
   IPA.cpp \
   InlineCost.cpp
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 67b4135..6095136 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_library(LLVMipa
   CallGraph.cpp
   CallGraphSCCPass.cpp
   CallPrinter.cpp
-  FindUsedTypes.cpp
   GlobalsModRef.cpp
   IPA.cpp
   InlineCost.cpp
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 665aa7f..ded1de7 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -21,8 +21,8 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LegacyPassManagers.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManagers.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
deleted file mode 100644
index b37344b..0000000
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to seek out all of the types in use by the program. Note
-// that this analysis explicitly does not include types only used by the symbol
-// table.
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -char FindUsedTypes::ID = 0; -INITIALIZE_PASS(FindUsedTypes, "print-used-types", - "Find Used Types", false, true) - -// IncorporateType - Incorporate one type and all of its subtypes into the -// collection of used types. -// -void FindUsedTypes::IncorporateType(Type *Ty) { - // If ty doesn't already exist in the used types map, add it now, otherwise - // return. - if (!UsedTypes.insert(Ty)) return; // Already contain Ty. - - // Make sure to add any types this type references now. - // - for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); - I != E; ++I) - IncorporateType(*I); -} - -void FindUsedTypes::IncorporateValue(const Value *V) { - IncorporateType(V->getType()); - - // If this is a constant, it could be using other types... - if (const Constant *C = dyn_cast<Constant>(V)) { - if (!isa<GlobalValue>(C)) - for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); - OI != OE; ++OI) - IncorporateValue(*OI); - } -} - - -// run - This incorporates all types used by the specified module -// -bool FindUsedTypes::runOnModule(Module &m) { - UsedTypes.clear(); // reset if run multiple times... - - // Loop over global variables, incorporating their types - for (Module::const_global_iterator I = m.global_begin(), E = m.global_end(); - I != E; ++I) { - IncorporateType(I->getType()); - if (I->hasInitializer()) - IncorporateValue(I->getInitializer()); - } - - for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) { - IncorporateType(MI->getType()); - const Function &F = *MI; - - // Loop over all of the instructions in the function, adding their return - // type as well as the types of their operands. - // - for (const_inst_iterator II = inst_begin(F), IE = inst_end(F); - II != IE; ++II) { - const Instruction &I = *II; - - IncorporateType(I.getType()); // Incorporate the type of the instruction - for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); - OI != OE; ++OI) - IncorporateValue(*OI); // Insert inst operand types as well - } - } - - return false; -} - -// Print the types found in the module. If the optional Module parameter is -// passed in, then the types are printed symbolically if possible, using the -// symbol table from the module. 
-// -void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { - OS << "Types in use by this module:\n"; - for (SetVector<Type *>::const_iterator I = UsedTypes.begin(), - E = UsedTypes.end(); I != E; ++I) { - OS << " " << **I << '\n'; - } -} diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp index b26c052..806bfb8 100644 --- a/lib/Analysis/IPA/IPA.cpp +++ b/lib/Analysis/IPA/IPA.cpp @@ -22,7 +22,6 @@ void llvm::initializeIPA(PassRegistry &Registry) { initializeCallGraphWrapperPassPass(Registry); initializeCallGraphPrinterPass(Registry); initializeCallGraphViewerPass(Registry); - initializeFindUsedTypesPass(Registry); initializeGlobalsModRefPass(Registry); } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 85db278..cd494ba 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionTracker.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" @@ -52,7 +52,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { const TargetTransformInfo &TTI; /// The cache of @llvm.assume intrinsics. - AssumptionTracker *AT; + AssumptionCacheTracker *ACT; // The called function. Function &F; @@ -146,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { public: CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, - AssumptionTracker *AT, Function &Callee, int Threshold) - : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0), + AssumptionCacheTracker *ACT, Function &Callee, int Threshold) + : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -601,7 +601,13 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; - Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); + Value *SimpleV = nullptr; + if (auto FI = dyn_cast<FPMathOperator>(&I)) + SimpleV = + SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + else + SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); + if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { SimplifiedValues[&I] = C; return true; @@ -713,8 +719,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.hasFnAttr(Attribute::ReturnsTwice) && - !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice)) { + !F.hasFnAttribute(Attribute::ReturnsTwice)) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; @@ -783,7 +788,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. 
- CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -907,6 +912,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; + // If the instruction is floating point, and the target says this operation is + // expensive or the function has the "use-soft-float" attribute, this may + // eventually become a library call. Treat the cost as such. + if (I->getType()->isFloatingPointTy()) { + bool hasSoftFloatAttr = false; + + // If the function has the "use-soft-float" attribute, mark it as expensive. + if (F.hasFnAttribute("use-soft-float")) { + Attribute Attr = F.getFnAttribute("use-soft-float"); + StringRef Val = Attr.getValueAsString(); + if (Val == "true") + hasSoftFloatAttr = true; + } + + if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive || + hasSoftFloatAttr) + Cost += InlineConstants::CallPenalty; + } + // If the instruction simplified to a constant, there is no cost to this // instruction. Visit the instructions using our InstVisitor to account for // all of the per-instruction logic. The visit tree returns true if we @@ -1110,7 +1134,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // the ephemeral values multiple times (and they're completely determined by // the callee, so this is purely duplicate work). SmallPtrSet<const Value *, 32> EphValues; - CodeMetrics::collectEphemeralValues(&F, AT, EphValues); + CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues); // The worklist of live basic blocks in the callee *after* inlining. 
We avoid // adding basic blocks of the callee which can be proven to be dead for this @@ -1232,8 +1256,8 @@ void CallAnalyzer::dump() { INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", true, true) -INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", true, true) @@ -1245,14 +1269,14 @@ InlineCostAnalysis::~InlineCostAnalysis() {} void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<AssumptionTracker>(); - AU.addRequired<TargetTransformInfo>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); CallGraphSCCPass::getAnalysisUsage(AU); } bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { - TTI = &getAnalysis<TargetTransformInfo>(); - AT = &getAnalysis<AssumptionTracker>(); + TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); + ACT = &getAnalysis<AssumptionCacheTracker>(); return false; } @@ -1309,7 +1333,8 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold); + CallAnalyzer CA(Callee->getDataLayout(), TTIWP->getTTI(*Callee), + ACT, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1324,9 +1349,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, } bool InlineCostAnalysis::isInlineViable(Function &F) { - bool ReturnsTwice = - F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice); + bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain indirect branches or // blockaddresses. 
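The mechanical change repeated across these hunks — the TargetTransformInfo analysis group becoming TargetTransformInfoWrapperPass with a per-function getTTI(), and AssumptionTracker becoming AssumptionCacheTracker with getAssumptionCache(F) — boils down to the following minimal sketch. The pass itself is hypothetical; only the wrapper-pass types and their accessors are taken from the hunks above.

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    namespace {
    // Hypothetical consumer pass, shown only to illustrate the new accessors.
    struct ExampleTTIUser : FunctionPass {
      static char ID;
      ExampleTTIUser() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        // Previously: AU.addRequired<TargetTransformInfo>() (an analysis
        // group) and AU.addRequired<AssumptionTracker>().
        AU.addRequired<TargetTransformInfoWrapperPass>();
        AU.addRequired<AssumptionCacheTracker>();
        AU.setPreservesAll();
      }

      bool runOnFunction(Function &F) override {
        // TTI is now handed out per function, so subtarget-specific
        // decisions no longer need the FunctionTargetTransformInfo shim
        // deleted above.
        TargetTransformInfo &TTI =
            getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
        AssumptionCache &AC =
            getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
        (void)TTI;
        (void)AC;
        return false;
      }
    };
    }

    char ExampleTTIUser::ID = 0;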
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 6b5f370..140753c 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -33,7 +33,7 @@ using namespace llvm; char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(IVUsers, "iv-users", @@ -241,7 +241,7 @@ IVUsers::IVUsers() } void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolution>(); AU.setPreservesAll(); @@ -250,7 +250,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { L = l; - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index f151a3a..0cb0982 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" +#include <algorithm> using namespace llvm; using namespace llvm::PatternMatch; @@ -46,19 +48,21 @@ struct Query { const DataLayout *DL; const TargetLibraryInfo *TLI; const DominatorTree *DT; - AssumptionTracker *AT; + AssumptionCache *AC; const Instruction *CxtI; Query(const DataLayout *DL, const TargetLibraryInfo *tli, - const DominatorTree *dt, AssumptionTracker *at = nullptr, + const DominatorTree *dt, AssumptionCache *ac = nullptr, const Instruction *cxti = nullptr) - : DL(DL), TLI(tli), DT(dt), AT(at), CxtI(cxti) {} + : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {} }; } // end anonymous namespace static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, unsigned); +static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, + const Query &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, unsigned); static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); @@ -581,10 +585,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, - Query (DL, TLI, DT, AT, CxtI), RecursionLimit); + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); } /// \brief Compute the base pointer and cumulative constant offsets for V. 
@@ -683,17 +687,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); - // X - (0 - Y) -> X if the second sub is NUW. - // If Y != 0, 0 - Y is a poison value. - // If Y == 0, 0 - Y simplifies to 0. - if (BinaryOperator::isNeg(Op1)) { - if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) { - assert(BO->getOpcode() == Instruction::Sub && - "Expected a subtraction operator!"); - if (BO->hasNoUnsignedWrap()) - return Op0; - } - } + // 0 - X -> 0 if the sub is NUW. + if (isNUW && match(Op0, m_Zero())) + return Op0; // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. // For example, (X + Y) - Y -> X; (Y + X) - Y -> X @@ -788,10 +784,10 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, - Query (DL, TLI, DT, AT, CxtI), RecursionLimit); + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); } /// Given operands for an FAdd, see if we can fold the result. If not, this @@ -966,37 +962,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, - const Instruction *CxtI) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI), + const DataLayout *DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, - const Instruction *CxtI) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI), + const DataLayout *DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, - FastMathFlags FMF, +Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1017,6 +1013,10 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (match(Op1, m_Undef())) return Op1; + // X / 0 -> undef, we don't need to preserve faults! 
+ if (match(Op1, m_Zero())) + return UndefValue::get(Op1->getType()); + // undef / X -> 0 if (match(Op0, m_Undef())) return Constant::getNullValue(Op0->getType()); @@ -1094,10 +1094,9 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1113,15 +1112,14 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, - unsigned) { +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned) { // undef / X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -1130,15 +1128,21 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, if (match(Op1, m_Undef())) return Op1; + // 0 / X -> 0 + // Requires that NaNs are off (X could be zero) and signed zeroes are + // ignored (X could be positive or negative, so the output sign is unknown). + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) + return Op0; + return nullptr; } -Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1215,10 +1219,9 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1234,15 +1237,14 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, - unsigned) { +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &, unsigned) { // undef % X -> undef (the undef could be a snan). 
if (match(Op0, m_Undef())) return Op0; @@ -1251,15 +1253,21 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, if (match(Op1, m_Undef())) return Op1; + // 0 % X -> 0 + // Requires that NaNs are off (X could be zero) and signed zeroes are + // ignored (X could be positive or negative, so the output sign is unknown). + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) + return Op0; + return nullptr; } -Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1340,13 +1348,18 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1, if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); + // undef >> X -> 0 + // undef >> X -> undef (if it's exact) + if (match(Op0, m_Undef())) + return isExact ? Op0 : Constant::getNullValue(Op0->getType()); + // The low bit cannot be shifted out of an exact shift if it is set. if (isExact) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); APInt Op0KnownZero(BitWidth, 0); APInt Op0KnownOne(BitWidth, 0); - computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AT, Q.CxtI, Q.DT); + computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC, + Q.CxtI, Q.DT); if (Op0KnownOne[0]) return Op0; } @@ -1362,8 +1375,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return V; // undef << X -> 0 + // undef << X -> undef (if it's NSW/NUW) if (match(Op0, m_Undef())) - return Constant::getNullValue(Op0->getType()); + return isNSW || isNUW ?
Op0 : Constant::getNullValue(Op0->getType()); // (X >> A) << A -> X Value *X; @@ -1374,9 +1388,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1388,10 +1402,6 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, MaxRecurse)) return V; - // undef >>l X -> 0 - if (match(Op0, m_Undef())) - return Constant::getNullValue(Op0->getType()); - // (X << A) >> A -> X Value *X; if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1)))) @@ -1403,10 +1413,9 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1422,17 +1431,13 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, if (match(Op0, m_AllOnes())) return Op0; - // undef >>a X -> all ones - if (match(Op0, m_Undef())) - return Constant::getAllOnesValue(Op0->getType()); - // (X << A) >> A -> X Value *X; if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1)))) return X; // Arithmetic shifting an all-sign-bit value is a no-op. - unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AT, Q.CxtI, Q.DT); + unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return Op0; @@ -1442,19 +1447,63 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } +static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, + ICmpInst *UnsignedICmp, bool IsAnd) { + Value *X, *Y; + + ICmpInst::Predicate EqPred; + if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(Y), m_Zero())) || + !ICmpInst::isEquality(EqPred)) + return nullptr; + + ICmpInst::Predicate UnsignedPred; + if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) && + ICmpInst::isUnsigned(UnsignedPred)) + ; + else if (match(UnsignedICmp, + m_ICmp(UnsignedPred, m_Value(Y), m_Specific(X))) && + ICmpInst::isUnsigned(UnsignedPred)) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + else + return nullptr; + + // X < Y && Y != 0 --> X < Y + // X < Y || Y != 0 --> Y != 0 + if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) + return IsAnd ? 
UnsignedICmp : ZeroICmp; + + // X >= Y || Y != 0 --> true + // X >= Y || Y == 0 --> X >= Y + if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) { + if (EqPred == ICmpInst::ICMP_NE) + return getTrue(UnsignedICmp->getType()); + return UnsignedICmp; + } + + // X < Y && Y == 0 --> false + if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ && + IsAnd) + return getFalse(UnsignedICmp->getType()); + + return nullptr; +} + // Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range // of possible values cannot be satisfied. static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; ConstantInt *CI1, *CI2; Value *V; + + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + return X; + if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)), m_ConstantInt(CI2)))) return nullptr; @@ -1547,9 +1596,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) return Op0; - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) return Op1; } @@ -1596,9 +1645,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1608,6 +1657,10 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; ConstantInt *CI1, *CI2; Value *V; + + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + return X; + if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)), m_ConstantInt(CI2)))) return nullptr; @@ -1748,22 +1801,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ match(A, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue(), Q.DL, - 0, Q.AT, Q.CxtI, Q.DT)) + if (V1 == B && + MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return A; - if (V2 == B && MaskedValueIsZero(V1, C2->getValue(), Q.DL, - 0, Q.AT, Q.CxtI, Q.DT)) + if (V2 == B && + MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return A; } // Or commutes, try both ways. if ((C1->getValue() & (C1->getValue() + 1)) == 0 && match(B, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. 
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue(), Q.DL, - 0, Q.AT, Q.CxtI, Q.DT)) + if (V1 == A && + MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; - if (V2 == A && MaskedValueIsZero(V1, C1->getValue(), Q.DL, - 0, Q.AT, Q.CxtI, Q.DT)) + if (V2 == A && + MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; } } @@ -1780,9 +1833,9 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1837,9 +1890,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -2015,6 +2068,50 @@ static Constant *computePointerICmp(const DataLayout *DL, return ConstantExpr::getICmp(Pred, ConstantExpr::getAdd(LHSOffset, LHSNoBound), ConstantExpr::getAdd(RHSOffset, RHSNoBound)); + + // If one side of the equality comparison must come from a noalias call + // (meaning a system memory allocation function), and the other side must + // come from a pointer that cannot overlap with dynamically-allocated + // memory within the lifetime of the current function (allocas, byval + // arguments, globals), then determine the comparison result here. + SmallVector<Value *, 8> LHSUObjs, RHSUObjs; + GetUnderlyingObjects(LHS, LHSUObjs, DL); + GetUnderlyingObjects(RHS, RHSUObjs, DL); + + // Is the set of underlying objects all noalias calls? + auto IsNAC = [](SmallVectorImpl<Value *> &Objects) { + return std::all_of(Objects.begin(), Objects.end(), + [](Value *V){ return isNoAliasCall(V); }); + }; + + // Is the set of underlying objects all things which must be disjoint from + // noalias calls? For allocas, we consider only static ones (dynamic + // allocas might be transformed into calls to malloc not simultaneously + // live with the compared-to allocation). For globals, we exclude symbols + // that might be resolved lazily to symbols in another dynamically-loaded + // library (and, thus, could be malloc'ed by the implementation). + auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) { + return std::all_of(Objects.begin(), Objects.end(), + [](Value *V){ + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) + return AI->getParent() && AI->getParent()->getParent() && + AI->isStaticAlloca(); + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return (GV->hasLocalLinkage() || + GV->hasHiddenVisibility() || + GV->hasProtectedVisibility() || + GV->hasUnnamedAddr()) && + !GV->isThreadLocal(); + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + return false; + }); + }; + + if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) || + (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs))) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); } // Otherwise, fail.
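To make the new computePointerICmp folding concrete: in a hypothetical function like the one below (not from the patch), the heap pointer's only underlying object is a noalias allocation call and the other operand is a static alloca, so the equality can now fold to false at compile time.

    #include <cstdlib>

    // Illustrative only: 'heap' is derived from a noalias allocation call
    // and '&local' from a static alloca; the two cannot be equal while this
    // function is live, so 'heap == &local' simplifies to 'false'.
    bool compareHeapToStack() {
      int local = 0;
      int *heap = static_cast<int *>(std::malloc(sizeof(int)));
      bool same = (heap == &local);
      std::free(heap);
      return same;
    }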
@@ -2094,46 +2191,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getTrue(ITy); if (LHSKnownNonNegative) return getFalse(ITy); break; case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT)) + if (LHSKnownNonNegative && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getFalse(ITy); if (LHSKnownNonNegative) return getTrue(ITy); break; case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT)) + if (LHSKnownNonNegative && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; } @@ -2485,6 +2582,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // icmp pred (or X, Y), X + if (LBO && match(LBO, m_CombineOr(m_Or(m_Value(), m_Specific(RHS)), + m_Or(m_Specific(RHS), m_Value())))) { + if (Pred == ICmpInst::ICMP_ULT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_UGE) + return getTrue(ITy); + } + // icmp pred X, (or X, Y) + if (RBO && match(RBO, m_CombineOr(m_Or(m_Value(), m_Specific(LHS)), + m_Or(m_Specific(LHS), m_Value())))) { + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + } + + // icmp pred (and X, Y), X + if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)), + m_And(m_Specific(RHS), m_Value())))) { + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + // icmp pred X, (and X, Y) + if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)), + m_And(m_Specific(LHS), m_Value())))) { + if (Pred == ICmpInst::ICMP_UGE) + return getTrue(ITy); + if (Pred == ICmpInst::ICMP_ULT) + return getFalse(ITy); + } + // 0 - (zext X) pred C if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) { if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { @@ -2515,8 +2646,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + 
ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2526,8 +2657,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2546,8 +2677,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2557,8 +2688,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, - 0, Q.AT, Q.CxtI, Q.DT); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2867,7 +2998,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, uint32_t BitWidth = CI->getBitWidth(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AT, + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); const APInt &RHSVal = CI->getValue(); if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0)) @@ -2895,10 +3026,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, Instruction *CxtI) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -2936,44 +3066,57 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // Handle fcmp with constant RHS - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { // If the constant is a nan, see if we can fold the comparison based on it. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->getValueAPF().isNaN()) { - if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + if (CFP->getValueAPF().isNaN()) { + if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + return ConstantInt::getFalse(CFP->getContext()); + assert(FCmpInst::isUnordered(Pred) && + "Comparison must be either ordered or unordered!"); + // True if unordered. + return ConstantInt::getTrue(CFP->getContext()); + } + // Check whether the constant is an infinity. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + switch (Pred) { + case FCmpInst::FCMP_OLT: + // No value is ordered and less than negative infinity. return ConstantInt::getFalse(CFP->getContext()); - assert(FCmpInst::isUnordered(Pred) && - "Comparison must be either ordered or unordered!"); - // True if unordered. 
- return ConstantInt::getTrue(CFP->getContext()); - } - // Check whether the constant is an infinity. - if (CFP->getValueAPF().isInfinity()) { - if (CFP->getValueAPF().isNegative()) { - switch (Pred) { - case FCmpInst::FCMP_OLT: - // No value is ordered and less than negative infinity. - return ConstantInt::getFalse(CFP->getContext()); - case FCmpInst::FCMP_UGE: - // All values are unordered with or at least negative infinity. - return ConstantInt::getTrue(CFP->getContext()); - default: - break; - } - } else { - switch (Pred) { - case FCmpInst::FCMP_OGT: - // No value is ordered and greater than infinity. - return ConstantInt::getFalse(CFP->getContext()); - case FCmpInst::FCMP_ULE: - // All values are unordered with and at most infinity. - return ConstantInt::getTrue(CFP->getContext()); - default: - break; - } + case FCmpInst::FCMP_UGE: + // All values are unordered with or at least negative infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } else { + switch (Pred) { + case FCmpInst::FCMP_OGT: + // No value is ordered and greater than infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_ULE: + // All values are unordered with and at most infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; } } } + if (CFP->getValueAPF().isZero()) { + switch (Pred) { + case FCmpInst::FCMP_UGE: + if (CannotBeOrderedLessThanZero(LHS)) + return ConstantInt::getTrue(CFP->getContext()); + break; + case FCmpInst::FCMP_OLT: + // X < 0 + if (CannotBeOrderedLessThanZero(LHS)) + return ConstantInt::getFalse(CFP->getContext()); + break; + default: + break; + } + } } // If the comparison is with the result of a select instruction, check whether @@ -2994,10 +3137,9 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -3029,17 +3171,71 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X return TrueVal; + const auto *ICI = dyn_cast<ICmpInst>(CondVal); + unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); + if (ICI && BitWidth) { + ICmpInst::Predicate Pred = ICI->getPredicate(); + APInt MinSignedValue = APInt::getSignBit(BitWidth); + Value *X; + const APInt *Y; + bool TrueWhenUnset; + bool IsBitTest = false; + if (ICmpInst::isEquality(Pred) && + match(ICI->getOperand(0), m_And(m_Value(X), m_APInt(Y))) && + match(ICI->getOperand(1), m_Zero())) { + IsBitTest = true; + TrueWhenUnset = Pred == ICmpInst::ICMP_EQ; + } else if (Pred == ICmpInst::ICMP_SLT && + match(ICI->getOperand(1), m_Zero())) { + X = ICI->getOperand(0); + Y = &MinSignedValue; + IsBitTest = true; + TrueWhenUnset = false; + } else if (Pred == ICmpInst::ICMP_SGT && + match(ICI->getOperand(1), m_AllOnes())) { + X = ICI->getOperand(0); + Y = &MinSignedValue; + IsBitTest = true; + TrueWhenUnset = true; + } + if (IsBitTest) { + const APInt *C; + // (X & Y) == 0 ? X & ~Y : X --> X + // (X & Y) != 0 ? X & ~Y : X --> X & ~Y + if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) && + *Y == ~*C) + return TrueWhenUnset ? 
FalseVal : TrueVal; + // (X & Y) == 0 ? X : X & ~Y --> X & ~Y + // (X & Y) != 0 ? X : X & ~Y --> X + if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) && + *Y == ~*C) + return TrueWhenUnset ? FalseVal : TrueVal; + + if (Y->isPowerOf2()) { + // (X & Y) == 0 ? X | Y : X --> X | Y + // (X & Y) != 0 ? X | Y : X --> X + if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) && + *Y == *C) + return TrueWhenUnset ? TrueVal : FalseVal; + // (X & Y) == 0 ? X : X | Y --> X + // (X & Y) != 0 ? X : X | Y --> X | Y + if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) && + *Y == *C) + return TrueWhenUnset ? TrueVal : FalseVal; + } + } + + return nullptr; } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifySelectInst(Cond, TrueVal, FalseVal, - Query (DL, TLI, DT, AT, CxtI), RecursionLimit); + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can @@ -3126,9 +3322,9 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); + return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we @@ -3160,15 +3356,11 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return nullptr; } -Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef<unsigned> Idxs, - const DataLayout *DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, - const Instruction *CxtI) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, - Query (DL, TLI, DT, AT, CxtI), +Value *llvm::SimplifyInsertValueInst( + Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL, + const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -3215,10 +3407,9 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -3246,10 +3437,12 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); - case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FDiv: + return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); case
Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); - case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FRem: + return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, Q, MaxRecurse); @@ -3289,14 +3482,42 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } +/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can +/// fold the result. If not, this returns null. +/// In contrast to SimplifyBinOp, try to use FastMathFlags when folding the +/// result. In case we don't need FastMathFlags, simply fall back to SimplifyBinOp. +static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const Query &Q, + unsigned MaxRecurse) { + switch (Opcode) { + case Instruction::FAdd: + return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); + case Instruction::FSub: + return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse); + case Instruction::FMul: + return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse); + default: + return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse); + } +} + Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } +Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const DataLayout *DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -3308,9 +3529,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -3384,27 +3605,25 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout *DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, - const Instruction *CxtI) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AT, CxtI), + const TargetLibraryInfo *TLI, const DominatorTree *DT, + AssumptionCache *AC, const Instruction *CxtI) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionTracker *AT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCall(V, Args.begin(), Args.end(), - Query(DL, TLI, DT, AT, CxtI), RecursionLimit); + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionTracker *AT) { + const DominatorTree *DT, AssumptionCache *AC) { Value *Result; switch (I->getOpcode()) { @@ -3413,122 +3632,122 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), - cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - DL, TLI, DT, AT, I); + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL, + TLI, DT, AC, I); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), - cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - DL, TLI, DT, AT, I); + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL, + TLI, DT, AC, I); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Mul: - Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = + SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, + AC, 
I); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, + AC, I); break; case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, + AC, I); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, + AC, I); break; case Instruction::FRem: Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), - cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - DL, TLI, DT, AT, I); + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL, + TLI, DT, AC, I); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), - cast<BinaryOperator>(I)->isExact(), - DL, TLI, DT, AT, I); + cast<BinaryOperator>(I)->isExact(), DL, TLI, DT, + AC, I); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), - cast<BinaryOperator>(I)->isExact(), - DL, TLI, DT, AT, I); + cast<BinaryOperator>(I)->isExact(), DL, TLI, DT, + AC, I); break; case Instruction::And: - Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = + SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::Or: - Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AT, I); + Result = + SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::Xor: - Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = + SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::ICmp: - Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = + SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), I->getOperand(0), + I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::FCmp: - Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), - DL, TLI, DT, AT, I); + Result = + SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), + I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), DL, TLI, DT, AT, I); + I->getOperand(2), DL, TLI, DT, AC, I); break; case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(Ops, DL, TLI, DT, AT, I); + Result = SimplifyGEPInst(Ops, DL, TLI, DT, AC, I); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast<InsertValueInst>(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), DL, TLI, DT, AT, I); + IV->getIndices(), DL, TLI, DT, AC, I); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT, 
AT, I)); + Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I)); break; case Instruction::Call: { CallSite CS(cast<CallInst>(I)); - Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), - DL, TLI, DT, AT, I); + Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL, + TLI, DT, AC, I); break; } case Instruction::Trunc: - Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, - AT, I); + Result = + SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I); break; } @@ -3553,7 +3772,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionTracker *AT) { + AssumptionCache *AC) { bool Simplified = false; SmallSetVector<Instruction *, 8> Worklist; @@ -3580,7 +3799,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, I = Worklist[Idx]; // See if this instruction simplifies. - SimpleV = SimplifyInstruction(I, DL, TLI, DT, AT); + SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC); if (!SimpleV) continue; @@ -3603,20 +3822,19 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, return Simplified; } -bool llvm::recursivelySimplifyInstruction(Instruction *I, - const DataLayout *DL, +bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionTracker *AT) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AT); + AssumptionCache *AC) { + return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionTracker *AT) { + AssumptionCache *AC) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AT); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC); } diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index a8a8079..3039dde 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = IPA type = Library name = Analysis parent = Libraries -required_libraries = Core Support Target +required_libraries = Core Support diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index 767da4e..c8d0410 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -708,11 +708,11 @@ static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) { OS << "\n"; } -PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, +PreservedAnalyses LazyCallGraphPrinterPass::run(Module &M, ModuleAnalysisManager *AM) { LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M); - OS << "Printing the call graph for module: " << M->getModuleIdentifier() + OS << "Printing the call graph for module: " << M.getModuleIdentifier() << "\n\n"; SmallPtrSet<LazyCallGraph::Node *, 16> Printed; @@ -724,5 +724,4 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, printSCC(OS, SCC); return PreservedAnalyses::all(); - } diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index c712c9f..87c31fd 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -1,4 +1,4 @@ -//===- LazyValueInfo.cpp - 
Value constraint analysis ----------------------===// +//===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,8 +15,9 @@ #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/ConstantRange.h" @@ -29,7 +30,6 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include <map> #include <stack> using namespace llvm; @@ -40,8 +40,8 @@ using namespace PatternMatch; char LazyValueInfo::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -54,8 +54,7 @@ namespace llvm { // LVILatticeVal //===----------------------------------------------------------------------===// -/// LVILatticeVal - This is the information tracked by LazyValueInfo for each -/// value. +/// This is the information tracked by LazyValueInfo for each value. /// /// FIXME: This is basically just for bringup, this can be made a lot more rich /// in the future. @@ -63,19 +62,19 @@ namespace llvm { namespace { class LVILatticeVal { enum LatticeValueTy { - /// undefined - This Value has no known value yet. + /// This Value has no known value yet. undefined, - /// constant - This Value has a specific constant value. + /// This Value has a specific constant value. constant, - /// notconstant - This Value is known to not have the specified value. + + /// This Value is known to not have the specified value. notconstant, - /// constantrange - The Value falls within this range. + /// The Value falls within this range. constantrange, - /// overdefined - This value is not known to be constant, and we know that - /// it has a value. + /// This value is not known to be constant, and we know that it has a value. overdefined }; @@ -128,7 +127,7 @@ public: return Range; } - /// markOverdefined - Return true if this is a change in status. + /// Return true if this is a change in status. bool markOverdefined() { if (isOverdefined()) return false; @@ -136,7 +135,7 @@ public: return true; } - /// markConstant - Return true if this is a change in status. + /// Return true if this is a change in status. bool markConstant(Constant *V) { assert(V && "Marking constant with NULL"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) @@ -152,7 +151,7 @@ public: return true; } - /// markNotConstant - Return true if this is a change in status. + /// Return true if this is a change in status. bool markNotConstant(Constant *V) { assert(V && "Marking constant with NULL"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) @@ -170,7 +169,7 @@ public: return true; } - /// markConstantRange - Return true if this is a change in status. + /// Return true if this is a change in status. 
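For orientation, the mark* and mergeIn methods here maintain a five-point lattice: undefined at the bottom, overdefined at the top, and the three "known" states in between, where merging two incomparable facts jumps to overdefined. A minimal standalone sketch of that rule, with the Constant*/ConstantRange payloads collapsed to an int (TinyLattice and its members are hypothetical names, not LLVM API):

enum class State { Undefined, Constant, NotConstant, ConstantRange, Overdefined };

struct TinyLattice {
  State S = State::Undefined;
  int Payload = 0; // stands in for the Constant* / ConstantRange payloads

  // Merge RHS into *this; return true iff the status changed, mirroring
  // the "return true if this is a change in status" contract above.
  bool mergeIn(const TinyLattice &RHS) {
    if (RHS.S == State::Undefined || S == State::Overdefined)
      return false;               // nothing new to learn
    if (S == State::Undefined) {
      *this = RHS;                // adopt the incoming fact
      return true;
    }
    if (S == RHS.S && Payload == RHS.Payload)
      return false;               // same fact already held
    S = State::Overdefined;       // incomparable facts: go to top
    return true;
  }
};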
bool markConstantRange(const ConstantRange NewR) { if (isConstantRange()) { if (NewR.isEmptySet()) @@ -190,7 +189,7 @@ public: return true; } - /// mergeIn - Merge the specified lattice value into this one, updating this + /// Merge the specified lattice value into this one, updating this /// one and returning true if anything changed. bool mergeIn(const LVILatticeVal &RHS) { if (RHS.isUndefined() || isOverdefined()) return false; @@ -298,8 +297,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { //===----------------------------------------------------------------------===// namespace { - /// LVIValueHandle - A callback value handle updates the cache when - /// values are erased. + /// A callback value handle updates the cache when values are erased. class LazyValueInfoCache; struct LVIValueHandle : public CallbackVH { LazyValueInfoCache *Parent; @@ -315,62 +313,62 @@ namespace { } namespace { - /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which + /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { - /// ValueCacheEntryTy - This is all of the cached block information for - /// exactly one Value*. The entries are sorted by the BasicBlock* of the + /// This is all of the cached block information for exactly one Value*. + /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy; - /// ValueCache - This is all of the cached information for all values, + /// This is all of the cached information for all values, /// mapped from Value* to key information. std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; - /// OverDefinedCache - This tracks, on a per-block basis, the set of - /// values that are over-defined at the end of that block. This is required + /// This tracks, on a per-block basis, the set of values that are + /// over-defined at the end of that block. This is required /// for cache updating. typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; DenseSet<OverDefinedPairTy> OverDefinedCache; - /// SeenBlocks - Keep track of all blocks that we have ever seen, so we + /// Keep track of all blocks that we have ever seen, so we /// don't spend time removing unused blocks from our caches. DenseSet<AssertingVH<BasicBlock> > SeenBlocks; - /// BlockValueStack - This stack holds the state of the value solver - /// during a query. It basically emulates the callstack of the naive + /// This stack holds the state of the value solver during a query. + /// It basically emulates the callstack of the naive /// recursive value lookup process. std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack; + /// Keeps track of which block-value pairs are in BlockValueStack. + DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet; + + /// Push BV onto BlockValueStack unless it's already in there. + /// Returns true on success. + bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) { + if (BlockValueSet.count(BV)) + return false; // It's already in the stack. + + BlockValueStack.push(BV); + BlockValueSet.insert(BV); + return true; + } + /// A pointer to the cache of @llvm.assume calls. - AssumptionTracker *AT; + AssumptionCache *AC; /// An optional DL pointer. const DataLayout *DL; /// An optional DT pointer. 
DominatorTree *DT; friend struct LVIValueHandle; - - /// OverDefinedCacheUpdater - A helper object that ensures that the - /// OverDefinedCache is updated whenever solveBlockValue returns. - struct OverDefinedCacheUpdater { - LazyValueInfoCache *Parent; - Value *Val; - BasicBlock *BB; - LVILatticeVal &BBLV; - - OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV, - LazyValueInfoCache *P) - : Parent(P), Val(V), BB(B), BBLV(LV) { } - - bool markResult(bool changed) { - if (changed && BBLV.isOverdefined()) - Parent->OverDefinedCache.insert(std::make_pair(BB, Val)); - return changed; - } - }; - + void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { + SeenBlocks.insert(BB); + lookup(Val)[BB] = Result; + if (Result.isOverdefined()) + OverDefinedCache.insert(std::make_pair(BB, Val)); + } LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, @@ -398,27 +396,26 @@ namespace { } public: - /// getValueInBlock - This is the query interface to determine the lattice + /// This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); - /// getValueAt - This is the query interface to determine the lattice + /// This is the query interface to determine the lattice /// value for the specified Value* at the specified instruction (generally /// from an assume intrinsic). LVILatticeVal getValueAt(Value *V, Instruction *CxtI); - /// getValueOnEdge - This is the query interface to determine the lattice + /// This is the query interface to determine the lattice /// value for the specified Value* that is true on the specified edge. LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB, Instruction *CxtI = nullptr); - /// threadEdge - This is the update interface to inform the cache that an - /// edge from PredBB to OldSucc has been threaded to be from PredBB to - /// NewSucc. + /// This is the update interface to inform the cache that an edge from + /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); - /// eraseBlock - This is part of the update interface to inform the cache + /// This is part of the update interface to inform the cache /// that a block has been deleted. 
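The new insertResult above is what lets the solver compute a result in a local and publish it only once it is final, instead of relying on the removed RAII updater. Reduced to a standalone sketch (hypothetical names; std::map and std::multimap stand in for the value and over-defined caches):

#include <iostream>
#include <map>

std::map<int, int> ValueCache;            // block id -> solved value
std::multimap<int, int> OverDefinedCache; // block id -> over-defined marker

void insertResult(int Block, int Value, bool IsOverdefined) {
  ValueCache[Block] = Value;              // publish the finished result
  if (IsOverdefined)                      // keep the side table in sync
    OverDefinedCache.emplace(Block, Value);
}

int main() {
  int Res = 42; // computed locally first; nothing cached while unfinished
  insertResult(/*Block=*/1, Res, /*IsOverdefined=*/false);
  std::cout << ValueCache[1] << "\n";     // prints 42
}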
void eraseBlock(BasicBlock *BB); @@ -429,9 +426,9 @@ namespace { OverDefinedCache.clear(); } - LazyValueInfoCache(AssumptionTracker *AT, - const DataLayout *DL = nullptr, - DominatorTree *DT = nullptr) : AT(AT), DL(DL), DT(DT) {} + LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr, + DominatorTree *DT = nullptr) + : AC(AC), DL(DL), DT(DT) {} }; } // end anonymous namespace @@ -439,17 +436,11 @@ void LVIValueHandle::deleted() { typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; SmallVector<OverDefinedPairTy, 4> ToErase; - for (DenseSet<OverDefinedPairTy>::iterator - I = Parent->OverDefinedCache.begin(), - E = Parent->OverDefinedCache.end(); - I != E; ++I) { - if (I->second == getValPtr()) - ToErase.push_back(*I); - } - - for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(), - E = ToErase.end(); I != E; ++I) - Parent->OverDefinedCache.erase(*I); + for (const OverDefinedPairTy &P : Parent->OverDefinedCache) + if (P.second == getValPtr()) + ToErase.push_back(P); + for (const OverDefinedPairTy &P : ToErase) + Parent->OverDefinedCache.erase(P); // This erasure deallocates *this, so it MUST happen after we're done // using any and all members of *this. @@ -464,15 +455,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { SeenBlocks.erase(I); SmallVector<OverDefinedPairTy, 4> ToErase; - for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), - E = OverDefinedCache.end(); I != E; ++I) { - if (I->first == BB) - ToErase.push_back(*I); - } - - for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(), - E = ToErase.end(); I != E; ++I) - OverDefinedCache.erase(*I); + for (const OverDefinedPairTy& P : OverDefinedCache) + if (P.first == BB) + ToErase.push_back(P); + for (const OverDefinedPairTy &P : ToErase) + OverDefinedCache.erase(P); for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) @@ -482,9 +469,18 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { void LazyValueInfoCache::solve() { while (!BlockValueStack.empty()) { std::pair<BasicBlock*, Value*> &e = BlockValueStack.top(); + assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!"); + if (solveBlockValue(e.second, e.first)) { - assert(BlockValueStack.top() == e); + // The work item was completely processed. + assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); + assert(lookup(e.second).count(e.first) && "Result should be in cache!"); + BlockValueStack.pop(); + BlockValueSet.erase(e); + } else { + // More work needs to be done before revisiting. + assert(BlockValueStack.top() != e && "Stack should have been pushed!"); } } } @@ -514,43 +510,40 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { if (isa<Constant>(Val)) return true; - ValueCacheEntryTy &Cache = lookup(Val); - SeenBlocks.insert(BB); - LVILatticeVal &BBLV = Cache[BB]; - - // OverDefinedCacheUpdater is a helper object that will update - // the OverDefinedCache for us when this method exits. Make sure to - // call markResult on it as we exist, passing a bool to indicate if the - // cache needs updating, i.e. if we have solve a new value or not. - OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this); - - if (!BBLV.isUndefined()) { - DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); - - // Since we're reusing a cached value here, we don't need to update the - // OverDefinedCahce. 
The cache will have been properly updated - // whenever the cached value was inserted. - ODCacheUpdater.markResult(false); + if (lookup(Val).count(BB)) { + // If we have a cached value, use that. + DEBUG(dbgs() << " reuse BB '" << BB->getName() + << "' val=" << lookup(Val)[BB] << '\n'); + + // Since we're reusing a cached value, we don't need to update the + // OverDefinedCache. The cache will have been properly updated whenever the + // cached value was inserted. return true; } - // Otherwise, this is the first time we're seeing this block. Reset the - // lattice value to overdefined, so that cycles will terminate and be - // conservatively correct. - BBLV.markOverdefined(); + // Hold off inserting this value into the Cache in case we have to return + // false and come back later. + LVILatticeVal Res; Instruction *BBI = dyn_cast<Instruction>(Val); if (!BBI || BBI->getParent() != BB) { - return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); + if (!solveBlockValueNonLocal(Res, Val, BB)) + return false; + insertResult(Val, BB, Res); + return true; } if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB)); + if (!solveBlockValuePHINode(Res, PN, BB)) + return false; + insertResult(Val, BB, Res); + return true; } if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) { - BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); - return ODCacheUpdater.markResult(true); + Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); + insertResult(Val, BB, Res); + return true; } // We can only analyze the definitions of certain classes of instructions @@ -560,8 +553,9 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { !BBI->getType()->isIntegerTy()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - BBLV.markOverdefined(); - return ODCacheUpdater.markResult(true); + Res.markOverdefined(); + insertResult(Val, BB, Res); + return true; } // FIXME: We're currently limited to binops with a constant RHS. This should @@ -571,11 +565,15 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - BBLV.markOverdefined(); - return ODCacheUpdater.markResult(true); + Res.markOverdefined(); + insertResult(Val, BB, Res); + return true; } - return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB)); + if (!solveBlockValueConstantRange(Res, BBI, BB)) + return false; + insertResult(Val, BB, Res); + return true; } static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { @@ -620,9 +618,8 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge // inside InstructionDereferencesPointer either. 
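The control structure behind solveBlockValue's new true/false return is an explicit stack mirrored by a membership set: pushBlockValue refuses duplicates, and solve pops an item only once it reports completion. A generic sketch of that discipline, under the same assumption the asserts above encode (a failed visit always pushes at least one new dependency; all names are hypothetical):

#include <set>
#include <stack>

struct Worklist {
  std::stack<int> Work;
  std::set<int> InWork; // mirrors Work so an item is never pushed twice

  bool push(int Item) {
    if (!InWork.insert(Item).second)
      return false;     // already queued, as in pushBlockValue above
    Work.push(Item);
    return true;
  }

  void run(bool (*solveItem)(Worklist &, int)) {
    while (!Work.empty()) {
      int Item = Work.top();
      if (solveItem(*this, Item)) { // fully processed?
        Work.pop();
        InWork.erase(Item);
      }
      // Otherwise solveItem pushed a dependency; Item is revisited later.
    }
  }
};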
if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - if (InstructionDereferencesPointer(BI, UnderlyingVal)) { + for (Instruction &I : *BB) { + if (InstructionDereferencesPointer(&I, UnderlyingVal)) { NotNull = true; break; } @@ -724,16 +721,20 @@ static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, LVILatticeVal &Result, bool isTrueDest = true); -// If we can determine a constant range for the value Val at the context +// If we can determine a constant range for the value Val in the context // provided by the instruction BBI, then merge it into BBLV. If we did find a // constant range, return true. -void LazyValueInfoCache::mergeAssumeBlockValueConstantRange( - Value *Val, LVILatticeVal &BBLV, Instruction *BBI) { +void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val, + LVILatticeVal &BBLV, + Instruction *BBI) { BBI = BBI ? BBI : dyn_cast<Instruction>(Val); if (!BBI) return; - for (auto &I : AT->assumptions(BBI->getParent()->getParent())) { + for (auto &AssumeVH : AC->assumptions()) { + if (!AssumeVH) + continue; + auto *I = cast<CallInst>(AssumeVH); if (!isValidAssumeForContext(I, BBI, DL, DT)) continue; @@ -755,8 +756,10 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, BasicBlock *BB) { // Figure out the range of the LHS. If that fails, bail. if (!hasBlockValue(BBI->getOperand(0), BB)) { - BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0))); - return false; + if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + return false; + BBLV.markOverdefined(); + return true; } LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); @@ -881,7 +884,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { // If this is a conditional branch and only one successor goes to BBTo, then - // we maybe able to infer something from the condition. + // we may be able to infer something from the condition. if (BI->isConditional() && BI->getSuccessor(0) != BI->getSuccessor(1)) { bool isTrueDest = BI->getSuccessor(0) == BBTo; @@ -898,9 +901,9 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // If the condition of the branch is an equality comparison, we may be // able to infer the value. - ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()); - if (getValueFromFromCondition(Val, ICI, Result, isTrueDest)) - return true; + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if (getValueFromFromCondition(Val, ICI, Result, isTrueDest)) + return true; } } @@ -914,8 +917,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, unsigned BitWidth = Val->getType()->getIntegerBitWidth(); ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/); - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { + for (SwitchInst::CaseIt i : SI->cases()) { ConstantRange EdgeVal(i.getCaseValue()->getValue()); if (DefaultCase) { // It is possible that the default destination is the destination of @@ -931,8 +933,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, return false; } -/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at -/// the basic block if the edge does not constraint Val. +/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at +/// the basic block if the edge does not constrain Val. 
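Concretely, a branch such as "br (icmp ult %v, 8), %taken, %other" constrains %v to [0, 8) along the %taken edge, and getEdgeValue then intersects that constraint with whatever the block already knew. A minimal sketch with half-open intervals standing in for ConstantRange (hypothetical names; unsigned, non-wrapping ranges only):

#include <algorithm>
#include <cstdint>
#include <iostream>

struct Range { uint64_t Lo, Hi; }; // half-open [Lo, Hi)

// Combine the block-level fact with the fact implied by taking the edge.
Range intersect(Range BlockVal, Range EdgeConstraint) {
  return {std::max(BlockVal.Lo, EdgeConstraint.Lo),
          std::min(BlockVal.Hi, EdgeConstraint.Hi)};
}

int main() {
  Range InBlock  = {0, 256}; // e.g. %v was zero-extended from an i8
  Range TrueEdge = {0, 8};   // "icmp ult %v, 8" holds on this edge
  Range R = intersect(InBlock, TrueEdge);
  std::cout << "[" << R.Lo << ", " << R.Hi << ")\n"; // prints [0, 8)
}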
bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, BasicBlock *BBTo, LVILatticeVal &Result, Instruction *CxtI) { @@ -944,15 +946,17 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) { if (!Result.isConstantRange() || - Result.getConstantRange().getSingleElement()) + Result.getConstantRange().getSingleElement()) return true; // FIXME: this check should be moved to the beginning of the function when // LVI better supports recursive values. Even for the single value case, we // can intersect to detect dead code (an empty range). if (!hasBlockValue(Val, BBFrom)) { - BlockValueStack.push(std::make_pair(BBFrom, Val)); - return false; + if (pushBlockValue(std::make_pair(BBFrom, Val))) + return false; + Result.markOverdefined(); + return true; } // Try to intersect ranges of the BB and the constraint on the edge. @@ -971,11 +975,13 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, } if (!hasBlockValue(Val, BBFrom)) { - BlockValueStack.push(std::make_pair(BBFrom, Val)); - return false; + if (pushBlockValue(std::make_pair(BBFrom, Val))) + return false; + Result.markOverdefined(); + return true; } - // if we couldn't compute the value on the edge, use the value from the BB + // If we couldn't compute the value on the edge, use the value from the BB. Result = getBlockValue(Val, BBFrom); mergeAssumeBlockValueConstantRange(Val, Result, BBFrom->getTerminator()); // We can use the context instruction (generically the ultimate instruction @@ -995,7 +1001,9 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB, DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - BlockValueStack.push(std::make_pair(BB, V)); + assert(BlockValueStack.empty() && BlockValueSet.empty()); + pushBlockValue(std::make_pair(BB, V)); + solve(); LVILatticeVal Result = getBlockValue(V, BB); mergeAssumeBlockValueConstantRange(V, Result, CxtI); @@ -1041,7 +1049,7 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, // we clear their entries from the cache, and allow lazy updating to recompute // them when needed. - // The updating process is fairly simple: we need to dropped cached info + // The updating process is fairly simple: we need to drop cached info // for all values that were marked overdefined in OldSucc, and for those same // values in any successor of OldSucc (except NewSucc) in which they were // also marked overdefined. @@ -1049,11 +1057,9 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, worklist.push_back(OldSucc); DenseSet<Value*> ClearSet; - for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), - E = OverDefinedCache.end(); I != E; ++I) { - if (I->first == OldSucc) - ClearSet.insert(I->second); - } + for (OverDefinedPairTy &P : OverDefinedCache) + if (P.first == OldSucc) + ClearSet.insert(P.second); // Use a worklist to perform a depth-first search of OldSucc's successors. // NOTE: We do not need a visited list since any blocks we have already @@ -1067,15 +1073,14 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, if (ToUpdate == NewSucc) continue; bool changed = false; - for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end(); - I != E; ++I) { + for (Value *V : ClearSet) { // If a value was marked overdefined in OldSucc, and is here too... 
DenseSet<OverDefinedPairTy>::iterator OI = - OverDefinedCache.find(std::make_pair(ToUpdate, *I)); + OverDefinedCache.find(std::make_pair(ToUpdate, V)); if (OI == OverDefinedCache.end()) continue; // Remove it from the caches. - ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)]; + ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)]; ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); assert(CI != Entry.end() && "Couldn't find entry to update?"); @@ -1097,18 +1102,17 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, // LazyValueInfo Impl //===----------------------------------------------------------------------===// -/// getCache - This lazily constructs the LazyValueInfoCache. -static LazyValueInfoCache &getCache(void *&PImpl, - AssumptionTracker *AT, +/// This lazily constructs the LazyValueInfoCache. +static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC, const DataLayout *DL = nullptr, DominatorTree *DT = nullptr) { if (!PImpl) - PImpl = new LazyValueInfoCache(AT, DL, DT); + PImpl = new LazyValueInfoCache(AC, DL, DT); return *static_cast<LazyValueInfoCache*>(PImpl); } bool LazyValueInfo::runOnFunction(Function &F) { - AT = &getAnalysis<AssumptionTracker>(); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); @@ -1116,10 +1120,11 @@ bool LazyValueInfo::runOnFunction(Function &F) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); DL = DLP ? &DLP->getDataLayout() : nullptr; - TLI = &getAnalysis<TargetLibraryInfo>(); + + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (PImpl) - getCache(PImpl, AT, DL, DT).clear(); + getCache(PImpl, AC, DL, DT).clear(); // Fully lazy. return false; @@ -1127,14 +1132,14 @@ bool LazyValueInfo::runOnFunction(Function &F) { void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<AssumptionTracker>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { - delete &getCache(PImpl, AT); + delete &getCache(PImpl, AC); PImpl = nullptr; } } @@ -1142,8 +1147,8 @@ void LazyValueInfo::releaseMemory() { Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, Instruction *CxtI) { LVILatticeVal Result = - getCache(PImpl, AT, DL, DT).getValueInBlock(V, BB, CxtI); - + getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI); + if (Result.isConstant()) return Result.getConstant(); if (Result.isConstantRange()) { @@ -1154,14 +1159,14 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, return nullptr; } -/// getConstantOnEdge - Determine whether the specified value is known to be a +/// Determine whether the specified value is known to be a /// constant on the specified edge. Return null if not. 
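The same mechanical migration recurs in every pass this commit touches: AssumptionTracker becomes AssumptionCacheTracker, queried per function, and TargetLibraryInfo becomes TargetLibraryInfoWrapperPass. Condensed into a single hypothetical pass assembled from the hunks above (a sketch; registration boilerplate such as INITIALIZE_PASS_DEPENDENCY is omitted):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct MyPass : public FunctionPass { // hypothetical client pass
  static char ID;
  MyPass() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    // Before: &getAnalysis<AssumptionTracker>() and <TargetLibraryInfo>().
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    (void)AC; (void)TLI; // hand these to SimplifyInstruction and friends
    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }
};
}
char MyPass::ID = 0;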
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { LVILatticeVal Result = - getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); - + getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + if (Result.isConstant()) return Result.getConstant(); if (Result.isConstantRange()) { @@ -1239,15 +1244,14 @@ getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result, return LazyValueInfo::Unknown; } -/// getPredicateOnEdge - Determine whether the specified value comparison -/// with a constant is known to be true or false on the specified CFG edge. -/// Pred is a CmpInst predicate. +/// Determine whether the specified value comparison with a constant is known to +/// be true or false on the specified CFG edge. Pred is a CmpInst predicate. LazyValueInfo::Tristate LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { LVILatticeVal Result = - getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } @@ -1255,17 +1259,18 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI) { - LVILatticeVal Result = - getCache(PImpl, AT, DL, DT).getValueAt(V, CxtI); + LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { - if (PImpl) getCache(PImpl, AT, DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + if (PImpl) + getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc); } void LazyValueInfo::eraseBlock(BasicBlock *BB) { - if (PImpl) getCache(PImpl, AT, DL, DT).eraseBlock(BB); + if (PImpl) + getCache(PImpl, AC, DL, DT).eraseBlock(BB); } diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 23639e7..cf752dd 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/LibCallSemantics.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" using namespace llvm; @@ -61,3 +62,41 @@ LibCallInfo::getFunctionInfo(const Function *F) const { return Map->lookup(F->getName()); } +/// See if the given exception handling personality function is one that we +/// understand. If so, return a description of it; otherwise return Unknown. +EHPersonality llvm::classifyEHPersonality(const Value *Pers) { + const Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); + if (!F) + return EHPersonality::Unknown; + return StringSwitch<EHPersonality>(F->getName()) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("__except_handler3", EHPersonality::MSVC_X86SEH) + .Case("__except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Default(EHPersonality::Unknown); +} + +bool llvm::isAsynchronousEHPersonality(EHPersonality Pers) { + // The two SEH personality functions can catch asynch exceptions. 
We assume + // unknown personalities don't catch asynch exceptions. + switch (Pers) { + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + return true; + default: return false; + } + llvm_unreachable("invalid enum"); +} + +bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) { + const LandingPadInst *LP = II->getLandingPadInst(); + EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn()); + // We can't simplify any invokes to nounwind functions if the personality + // function wants to catch asynch exceptions. The nounwind attribute only + // implies that the function does not throw synchronous exceptions. + return !isAsynchronousEHPersonality(Personality); +} diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 8ee9b8a..874ed0a 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -36,12 +36,14 @@ #include "llvm/Analysis/Lint.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -49,11 +51,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" -#include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; namespace { @@ -73,6 +74,8 @@ namespace { void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, unsigned Align, Type *Ty, unsigned Flags); + void visitEHBeginCatch(IntrinsicInst *II); + void visitEHEndCatch(IntrinsicInst *II); void visitCallInst(CallInst &I); void visitInvokeInst(InvokeInst &I); @@ -102,7 +105,7 @@ namespace { public: Module *Mod; AliasAnalysis *AA; - AssumptionTracker *AT; + AssumptionCache *AC; DominatorTree *DT; const DataLayout *DL; TargetLibraryInfo *TLI; @@ -120,8 +123,8 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired<AliasAnalysis>(); - AU.addRequired<AssumptionTracker>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); } void print(raw_ostream &O, const Module *M) const override {} @@ -154,8 +157,8 @@ namespace { char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", @@ -179,11 +182,11 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); AA = &getAnalysis<AliasAnalysis>(); - AT = &getAnalysis<AssumptionTracker>(); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass 
*DLP = getAnalysisIfAvailable<DataLayoutPass>(); DL = DLP ? &DLP->getDataLayout() : nullptr; - TLI = &getAnalysis<TargetLibraryInfo>(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); @@ -346,6 +349,13 @@ void Lint::visitCallSite(CallSite CS) { visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; + + case Intrinsic::eh_begincatch: + visitEHBeginCatch(II); + break; + case Intrinsic::eh_endcatch: + visitEHEndCatch(II); + break; } } @@ -509,8 +519,190 @@ void Lint::visitShl(BinaryOperator &I) { "Undefined result: Shift count out of range", &I); } +static bool +allPredsCameFromLandingPad(BasicBlock *BB, + SmallSet<BasicBlock *, 4> &VisitedBlocks) { + VisitedBlocks.insert(BB); + if (BB->isLandingPad()) + return true; + // If we find a block with no predecessors, the search failed. + if (pred_empty(BB)) + return false; + for (BasicBlock *Pred : predecessors(BB)) { + if (VisitedBlocks.count(Pred)) + continue; + if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) + return false; + } + return true; +} + +static bool +allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, + IntrinsicInst **SecondBeginCatch, + SmallSet<BasicBlock *, 4> &VisitedBlocks) { + VisitedBlocks.insert(BB); + for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { + IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I); + if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) + return true; + // If we find another begincatch while looking for an endcatch, + // that's also an error. + if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { + *SecondBeginCatch = IC; + return false; + } + } + + // If we reach a block with no successors while searching, the + // search has failed. + if (succ_empty(BB)) + return false; + // Otherwise, search all of the successors. + for (BasicBlock *Succ : successors(BB)) { + if (VisitedBlocks.count(Succ)) + continue; + if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, + VisitedBlocks)) + return false; + } + return true; +} + +void Lint::visitEHBeginCatch(IntrinsicInst *II) { + // The checks in this function make a potentially dubious assumption about + // the CFG, namely that any block involved in a catch is only used for the + // catch. This will very likely be true of IR generated by a front end, + // but it may cease to be true, for example, if the IR is run through a + // pass which combines similar blocks. + // + // In general, if we encounter a block that isn't dominated by the catch + // block while we are searching the catch block's successors for a call + // to the end catch intrinsic, then it is possible that it will be legal for + // a path through this block to never reach a call to llvm.eh.endcatch. + // An analogous statement could be made about our search for a landing + // pad among the catch block's predecessors. + // + // What is actually required is that no path is possible at runtime that + // reaches a call to llvm.eh.begincatch without having previously visited + // a landingpad instruction and that no path is possible at runtime that + // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch + // (mentally adjusting for the fact that in reality these calls will be + // removed before code generation). + // + // Because this is a lint check, we take a pessimistic approach and warn if + // the control flow is potentially incorrect.
+ + SmallSet<BasicBlock *, 4> VisitedBlocks; + BasicBlock *CatchBB = II->getParent(); + + // The begin catch must occur in a landing pad block or all paths + // to it must have come from a landing pad. + Assert1(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), + "llvm.eh.begincatch may be reachable without passing a landingpad", + II); + + // Reset the visited block list. + VisitedBlocks.clear(); + + IntrinsicInst *SecondBeginCatch = nullptr; + + // This has to be called before it is asserted. Otherwise, the first assert + // below can never be hit. + bool EndCatchFound = allSuccessorsReachEndCatch( + CatchBB, std::next(static_cast<BasicBlock::iterator>(II)), + &SecondBeginCatch, VisitedBlocks); + Assert2( + SecondBeginCatch == nullptr, + "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", + II, SecondBeginCatch); + Assert1(EndCatchFound, + "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", + II); +} + +static bool allPredCameFromBeginCatch( + BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, + IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) { + VisitedBlocks.insert(BB); + // Look for a begincatch in this block. + for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; + ++RI) { + IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI); + if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) + return true; + // If we find another end catch before we find a begin catch, that's + // an error. + if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { + *SecondEndCatch = IC; + return false; + } + // If we encounter a landingpad instruction, the search failed. + if (isa<LandingPadInst>(*RI)) + return false; + } + // If while searching we find a block with no predecessors, + // the search failed. + if (pred_empty(BB)) + return false; + // Search any predecessors we haven't seen before. + for (BasicBlock *Pred : predecessors(BB)) { + if (VisitedBlocks.count(Pred)) + continue; + if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, + VisitedBlocks)) + return false; + } + return true; +} + +void Lint::visitEHEndCatch(IntrinsicInst *II) { + // The check in this function makes a potentially dubious assumption about + // the CFG, namely that any block involved in a catch is only used for the + // catch. This will very likely be true of IR generated by a front end, + // but it may cease to be true, for example, if the IR is run through a + // pass which combines similar blocks. + // + // In general, if we encounter a block that isn't post-dominated by the + // end catch block while we are searching the end catch block's predecessors + // for a call to the begin catch intrinsic, then it is possible that it will + // be legal for a path to reach the end catch block without ever having + // called llvm.eh.begincatch. + // + // What is actually required is that no path is possible at runtime that + // reaches a call to llvm.eh.endcatch without having previously visited + // a call to llvm.eh.begincatch (mentally adjusting for the fact that in + // reality these calls will be removed before code generation). + // + // Because this is a lint check, we take a pessimistic approach and warn if + // the control flow is potentially incorrect. + + BasicBlock *EndCatchBB = II->getParent(); + + // All paths to the end catch call must pass through a begin catch call. + + // If llvm.eh.begincatch wasn't called in the current block, we'll use this + // helper to recursively look for it in predecessors.
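Both lint checks are the same graph question asked in opposite directions: forward over successors for begincatch, backward over predecessors for endcatch. Stripped of the instruction scanning, the traversal reduces to the sketch below (hypothetical; adjacency lists stand in for the CFG, and, as in the helpers above, already-visited nodes are skipped, which is what makes cycles count as success):

#include <set>
#include <vector>

using Graph = std::vector<std::vector<int>>; // node -> successor list

// True iff every path leaving Node eventually reaches a node in Targets.
bool allPathsReach(const Graph &G, int Node, const std::set<int> &Targets,
                   std::set<int> &Visited) {
  if (Targets.count(Node))
    return true;
  Visited.insert(Node);
  if (G[Node].empty())
    return false; // ran off the graph without hitting a target
  for (int Succ : G[Node]) {
    if (Visited.count(Succ))
      continue; // a cycle back into explored territory is not a failure
    if (!allPathsReach(G, Succ, Targets, Visited))
      return false;
  }
  return true;
}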
+ SmallSet<BasicBlock *, 4> VisitedBlocks; + IntrinsicInst *SecondEndCatch = nullptr; + + // This has to be called before it is asserted. Otherwise, the first assert + // below can never be hit. + bool BeginCatchFound = + allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II), + &SecondEndCatch, VisitedBlocks); + Assert2( + SecondEndCatch == nullptr, + "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch", + II, SecondEndCatch); + Assert1( + BeginCatchFound, + "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", + II); +} + static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, - AssumptionTracker *AT) { + AssumptionCache *AC) { // Assume undef could be zero. if (isa<UndefValue>(V)) return true; @@ -519,8 +711,8 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, - 0, AT, dyn_cast<Instruction>(V), DT); + computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, + dyn_cast<Instruction>(V), DT); return KnownZero.isAllOnesValue(); } @@ -550,22 +742,22 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, } void Lint::visitSDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AT), + Assert1(!isZero(I.getOperand(1), DL, DT, AC), "Undefined behavior: Division by zero", &I); } void Lint::visitUDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AT), + Assert1(!isZero(I.getOperand(1), DL, DT, AC), "Undefined behavior: Division by zero", &I); } void Lint::visitSRem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AT), + Assert1(!isZero(I.getOperand(1), DL, DT, AC), "Undefined behavior: Division by zero", &I); } void Lint::visitURem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AT), + Assert1(!isZero(I.getOperand(1), DL, DT, AC), "Undefined behavior: Division by zero", &I); } @@ -686,7 +878,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AT)) + if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC)) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) @@ -711,7 +903,7 @@ void llvm::lintFunction(const Function &f) { Function &F = const_cast<Function&>(f); assert(!F.isDeclaration() && "Cannot lint external functions"); - FunctionPassManager FPM(F.getParent()); + legacy::FunctionPassManager FPM(F.getParent()); Lint *V = new Lint(); FPM.add(V); FPM.run(F); @@ -720,7 +912,7 @@ void llvm::lintFunction(const Function &f) { /// lintModule - Check a module for errors, printing messages on stderr. /// void llvm::lintModule(const Module &M) { - PassManager PM; + legacy::PassManager PM; Lint *V = new Lint(); PM.add(V); PM.run(const_cast<Module&>(M)); diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index bb0d60e..5042eb9 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -176,8 +176,13 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); - // If we're using alias analysis to disambiguate get the size of *Ptr. - uint64_t AccessSize = AA ? 
AA->getTypeStoreSize(AccessTy) : 0; + // Try to get the DataLayout for this module. This may be null, in which case + // the optimizations will be limited. + const DataLayout *DL = ScanBB->getDataLayout(); + + // Try to get the store size for the type. + uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy) + : AA ? AA->getTypeStoreSize(AccessTy) : 0; Value *StrippedPtr = Ptr->stripPointerCasts(); @@ -202,7 +207,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) if (AreEquivalentAddressValues( LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) && - CastInst::isBitCastable(LI->getType(), AccessTy)) { + CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) { if (AATags) LI->getAAMetadata(*AATags); return LI; @@ -214,7 +219,8 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) if (AreEquivalentAddressValues(StorePtr, StrippedPtr) && - CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) { + CastInst::isBitOrNoopPointerCastable(SI->getValueOperand()->getType(), + AccessTy, DL)) { if (AATags) SI->getAAMetadata(*AATags); return SI->getOperand(0); diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp new file mode 100644 index 0000000..7bedd40 --- /dev/null +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -0,0 +1,1396 @@ +//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The implementation for the loop memory dependence that was originally +// developed for the loop vectorizer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/VectorUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-accesses" + +static cl::opt<unsigned, true> +VectorizationFactor("force-vector-width", cl::Hidden, + cl::desc("Sets the SIMD width. Zero is autoselect."), + cl::location(VectorizerParams::VectorizationFactor)); +unsigned VectorizerParams::VectorizationFactor; + +static cl::opt<unsigned, true> +VectorizationInterleave("force-vector-interleave", cl::Hidden, + cl::desc("Sets the vectorization interleave count. " + "Zero is autoselect."), + cl::location( + VectorizerParams::VectorizationInterleave)); +unsigned VectorizerParams::VectorizationInterleave; + +static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( + "runtime-memory-check-threshold", cl::Hidden, + cl::desc("When performing memory disambiguation checks at runtime do not " + "generate more than this number of comparisons (default = 8)."), + cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); +unsigned VectorizerParams::RuntimeMemoryCheckThreshold; + +/// Maximum SIMD width. 
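These declarations use the externally stored form of cl::opt: the second template parameter selects external storage, and cl::location routes the parsed value into the static VectorizerParams member so non-pass code can read it without seeing the option. The idiom in isolation (a sketch; MyParams and my-threshold are made-up names):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

struct MyParams {
  static unsigned Threshold; // readable without touching the option itself
};
unsigned MyParams::Threshold;

static cl::opt<unsigned, true> // true = use external storage
    ThresholdOption("my-threshold", cl::Hidden,
                    cl::desc("Example externally stored option."),
                    cl::location(MyParams::Threshold), cl::init(8));

// ThresholdOption.getNumOccurrences() then tells whether the user forced a
// value, which is the trick isInterleaveForced() uses above.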
+const unsigned VectorizerParams::MaxVectorWidth = 64; + +bool VectorizerParams::isInterleaveForced() { + return ::VectorizationInterleave.getNumOccurrences() > 0; +} + +void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, + const Function *TheFunction, + const Loop *TheLoop, + const char *PassName) { + DebugLoc DL = TheLoop->getStartLoc(); + if (const Instruction *I = Message.getInstr()) + DL = I->getDebugLoc(); + emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName, + *TheFunction, DL, Message.str()); +} + +Value *llvm::stripIntegerCast(Value *V) { + if (CastInst *CI = dyn_cast<CastInst>(V)) + if (CI->getOperand(0)->getType()->isIntegerTy()) + return CI->getOperand(0); + return V; +} + +const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, + const ValueToValueMap &PtrToStride, + Value *Ptr, Value *OrigPtr) { + + const SCEV *OrigSCEV = SE->getSCEV(Ptr); + + // If there is an entry in the map return the SCEV of the pointer with the + // symbolic stride replaced by one. + ValueToValueMap::const_iterator SI = + PtrToStride.find(OrigPtr ? OrigPtr : Ptr); + if (SI != PtrToStride.end()) { + Value *StrideVal = SI->second; + + // Strip casts. + StrideVal = stripIntegerCast(StrideVal); + + // Replace symbolic stride by one. + Value *One = ConstantInt::get(StrideVal->getType(), 1); + ValueToValueMap RewriteMap; + RewriteMap[StrideVal] = One; + + const SCEV *ByOne = + SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); + DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + << "\n"); + return ByOne; + } + + // Otherwise, just return the SCEV of the original pointer. + return SE->getSCEV(Ptr); +} + +void LoopAccessInfo::RuntimePointerCheck::insert( + ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, + unsigned ASId, const ValueToValueMap &Strides) { + // Get the stride replaced scev. + const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); + assert(AR && "Invalid addrec expression"); + const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); + Pointers.push_back(Ptr); + Starts.push_back(AR->getStart()); + Ends.push_back(ScEnd); + IsWritePtr.push_back(WritePtr); + DependencySetId.push_back(DepSetId); + AliasSetId.push_back(ASId); +} + +bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I, + unsigned J) const { + // No need to check if two readonly pointers intersect. + if (!IsWritePtr[I] && !IsWritePtr[J]) + return false; + + // Only need to check pointers between two different dependency sets. + if (DependencySetId[I] == DependencySetId[J]) + return false; + + // Only need to check pointers in the same alias set. + if (AliasSetId[I] != AliasSetId[J]) + return false; + + return true; +} + +void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS, + unsigned Depth) const { + unsigned NumPointers = Pointers.size(); + if (NumPointers == 0) + return; + + OS.indent(Depth) << "Run-time memory checks:\n"; + unsigned N = 0; + for (unsigned I = 0; I < NumPointers; ++I) + for (unsigned J = I + 1; J < NumPointers; ++J) + if (needsChecking(I, J)) { + OS.indent(Depth) << N++ << ":\n"; + OS.indent(Depth + 2) << *Pointers[I] << "\n"; + OS.indent(Depth + 2) << *Pointers[J] << "\n"; + } +} + +namespace { +/// \brief Analyses memory accesses in a loop. +/// +/// Checks whether run time pointer checks are needed and builds sets for data +/// dependence checking. 
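needsChecking above applies three filters before any pair of pointers earns a runtime check. Restated over the per-pointer facts that insert() records (a sketch; PtrInfo is a hypothetical stand-in for the parallel arrays above):

struct PtrInfo {
  bool IsWrite;
  unsigned DepSetId;   // one set = already covered by dependence analysis
  unsigned AliasSetId; // different sets = proven disjoint by metadata
};

bool needsChecking(const PtrInfo &A, const PtrInfo &B) {
  if (!A.IsWrite && !B.IsWrite)
    return false; // two reads can never conflict
  if (A.DepSetId == B.DepSetId)
    return false; // same dependence set: the static check handles it
  if (A.AliasSetId != B.AliasSetId)
    return false; // distinct alias sets were already shown not to overlap
  return true;    // write involved, unrelated sets, same alias set
}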
+class AccessAnalysis { +public: + /// \brief Read or write access location. + typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; + typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; + + /// \brief Set of potential dependent memory accesses. + typedef EquivalenceClasses<MemAccessInfo> DepCandidates; + + AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) : + DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {} + + /// \brief Register a load and whether it is only read from. + void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) { + Value *Ptr = const_cast<Value*>(Loc.Ptr); + AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags); + Accesses.insert(MemAccessInfo(Ptr, false)); + if (IsReadOnly) + ReadOnlyPtr.insert(Ptr); + } + + /// \brief Register a store. + void addStore(AliasAnalysis::Location &Loc) { + Value *Ptr = const_cast<Value*>(Loc.Ptr); + AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags); + Accesses.insert(MemAccessInfo(Ptr, true)); + } + + /// \brief Check whether we can check the pointers at runtime for + /// non-intersection. + bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, + unsigned &NumComparisons, ScalarEvolution *SE, + Loop *TheLoop, const ValueToValueMap &Strides, + bool ShouldCheckStride = false); + + /// \brief Goes over all memory accesses, checks whether a RT check is needed + /// and builds sets of dependent accesses. + void buildDependenceSets() { + processMemAccesses(); + } + + bool isRTCheckNeeded() { return IsRTCheckNeeded; } + + bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } + void resetDepChecks() { CheckDeps.clear(); } + + MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } + +private: + typedef SetVector<MemAccessInfo> PtrAccessSet; + + /// \brief Go over all memory accesses and check whether runtime pointer checks + /// are needed and build sets of dependency check candidates. + void processMemAccesses(); + + /// Set of all accesses. + PtrAccessSet Accesses; + + /// Set of accesses that need a further dependence check. + MemAccessInfoSet CheckDeps; + + /// Set of pointers that are read only. + SmallPtrSet<Value*, 16> ReadOnlyPtr; + + const DataLayout *DL; + + /// An alias set tracker to partition the access set by underlying object and + /// intrinsic property (such as TBAA metadata). + AliasSetTracker AST; + + /// Sets of potentially dependent accesses - members of one set share an + /// underlying pointer. The set "CheckDeps" identifies which sets really need a + /// dependence check. + DepCandidates &DepCands; + + bool IsRTCheckNeeded; +}; + +} // end anonymous namespace + +/// \brief Check whether a pointer can participate in a runtime bounds check. +static bool hasComputableBounds(ScalarEvolution *SE, + const ValueToValueMap &Strides, Value *Ptr) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); + if (!AR) + return false; + + return AR->isAffine(); +} + +/// \brief Check the stride of the pointer and ensure that it does not wrap in +/// the address space. +static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap); + +bool AccessAnalysis::canCheckPtrAtRT( + LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, + ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, + bool ShouldCheckStride) { + // Find pointers with computable bounds.
We are going to use this information + // to place a runtime bound check. + bool CanDoRT = true; + + bool IsDepCheckNeeded = isDependencyCheckNeeded(); + NumComparisons = 0; + + // We assign a consecutive id to accesses from different alias sets. + // Accesses between different groups don't need to be checked. + unsigned ASId = 1; + for (auto &AS : AST) { + unsigned NumReadPtrChecks = 0; + unsigned NumWritePtrChecks = 0; + + // We assign a consecutive id to accesses from different dependence sets. + // Accesses within the same set don't need a runtime check. + unsigned RunningDepId = 1; + DenseMap<Value *, unsigned> DepSetId; + + for (auto A : AS) { + Value *Ptr = A.getValue(); + bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); + MemAccessInfo Access(Ptr, IsWrite); + + if (IsWrite) + ++NumWritePtrChecks; + else + ++NumReadPtrChecks; + + if (hasComputableBounds(SE, StridesMap, Ptr) && + // When we run after a failing dependency check we have to make sure we + // don't have wrapping pointers. + (!ShouldCheckStride || + isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) { + // The id of the dependence set. + unsigned DepId; + + if (IsDepCheckNeeded) { + Value *Leader = DepCands.getLeaderValue(Access).getPointer(); + unsigned &LeaderId = DepSetId[Leader]; + if (!LeaderId) + LeaderId = RunningDepId++; + DepId = LeaderId; + } else + // Each access has its own dependence set. + DepId = RunningDepId++; + + RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); + + DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + } else { + CanDoRT = false; + } + } + + if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) + NumComparisons += 0; // Only one dependence set. + else { + NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks + + NumWritePtrChecks - 1)); + } + + ++ASId; + } + + // If the pointers that we would use for the bounds comparison have different + // address spaces, assume the values aren't directly comparable, so we can't + // use them for the runtime check. We also have to assume they could + // overlap. In the future there should be metadata for whether address spaces + // are disjoint. + unsigned NumPointers = RtCheck.Pointers.size(); + for (unsigned i = 0; i < NumPointers; ++i) { + for (unsigned j = i + 1; j < NumPointers; ++j) { + // Only need to check pointers between two different dependency sets. + if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) + continue; + // Only need to check pointers in the same alias set. + if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) + continue; + + Value *PtrI = RtCheck.Pointers[i]; + Value *PtrJ = RtCheck.Pointers[j]; + + unsigned ASi = PtrI->getType()->getPointerAddressSpace(); + unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); + if (ASi != ASj) { + DEBUG(dbgs() << "LAA: Runtime check would require comparison between" + " different address spaces\n"); + return false; + } + } + } + + return CanDoRT; +} + +void AccessAnalysis::processMemAccesses() { + // We process the set twice: first we process read-write pointers, last we + // process read-only pointers. This allows us to skip dependence tests for + // read-only pointers. + + DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); + DEBUG(dbgs() << " AST: "; AST.dump()); + DEBUG(dbgs() << "LAA: Accesses:\n"); + DEBUG({ + for (auto A : Accesses) + dbgs() << "\t" << *A.getPointer() << " (" << + (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n"; + }); + + // The AliasSetTracker has nicely partitioned our pointers by metadata + // compatibility and potential for underlying-object overlap. As a result, we + // only need to check for potential pointer dependencies within each alias + // set. + for (auto &AS : AST) { + // Note that both the alias-set tracker and the alias sets themselves used + // linked lists internally and so the iteration order here is deterministic + // (matching the original instruction order within each set). + + bool SetHasWrite = false; + + // Map of pointers to last access encountered. + typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap; + UnderlyingObjToAccessMap ObjToLastAccess; + + // Set of access to check after all writes have been processed. + PtrAccessSet DeferredAccesses; + + // Iterate over each alias set twice, once to process read/write pointers, + // and then to process read-only pointers. + for (int SetIteration = 0; SetIteration < 2; ++SetIteration) { + bool UseDeferred = SetIteration > 0; + PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; + + for (auto AV : AS) { + Value *Ptr = AV.getValue(); + + // For a single memory access in AliasSetTracker, Accesses may contain + // both read and write, and they both need to be handled for CheckDeps. + for (auto AC : S) { + if (AC.getPointer() != Ptr) + continue; + + bool IsWrite = AC.getInt(); + + // If we're using the deferred access set, then it contains only + // reads. + bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite; + if (UseDeferred && !IsReadOnlyPtr) + continue; + // Otherwise, the pointer must be in the PtrAccessSet, either as a + // read or a write. + assert(((IsReadOnlyPtr && UseDeferred) || IsWrite || + S.count(MemAccessInfo(Ptr, false))) && + "Alias-set pointer not in the access set?"); + + MemAccessInfo Access(Ptr, IsWrite); + DepCands.insert(Access); + + // Memorize read-only pointers for later processing and skip them in + // the first round (they need to be checked after we have seen all + // write pointers). Note: we also mark pointer that are not + // consecutive as "read-only" pointers (so that we check + // "a[b[i]] +="). Hence, we need the second check for "!IsWrite". + if (!UseDeferred && IsReadOnlyPtr) { + DeferredAccesses.insert(Access); + continue; + } + + // If this is a write - check other reads and writes for conflicts. If + // this is a read only check other writes for conflicts (but only if + // there is no other write to the ptr - this is an optimization to + // catch "a[i] = a[i] + " without having to do a dependence check). + if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { + CheckDeps.insert(Access); + IsRTCheckNeeded = true; + } + + if (IsWrite) + SetHasWrite = true; + + // Create sets of pointers connected by a shared alias set and + // underlying object. + typedef SmallVector<Value *, 16> ValueVector; + ValueVector TempObjects; + GetUnderlyingObjects(Ptr, TempObjects, DL); + for (Value *UnderlyingObj : TempObjects) { + UnderlyingObjToAccessMap::iterator Prev = + ObjToLastAccess.find(UnderlyingObj); + if (Prev != ObjToLastAccess.end()) + DepCands.unionSets(Access, Prev->second); + + ObjToLastAccess[UnderlyingObj] = Access; + } + } + } + } + } +} + +namespace { +/// \brief Checks memory dependences among accesses to the same underlying +/// object to determine whether there vectorization is legal or not (and at +/// which vectorization factor). 
+namespace {
+/// \brief Checks memory dependences among accesses to the same underlying
+/// object to determine whether vectorization is legal or not (and at
+/// which vectorization factor).
+///
+/// This class works under the assumption that we already checked that memory
+/// locations with different underlying pointers are "must-not alias".
+/// We use the ScalarEvolution framework to symbolically evaluate pairs of
+/// access functions. Since we currently don't restructure the loop we can rely
+/// on the program order of memory accesses to determine their safety.
+/// At the moment we will only deem accesses as safe for:
+///  * A negative constant distance assuming program order.
+///
+///      Safe: tmp = a[i + 1];      OR     a[i + 1] = x;
+///            a[i] = tmp;                 y = a[i];
+///
+///   The latter case is safe because later checks guarantee that there can't
+///   be a cycle through a phi node (that is, we check that "x" and "y" are
+///   not the same variable: a header phi can only be an induction or a
+///   reduction, a reduction can't have a memory sink, an induction can't have
+///   a memory source). This is important and must not be violated (or we have
+///   to resort to checking for cycles through memory).
+///
+///  * A positive constant distance assuming program order that is bigger
+///    than the biggest memory access.
+///
+///      tmp = a[i]         OR     b[i] = x
+///      a[i+2] = tmp              y = b[i+2];
+///
+///    Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
+///
+///  * Zero distances and all accesses have the same size.
+///
+class MemoryDepChecker {
+public:
+  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+  typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+  MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
+      : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
+        ShouldRetryWithRuntimeCheck(false) {}
+
+  /// \brief Register the location (instructions are given increasing numbers)
+  /// of a write access.
+  void addAccess(StoreInst *SI) {
+    Value *Ptr = SI->getPointerOperand();
+    Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
+    InstMap.push_back(SI);
+    ++AccessIdx;
+  }
+
+  /// \brief Register the location (instructions are given increasing numbers)
+  /// of a read access.
+  void addAccess(LoadInst *LI) {
+    Value *Ptr = LI->getPointerOperand();
+    Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
+    InstMap.push_back(LI);
+    ++AccessIdx;
+  }
+
+  /// \brief Check whether the dependencies between the accesses are safe.
+  ///
+  /// Only checks sets with elements in \p CheckDeps.
+  bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+                   MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides);
+
+  /// \brief The maximum number of bytes of a vector register we can vectorize
+  /// the accesses safely with.
+  unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+
+  /// \brief In some cases when the dependency check fails we can still
+  /// vectorize the loop with a dynamic array access check.
+  bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+
+private:
+  ScalarEvolution *SE;
+  const DataLayout *DL;
+  const Loop *InnermostLoop;
+
+  /// \brief Maps access locations (ptr, read/write) to program order.
+  DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
+
+  /// \brief Memory access instructions in program order.
+  SmallVector<Instruction *, 16> InstMap;
+
+  /// \brief The program order index to be used for the next instruction.
+  unsigned AccessIdx;
+
+  // We can access this many bytes in parallel safely.
+  unsigned MaxSafeDepDistBytes;
+
+  /// \brief If we see a non-constant dependence distance we can still try to
+  /// vectorize this loop with runtime checks.
+  bool ShouldRetryWithRuntimeCheck;
+
+  /// \brief Check whether there is a plausible dependence between the two
+  /// accesses.
+  ///
+  /// Access \p A must happen before \p B in program order. The two indices
+  /// identify the index into the program order map.
+  ///
+  /// This function checks whether there is a plausible dependence (or the
+  /// absence of such can't be proved) between the two accesses. If there is a
+  /// plausible dependence but the dependence distance is bigger than one
+  /// element access it records this distance in \p MaxSafeDepDistBytes (if
+  /// this distance is smaller than any other distance encountered so far).
+  /// Otherwise, this function returns true signaling a possible dependence.
+  bool isDependent(const MemAccessInfo &A, unsigned AIdx,
+                   const MemAccessInfo &B, unsigned BIdx,
+                   const ValueToValueMap &Strides);
+
+  /// \brief Check whether the data dependence could prevent store-load
+  /// forwarding.
+  bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
+};
+
+} // end anonymous namespace
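
The first safe case in the class comment above, written out as a concrete scalar loop (an illustration, not code from the patch): the read runs ahead of the write, so each iteration consumes a value of the original array and vectorizing several consecutive iterations together stays correct.

// Safe negative-distance dependence: tmp = a[i + 1]; a[i] = tmp;
void shiftDown(int *a, int n) {
  for (int i = 0; i < n - 1; ++i) {
    int tmp = a[i + 1]; // read one element ahead
    a[i] = tmp;         // write trails the read: no cycle through memory
  }
}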
+static bool isInBoundsGep(Value *Ptr) {
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+    return GEP->isInBounds();
+  return false;
+}
+
+/// \brief Check whether the access through \p Ptr has a constant stride.
+static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
+                        const Loop *Lp, const ValueToValueMap &StridesMap) {
+  const Type *Ty = Ptr->getType();
+  assert(Ty->isPointerTy() && "Unexpected non-ptr");
+
+  // Make sure that the pointer does not point to aggregate types.
+  const PointerType *PtrTy = cast<PointerType>(Ty);
+  if (PtrTy->getElementType()->isAggregateType()) {
+    DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
+                 << *Ptr << "\n");
+    return 0;
+  }
+
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+  if (!AR) {
+    DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
+                 << *Ptr << " SCEV: " << *PtrScev << "\n");
+    return 0;
+  }
+
+  // The access function must stride over the innermost loop.
+  if (Lp != AR->getLoop()) {
+    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
+          *Ptr << " SCEV: " << *PtrScev << "\n");
+    return 0;
+  }
+
+  // The address calculation must not wrap. Otherwise, a dependence could be
+  // inverted.
+  // An inbounds getelementptr that is an AddRec with a unit stride
+  // cannot wrap by definition. The unit stride requirement is checked later.
+  // A getelementptr without an inbounds attribute and unit stride would have
+  // to access the pointer value "0" which is undefined behavior in address
+  // space 0, therefore we can also vectorize this case.
+  bool IsInBoundsGEP = isInBoundsGep(Ptr);
+  bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
+  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+    DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+                 << *Ptr << " SCEV: " << *PtrScev << "\n");
+    return 0;
+  }
+
+  // Check that the step is constant.
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // Calculate the pointer stride and check if it is consecutive.
+  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+  if (!C) {
+    DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
+          " SCEV: " << *PtrScev << "\n");
+    return 0;
+  }
+
+  int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
+  const APInt &APStepVal = C->getValue()->getValue();
+
+  // Huge step value - give up.
+  if (APStepVal.getBitWidth() > 64)
+    return 0;
+
+  int64_t StepVal = APStepVal.getSExtValue();
+
+  // Strided access.
+  int64_t Stride = StepVal / Size;
+  int64_t Rem = StepVal % Size;
+  if (Rem)
+    return 0;
+
+  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
+  // know we can't "wrap around the address space". In case of address space
+  // zero we know that this won't happen without triggering undefined behavior.
+  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+      Stride != 1 && Stride != -1)
+    return 0;
+
+  return Stride;
+}
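
The tail of isStridedPtr reduces to integer arithmetic on the AddRec step: the byte step must be an exact multiple of the element size. A standalone sketch of just that arithmetic (an assumed simplification of the code above):

#include <cstdint>
#include <cstdio>

// Mirrors Stride = StepVal / Size with the Rem check: a non-multiple step
// means the access is not consecutive at any stride.
static int64_t strideFromStep(int64_t StepBytes, int64_t ElemSize) {
  if (StepBytes % ElemSize)
    return 0;
  return StepBytes / ElemSize;
}

int main() {
  std::printf("%lld\n", (long long)strideFromStep(4, 4));  // 1: consecutive
  std::printf("%lld\n", (long long)strideFromStep(-4, 4)); // -1: reverse
  std::printf("%lld\n", (long long)strideFromStep(8, 4));  // 2: strided
  std::printf("%lld\n", (long long)strideFromStep(6, 4));  // 0: give up
}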
+bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
+                                                    unsigned TypeByteSize) {
+  // If loads occur at a distance that is not a multiple of a feasible vector
+  // factor, store-load forwarding does not take place.
+  // Positive dependences might cause trouble because vectorizing them might
+  // prevent store-load forwarding, making vectorized code run a lot slower.
+  //   a[i] = a[i-3] ^ a[i-8];
+  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
+  // hence on your typical architecture store-load forwarding does not take
+  // place. Vectorizing in such cases does not make sense.
+  // Store-load forwarding distance.
+  const unsigned NumCyclesForStoreLoadThroughMemory = 8 * TypeByteSize;
+  // Maximum vector factor.
+  unsigned MaxVFWithoutSLForwardIssues =
+      VectorizerParams::MaxVectorWidth * TypeByteSize;
+  if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+    MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
+
+  for (unsigned vf = 2 * TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
+       vf *= 2) {
+    if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
+      MaxVFWithoutSLForwardIssues = (vf >>= 1);
+      break;
+    }
+  }
+
+  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
+    DEBUG(dbgs() << "LAA: Distance " << Distance <<
+          " that could cause a store-load forwarding conflict\n");
+    return true;
+  }
+
+  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+      MaxVFWithoutSLForwardIssues !=
+          VectorizerParams::MaxVectorWidth * TypeByteSize)
+    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+  return false;
+}
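
A standalone rework of the search loop above (parameter values and the return convention are assumptions made for the sketch): it rejects any vector width whose byte footprint fails to divide the dependence distance while the distance still falls inside the forwarding window.

#include <cstdio>

// Returns the largest vector width (in bytes) that still lines up with the
// dependence distance; smaller than 2 * TypeByteSize means "conflict".
static unsigned maxVFBytes(unsigned Distance, unsigned TypeByteSize,
                           unsigned MaxVFWithoutSLForwardIssues) {
  const unsigned Window = 8 * TypeByteSize; // forwarding window, as above
  for (unsigned vf = 2 * TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
       vf *= 2)
    if (Distance % vf && Distance / vf < Window)
      return vf >> 1; // last width that divided the distance evenly
  return MaxVFWithoutSLForwardIssues;
}

int main() {
  // i32 accesses 12 bytes apart: 8-byte vectors misalign with the stores,
  // so the usable width is clamped back to 4 bytes.
  std::printf("%u\n", maxVFBytes(12, 4, 64)); // 4
}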
+ if (StrideAPtr < 0) { + //Src = BScev; + //Sink = AScev; + std::swap(APtr, BPtr); + std::swap(Src, Sink); + std::swap(AIsWrite, BIsWrite); + std::swap(AIdx, BIdx); + std::swap(StrideAPtr, StrideBPtr); + } + + const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + + DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink + << "(Induction step: " << StrideAPtr << ")\n"); + DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " + << *InstMap[BIdx] << ": " << *Dist << "\n"); + + // Need consecutive accesses. We don't want to vectorize + // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in + // the address space. + if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ + DEBUG(dbgs() << "Non-consecutive pointer access\n"); + return true; + } + + const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); + if (!C) { + DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); + ShouldRetryWithRuntimeCheck = true; + return true; + } + + Type *ATy = APtr->getType()->getPointerElementType(); + Type *BTy = BPtr->getType()->getPointerElementType(); + unsigned TypeByteSize = DL->getTypeAllocSize(ATy); + + // Negative distances are not plausible dependencies. + const APInt &Val = C->getValue()->getValue(); + if (Val.isNegative()) { + bool IsTrueDataDependence = (AIsWrite && !BIsWrite); + if (IsTrueDataDependence && + (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || + ATy != BTy)) + return true; + + DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n"); + return false; + } + + // Write to the same location with the same size. + // Could be improved to assert type sizes are the same (i32 == float, etc). + if (Val == 0) { + if (ATy == BTy) + return false; + DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); + return true; + } + + assert(Val.isStrictlyPositive() && "Expect a positive value"); + + if (ATy != BTy) { + DEBUG(dbgs() << + "LAA: ReadWrite-Write positive dependency with different types\n"); + return true; + } + + unsigned Distance = (unsigned) Val.getZExtValue(); + + // Bail out early if passed-in parameters make vectorization not feasible. + unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? + VectorizerParams::VectorizationFactor : 1); + unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? + VectorizerParams::VectorizationInterleave : 1); + + // The distance must be bigger than the size needed for a vectorized version + // of the operation and the size of the vectorized operation must not be + // bigger than the currrent maximum size. + if (Distance < 2*TypeByteSize || + 2*TypeByteSize > MaxSafeDepDistBytes || + Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { + DEBUG(dbgs() << "LAA: Failure because of Positive distance " + << Val.getSExtValue() << '\n'); + return true; + } + + // Positive distance bigger than max vectorization factor. + MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ? 
+bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+                                   MemAccessInfoSet &CheckDeps,
+                                   const ValueToValueMap &Strides) {
+
+  MaxSafeDepDistBytes = -1U;
+  while (!CheckDeps.empty()) {
+    MemAccessInfo CurAccess = *CheckDeps.begin();
+
+    // Get the relevant memory access set.
+    EquivalenceClasses<MemAccessInfo>::iterator I =
+      AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
+
+    // Check accesses within this set.
+    EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
+    AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
+
+    // Check every access pair.
+    while (AI != AE) {
+      CheckDeps.erase(*AI);
+      EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+      while (OI != AE) {
+        // Check every accessing instruction pair in program order.
+        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
+             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
+          for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
+               I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+            if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
+              return false;
+            if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
+              return false;
+          }
+        ++OI;
+      }
+      ++AI;
+    }
+  }
+  return true;
+}
+
+bool LoopAccessInfo::canAnalyzeLoop() {
+  // We can only analyze innermost loops.
+  if (!TheLoop->empty()) {
+    emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+    return false;
+  }
+
+  // We must have a single backedge.
+  if (TheLoop->getNumBackEdges() != 1) {
+    emitAnalysis(
+        LoopAccessReport() <<
+        "loop control flow is not understood by analyzer");
+    return false;
+  }
+
+  // We must have a single exiting block.
+  if (!TheLoop->getExitingBlock()) {
+    emitAnalysis(
+        LoopAccessReport() <<
+        "loop control flow is not understood by analyzer");
+    return false;
+  }
+
+  // We only handle bottom-tested loops, i.e. loops in which the condition is
+  // checked at the end of each iteration. With that we can assume that all
+  // instructions in the loop are executed the same number of times.
+  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+    emitAnalysis(
+        LoopAccessReport() <<
+        "loop control flow is not understood by analyzer");
+    return false;
+  }
+
+  // We need to have a loop header.
+  DEBUG(dbgs() << "LAA: Found a loop: " <<
+        TheLoop->getHeader()->getName() << '\n');
+
+  // ScalarEvolution needs to be able to find the exit count.
+  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
+  if (ExitCount == SE->getCouldNotCompute()) {
+    emitAnalysis(LoopAccessReport() <<
+                 "could not determine number of loop iterations");
+    DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
+    return false;
+  }
+
+  return true;
+}
+
+void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
+
+  typedef SmallVector<Value*, 16> ValueVector;
+  typedef SmallPtrSet<Value*, 16> ValueSet;
+
+  // Holds the Load and Store *instructions*.
+  ValueVector Loads;
+  ValueVector Stores;
+
+  // Holds all the different accesses in the loop.
+  unsigned NumReads = 0;
+  unsigned NumReadWrites = 0;
+
+  PtrRtCheck.Pointers.clear();
+  PtrRtCheck.Need = false;
+
+  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+  MemoryDepChecker DepChecker(SE, DL, TheLoop);
+
+  // For each block.
+  for (Loop::block_iterator bb = TheLoop->block_begin(),
+       be = TheLoop->block_end(); bb != be; ++bb) {
+
+    // Scan the BB and collect legal loads and stores.
+    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+         ++it) {
+
+      // If this is a load, save it. If this instruction can read from memory
+      // but is not a load, then we quit. Notice that we don't handle function
+      // calls that read or write.
+      if (it->mayReadFromMemory()) {
+        // Many math library functions read the rounding mode. We will only
+        // vectorize a loop if it contains known function calls that don't set
+        // the flag. Therefore, it is safe to ignore this read from memory.
+        CallInst *Call = dyn_cast<CallInst>(it);
+        if (Call && getIntrinsicIDForCall(Call, TLI))
+          continue;
+
+        LoadInst *Ld = dyn_cast<LoadInst>(it);
+        if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+          emitAnalysis(LoopAccessReport(Ld)
+                       << "read with atomic ordering or volatile read");
+          DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
+          CanVecMem = false;
+          return;
+        }
+        NumLoads++;
+        Loads.push_back(Ld);
+        DepChecker.addAccess(Ld);
+        continue;
+      }
+
+      // Save 'store' instructions. Abort if other instructions write to
+      // memory.
+      if (it->mayWriteToMemory()) {
+        StoreInst *St = dyn_cast<StoreInst>(it);
+        if (!St) {
+          emitAnalysis(LoopAccessReport(it) <<
+                       "instruction cannot be vectorized");
+          CanVecMem = false;
+          return;
+        }
+        if (!St->isSimple() && !IsAnnotatedParallel) {
+          emitAnalysis(LoopAccessReport(St)
+                       << "write with atomic ordering or volatile write");
+          DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
+          CanVecMem = false;
+          return;
+        }
+        NumStores++;
+        Stores.push_back(St);
+        DepChecker.addAccess(St);
+      }
+    } // Next instr.
+  } // Next block.
+
+  // Now we have two lists that hold the loads and the stores.
+  // Next, we find the pointers that they use.
+
+  // Check if we see any stores. If there are no stores, then we don't
+  // care if the pointers are *restrict*.
+  if (!Stores.size()) {
+    DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+    CanVecMem = true;
+    return;
+  }
+
+  AccessAnalysis::DepCandidates DependentAccesses;
+  AccessAnalysis Accesses(DL, AA, DependentAccesses);
+
+  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+  // multiple times on the same object. If the ptr is accessed twice, once
+  // for read and once for write, it will only appear once (on the write
+  // list). This is okay, since we are going to check for conflicts between
+  // writes and between reads and writes, but not between reads and reads.
+  ValueSet Seen;
+
+  ValueVector::iterator I, IE;
+  for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+    StoreInst *ST = cast<StoreInst>(*I);
+    Value* Ptr = ST->getPointerOperand();
+
+    if (isUniform(Ptr)) {
+      emitAnalysis(
+          LoopAccessReport(ST)
+          << "write to a loop invariant address could not be vectorized");
+      DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
+      CanVecMem = false;
+      return;
+    }
+
+    // If we did *not* see this pointer before, insert it into the read-write
+    // list. At this phase it is only a 'write' list.
+    if (Seen.insert(Ptr).second) {
+      ++NumReadWrites;
+
+      AliasAnalysis::Location Loc = AA->getLocation(ST);
+      // The TBAA metadata could have a control dependency on the predication
+      // condition, so we cannot rely on it when determining whether or not we
+      // need runtime pointer checks.
+      if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
+        Loc.AATags.TBAA = nullptr;
+
+      Accesses.addStore(Loc);
+    }
+  }
+
+  if (IsAnnotatedParallel) {
+    DEBUG(dbgs()
+          << "LAA: A loop annotated parallel, ignoring memory dependency "
+          << "checks.\n");
+    CanVecMem = true;
+    return;
+  }
+
+  for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+    LoadInst *LD = cast<LoadInst>(*I);
+    Value* Ptr = LD->getPointerOperand();
+    // If we did *not* see this pointer before, insert it into the
+    // read list. If we *did* see it before, then it is already in
+    // the read-write list. This allows us to vectorize expressions
+    // such as A[i] += x, because the address of A[i] is a read-write
+    // pointer. This only works if the index of A[i] is consecutive.
+    // If the address of i is unknown (for example A[B[i]]) then we may
+    // read a few words, modify, and write a few words, and some of the
+    // words may be written to the same address.
+    bool IsReadOnlyPtr = false;
+    if (Seen.insert(Ptr).second ||
+        !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
+      ++NumReads;
+      IsReadOnlyPtr = true;
+    }
+
+    AliasAnalysis::Location Loc = AA->getLocation(LD);
+    // The TBAA metadata could have a control dependency on the predication
+    // condition, so we cannot rely on it when determining whether or not we
+    // need runtime pointer checks.
+    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
+      Loc.AATags.TBAA = nullptr;
+
+    Accesses.addLoad(Loc, IsReadOnlyPtr);
+  }
+
+  // If we write (or read-write) to a single destination and there are no
+  // other reads in this loop then it is safe to vectorize.
+  if (NumReadWrites == 1 && NumReads == 0) {
+    DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+    CanVecMem = true;
+    return;
+  }
+
+  // Build dependence sets and check whether we need a runtime pointer bounds
+  // check.
+  Accesses.buildDependenceSets();
+  bool NeedRTCheck = Accesses.isRTCheckNeeded();
+
+  // Find pointers with computable bounds. We are going to use this information
+  // to place a runtime bound check.
+  unsigned NumComparisons = 0;
+  bool CanDoRT = false;
+  if (NeedRTCheck)
+    CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
+                                       Strides);
+
+  DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
+        " pointer comparisons.\n");
+
+  // If we only have one set of dependences to check pointers among, we don't
+  // need a runtime check.
+  if (NumComparisons == 0 && NeedRTCheck)
+    NeedRTCheck = false;
+
+  // Check that we did not collect too many pointers or found an unsizeable
+  // pointer.
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { + PtrRtCheck.reset(); + CanDoRT = false; + } + + if (CanDoRT) { + DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); + } + + if (NeedRTCheck && !CanDoRT) { + emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); + DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << + "the array bounds.\n"); + PtrRtCheck.reset(); + CanVecMem = false; + return; + } + + PtrRtCheck.Need = NeedRTCheck; + + CanVecMem = true; + if (Accesses.isDependencyCheckNeeded()) { + DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); + CanVecMem = DepChecker.areDepsSafe( + DependentAccesses, Accesses.getDependenciesToCheck(), Strides); + MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes(); + + if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { + DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); + NeedRTCheck = true; + + // Clear the dependency checks. We assume they are not needed. + Accesses.resetDepChecks(); + + PtrRtCheck.reset(); + PtrRtCheck.Need = true; + + CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, + TheLoop, Strides, true); + // Check that we did not collect too many pointers or found an unsizeable + // pointer. + if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { + if (!CanDoRT && NumComparisons > 0) + emitAnalysis(LoopAccessReport() + << "cannot check memory dependencies at runtime"); + else + emitAnalysis(LoopAccessReport() + << NumComparisons << " exceeds limit of " + << RuntimeMemoryCheckThreshold + << " dependent memory operations checked at runtime"); + DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); + PtrRtCheck.reset(); + CanVecMem = false; + return; + } + + CanVecMem = true; + } + } + + if (!CanVecMem) + emitAnalysis(LoopAccessReport() << + "unsafe dependent memory operations in loop"); + + DEBUG(dbgs() << "LAA: We" << (NeedRTCheck ? "" : " don't") << + " need a runtime memory check.\n"); +} + +bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, + DominatorTree *DT) { + assert(TheLoop->contains(BB) && "Unknown block used"); + + // Blocks that do not dominate the latch need predication. + BasicBlock* Latch = TheLoop->getLoopLatch(); + return !DT->dominates(BB, Latch); +} + +void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { + assert(!Report && "Multiple reports generated"); + Report = Message; +} + +bool LoopAccessInfo::isUniform(Value *V) const { + return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); +} + +// FIXME: this function is currently a duplicate of the one in +// LoopVectorize.cpp. +static Instruction *getFirstInst(Instruction *FirstInst, Value *V, + Instruction *Loc) { + if (FirstInst) + return FirstInst; + if (Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == Loc->getParent() ? 
I : nullptr;
+  return nullptr;
+}
+
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
+  Instruction *tnullptr = nullptr;
+  if (!PtrRtCheck.Need)
+    return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
+
+  unsigned NumPointers = PtrRtCheck.Pointers.size();
+  SmallVector<TrackingVH<Value>, 2> Starts;
+  SmallVector<TrackingVH<Value>, 2> Ends;
+
+  LLVMContext &Ctx = Loc->getContext();
+  SCEVExpander Exp(*SE, "induction");
+  Instruction *FirstInst = nullptr;
+
+  for (unsigned i = 0; i < NumPointers; ++i) {
+    Value *Ptr = PtrRtCheck.Pointers[i];
+    const SCEV *Sc = SE->getSCEV(Ptr);
+
+    if (SE->isLoopInvariant(Sc, TheLoop)) {
+      DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
+            *Ptr << "\n");
+      Starts.push_back(Ptr);
+      Ends.push_back(Ptr);
+    } else {
+      DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
+      unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+      // Use this type for pointer arithmetic.
+      Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
+      Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
+      Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+      Starts.push_back(Start);
+      Ends.push_back(End);
+    }
+  }
+
+  IRBuilder<> ChkBuilder(Loc);
+  // Our instructions might fold to a constant.
+  Value *MemoryRuntimeCheck = nullptr;
+  for (unsigned i = 0; i < NumPointers; ++i) {
+    for (unsigned j = i + 1; j < NumPointers; ++j) {
+      if (!PtrRtCheck.needsChecking(i, j))
+        continue;
+
+      unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+      unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+      assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+             (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+             "Trying to bounds check pointers with different address spaces");
+
+      Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+      Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+      Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+      Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+      Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+      Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
+
+      Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+      FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+      Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+      FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+      Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+      FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+      if (MemoryRuntimeCheck) {
+        IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+                                         "conflict.rdx");
+        FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+      }
+      MemoryRuntimeCheck = IsConflict;
+    }
+  }
+
+  // We have to do this trickery because the IRBuilder might fold the check to
+  // a constant expression in which case there is no Instruction anchored in
+  // the block.
+  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+                                                 ConstantInt::getTrue(Ctx));
+  ChkBuilder.Insert(Check, "memcheck.conflict");
+  FirstInst = getFirstInst(FirstInst, Check, Loc);
+  return std::make_pair(FirstInst, Check);
+}
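
The bound0/bound1 comparisons emitted above implement a standard interval-overlap test. Restated in plain C++ over char pointers (a sketch only; Start/End stand for the expanded SCEV bounds): two ranges conflict exactly when each one starts no later than the other one ends.

#include <cstdio>

// Mirrors CreateICmpULE(Start0, End1) && CreateICmpULE(Start1, End0).
static bool mayConflict(const char *Start0, const char *End0,
                        const char *Start1, const char *End1) {
  return Start0 <= End1 && Start1 <= End0;
}

int main() {
  char Buf[17];
  // Ranges that touch at Buf+8 are reported as a conflict ...
  std::printf("%d\n", mayConflict(Buf, Buf + 8, Buf + 8, Buf + 16)); // 1
  // ... while fully disjoint ranges are not.
  std::printf("%d\n", mayConflict(Buf, Buf + 7, Buf + 8, Buf + 16)); // 0
}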
+LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
+                               const DataLayout *DL,
+                               const TargetLibraryInfo *TLI, AliasAnalysis *AA,
+                               DominatorTree *DT,
+                               const ValueToValueMap &Strides)
+    : TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0),
+      NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+  if (canAnalyzeLoop())
+    analyzeLoop(Strides);
+}
+
+void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
+  if (CanVecMem) {
+    if (PtrRtCheck.empty())
+      OS.indent(Depth) << "Memory dependences are safe\n";
+    else
+      OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+  }
+
+  if (Report)
+    OS.indent(Depth) << "Report: " << Report->str() << "\n";
+
+  // FIXME: Print unsafe dependences
+
+  // List the pairs of accesses that need run-time checks to prove
+  // independence.
+  PtrRtCheck.print(OS, Depth);
+  OS << "\n";
+}
+
+const LoopAccessInfo &
+LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
+  auto &LAI = LoopAccessInfoMap[L];
+
+#ifndef NDEBUG
+  assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
+         "Symbolic strides changed for loop");
+#endif
+
+  if (!LAI) {
+    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides);
+#ifndef NDEBUG
+    LAI->NumSymbolicStrides = Strides.size();
+#endif
+  }
+  return *LAI.get();
+}
+
+void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
+  LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
+
+  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  ValueToValueMap NoSymbolicStrides;
+
+  for (Loop *TopLevelLoop : *LI)
+    for (Loop *L : depth_first(TopLevelLoop)) {
+      OS.indent(2) << L->getHeader()->getName() << ":\n";
+      auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
+      LAI.print(OS, 4);
+    }
+}
+
+bool LoopAccessAnalysis::runOnFunction(Function &F) {
+  SE = &getAnalysis<ScalarEvolution>();
+  DL = F.getParent()->getDataLayout();
+  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+  TLI = TLIP ?
&TLIP->getTLI() : nullptr; + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + return false; +} + +void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<ScalarEvolution>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + + AU.setPreservesAll(); +} + +char LoopAccessAnalysis::ID = 0; +static const char laa_name[] = "Loop Access Analysis"; +#define LAA_NAME "loop-accesses" + +INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) + +namespace llvm { + Pass *createLAAPass() { + return new LoopAccessAnalysis(); + } +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index b1f62c4..95f6eb0 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include <algorithm> @@ -45,11 +46,6 @@ static cl::opt<bool,true> VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), cl::desc("Verify loop info (time consuming)")); -char LoopInfo::ID = 0; -INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) - // Loop identifier metadata name. static const char *const LoopMDName = "llvm.loop"; @@ -609,15 +605,6 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { return NearLoop; } -//===----------------------------------------------------------------------===// -// LoopInfo implementation -// -bool LoopInfo::runOnFunction(Function &) { - releaseMemory(); - LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); - return false; -} - /// updateUnloop - The last backedge has been removed from a loop--now the /// "unloop". Find a new parent for the blocks contained within unloop and /// update the loop tree. We don't necessarily have valid dominators at this @@ -631,7 +618,8 @@ void LoopInfo::updateUnloop(Loop *Unloop) { if (!Unloop->getParentLoop()) { // Since BBLoop had no parent, Unloop blocks are no longer in a loop. for (Loop::block_iterator I = Unloop->block_begin(), - E = Unloop->block_end(); I != E; ++I) { + E = Unloop->block_end(); + I != E; ++I) { // Don't reparent blocks in subloops. if (getLoopFor(*I) != Unloop) @@ -639,21 +627,21 @@ void LoopInfo::updateUnloop(Loop *Unloop) { // Blocks no longer have a parent but are still referenced by Unloop until // the Unloop object is deleted. - LI.changeLoopFor(*I, nullptr); + changeLoopFor(*I, nullptr); } // Remove the loop from the top-level LoopInfo object. - for (LoopInfo::iterator I = LI.begin();; ++I) { - assert(I != LI.end() && "Couldn't find loop"); + for (iterator I = begin();; ++I) { + assert(I != end() && "Couldn't find loop"); if (*I == Unloop) { - LI.removeLoop(I); + removeLoop(I); break; } } // Move all of the subloops to the top-level. 
while (!Unloop->empty()) - LI.addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end()))); + addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end()))); return; } @@ -680,35 +668,59 @@ void LoopInfo::updateUnloop(Loop *Unloop) { } } -void LoopInfo::verifyAnalysis() const { - // LoopInfo is a FunctionPass, but verifying every loop in the function - // each time verifyAnalysis is called is very expensive. The - // -verify-loop-info option can enable this. In order to perform some - // checking by default, LoopPass has been taught to call verifyLoop - // manually during loop pass sequences. +char LoopAnalysis::PassID; + +LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) { + // FIXME: Currently we create a LoopInfo from scratch for every function. + // This may prove to be too wasteful due to deallocating and re-allocating + // memory each time for the underlying map and vector datastructures. At some + // point it may prove worthwhile to use a freelist and recycle LoopInfo + // objects. I don't want to add that kind of complexity until the scope of + // the problem is better understood. + LoopInfo LI; + LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F)); + return std::move(LI); +} + +PreservedAnalyses LoopPrinterPass::run(Function &F, + AnalysisManager<Function> *AM) { + AM->getResult<LoopAnalysis>(F).print(OS); + return PreservedAnalyses::all(); +} - if (!VerifyLoopInfo) return; +//===----------------------------------------------------------------------===// +// LoopInfo implementation +// - DenseSet<const Loop*> Loops; - for (iterator I = begin(), E = end(); I != E; ++I) { - assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); - (*I)->verifyLoopNest(&Loops); - } +char LoopInfoWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information", + true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information", + true, true) - // Verify that blocks are mapped to valid loops. - for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(), - E = LI.BBMap.end(); I != E; ++I) { - assert(Loops.count(I->second) && "orphaned loop"); - assert(I->second->contains(I->first) && "orphaned block"); - } +bool LoopInfoWrapperPass::runOnFunction(Function &) { + releaseMemory(); + LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); + return false; +} + +void LoopInfoWrapperPass::verifyAnalysis() const { + // LoopInfoWrapperPass is a FunctionPass, but verifying every loop in the + // function each time verifyAnalysis is called is very expensive. The + // -verify-loop-info option can enable this. In order to perform some + // checking by default, LoopPass has been taught to call verifyLoop manually + // during loop pass sequences. 
+ if (VerifyLoopInfo) + LI.verify(); } -void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { +void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<DominatorTreeWrapperPass>(); } -void LoopInfo::print(raw_ostream &OS, const Module*) const { +void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const { LI.print(OS); } diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 190abc7..a99c949 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -187,14 +187,15 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { // LPPassManager needs LoopInfo. In the long term LoopInfo class will // become part of LPPassManager. - Info.addRequired<LoopInfo>(); + Info.addRequired<LoopInfoWrapperPass>(); Info.setPreservesAll(); } /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the function, and if so, return true. bool LPPassManager::runOnFunction(Function &F) { - LI = &getAnalysis<LoopInfo>(); + auto &LIWP = getAnalysis<LoopInfoWrapperPass>(); + LI = &LIWP.getLoopInfo(); bool Changed = false; // Collect inherited analysis from Module level pass manager. @@ -262,7 +263,7 @@ bool LPPassManager::runOnFunction(Function &F) { // loop in the function every time. That level of checking can be // enabled with the -verify-loop-info option. { - TimeRegion PassTimer(getPassTimer(LI)); + TimeRegion PassTimer(getPassTimer(&LIWP)); CurrentLoop->verifyLoop(); } diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 10da3d5..e1b7b4b 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -92,13 +92,12 @@ const char *const MemDepPrinter::DepTypeStr[] bool MemDepPrinter::runOnFunction(Function &F) { this->F = &F; - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>(); // All this code uses non-const interfaces because MemDep is not // const-friendly, though nothing is actually modified. - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - Instruction *Inst = &*I; + for (auto &I : inst_range(F)) { + Instruction *Inst = &I; if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory()) continue; @@ -119,30 +118,9 @@ bool MemDepPrinter::runOnFunction(Function &F) { } } else { SmallVector<NonLocalDepResult, 4> NLDI; - if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - if (!LI->isUnordered()) { - // FIXME: Handle atomic/volatile loads. - Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), - static_cast<BasicBlock *>(nullptr))); - continue; - } - AliasAnalysis::Location Loc = AA.getLocation(LI); - MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI); - } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (!SI->isUnordered()) { - // FIXME: Handle atomic/volatile stores. 
- Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), - static_cast<BasicBlock *>(nullptr))); - continue; - } - AliasAnalysis::Location Loc = AA.getLocation(SI); - MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI); - } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) { - AliasAnalysis::Location Loc = AA.getLocation(VI); - MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI); - } else { - llvm_unreachable("Unknown memory instruction!"); - } + assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) || + isa<VAArgInst>(Inst)) && "Unknown memory instruction!"); + MDA.getNonLocalPointerDependency(Inst, NLDI); DepSet &InstDeps = Deps[Inst]; for (SmallVectorImpl<NonLocalDepResult>::const_iterator @@ -157,8 +135,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { } void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { - for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) { - const Instruction *Inst = &*I; + for (const auto &I : inst_range(*F)) { + const Instruction *Inst = &I; DepSetMap::const_iterator DI = Deps.find(Inst); if (DI == Deps.end()) @@ -166,11 +144,10 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { const DepSet &InstDeps = DI->second; - for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end(); - I != E; ++I) { - const Instruction *DepInst = I->first.getPointer(); - DepType type = I->first.getInt(); - const BasicBlock *DepBB = I->second; + for (const auto &I : InstDeps) { + const Instruction *DepInst = I.first.getPointer(); + DepType type = I.first.getInt(); + const BasicBlock *DepBB = I.second; OS << " "; OS << DepTypeStr[type]; diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp new file mode 100644 index 0000000..531d75e --- /dev/null +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -0,0 +1,70 @@ +//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  struct MemDerefPrinter : public FunctionPass {
+    SmallVector<Value *, 4> Vec;
+
+    static char ID; // Pass identification, replacement for typeid
+    MemDerefPrinter() : FunctionPass(ID) {
+      initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry());
+    }
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<DataLayoutPass>();
+      AU.setPreservesAll();
+    }
+    bool runOnFunction(Function &F) override;
+    void print(raw_ostream &OS, const Module * = nullptr) const override;
+    void releaseMemory() override {
+      Vec.clear();
+    }
+  };
+}
+
+char MemDerefPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
+                      "Memory Dereferenceability of pointers in function",
+                      false, true)
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs",
+                    "Memory Dereferenceability of pointers in function",
+                    false, true)
+
+FunctionPass *llvm::createMemDerefPrinter() {
+  return new MemDerefPrinter();
+}
+
+bool MemDerefPrinter::runOnFunction(Function &F) {
+  const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+  for (auto &I: inst_range(F)) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+      Value *PO = LI->getPointerOperand();
+      if (PO->isDereferenceablePointer(DL))
+        Vec.push_back(PO);
+    }
+  }
+  return false;
+}
+
+void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
+  OS << "The following are dereferenceable:\n";
+  for (auto &V: Vec) {
+    V->print(OS);
+    OS << "\n\n";
+  }
+}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 08b41fe..6108af3 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -25,7 +26,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
@@ -319,7 +319,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
   if (!CI || isa<IntrinsicInst>(CI))
     return nullptr;
   Function *Callee = CI->getCalledFunction();
-  if (Callee == nullptr || !Callee->isDeclaration())
+  if (Callee == nullptr)
     return nullptr;
   StringRef FnName = Callee->getName();
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 187eada..6d38863 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -18,7 +18,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/PHITransAddr.h"
@@ -59,7 +59,7 @@
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
                      "Memory Dependence Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
                    "Memory Dependence Analysis", false, true)
@@ -82,19 +82,17 @@ void MemoryDependenceAnalysis::releaseMemory() {
   PredCache->clear();
 }
-
-
 /// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
 ///
 void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
-  AU.addRequired<AssumptionTracker>();
+  AU.addRequired<AssumptionCacheTracker>();
   AU.addRequiredTransitive<AliasAnalysis>();
 }
-bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
   AA = &getAnalysis<AliasAnalysis>();
-  AT = &getAnalysis<AssumptionTracker>();
+  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
   DL = DLP ? &DLP->getDataLayout() : nullptr;
   DominatorTreeWrapperPass *DTWP =
@@ -300,8 +298,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
   // Load widening is hostile to ThreadSanitizer: it may cause false positives
   // or make the reports more cryptic (access sizes are wrong).
-  if (LI->getParent()->getParent()->getAttributes().
-      hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
+  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
     return 0;
   // Get the base of this load.
@@ -346,9 +343,9 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
       !DL.fitsInLegalInteger(NewLoadByteSize*8))
     return 0;
-  if (LIOffs+NewLoadByteSize > MemLocEnd &&
-      LI->getParent()->getParent()->getAttributes().
-        hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
+  if (LIOffs + NewLoadByteSize > MemLocEnd &&
+      LI->getParent()->getParent()->hasFnAttribute(
+          Attribute::SanitizeAddress))
     // We will be reading past the location accessed by the original program.
     // While this is safe in a regular build, Address Safety analysis tools
     // may start reporting false warnings. So, don't do widening.
@@ -362,6 +359,17 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
   }
 }
+static bool isVolatile(Instruction *Inst) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+    return LI->isVolatile();
+  else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return SI->isVolatile();
+  else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+    return AI->isVolatile();
+  return false;
+}
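
A plain C++ illustration of the volatile handling added below (the variable names are made up for the example): a volatile access itself must be preserved, but an unrelated, non-aliasing access may legally move across it, which is why a volatile load need not clobber disjoint locations.

volatile int Device; // must be read exactly as written
int Counter;         // ordinary memory, disjoint from Device

void touch() {
  int v = Device; // the volatile read itself cannot be elided
  Counter += 1;   // non-aliasing access: may be reordered around the
                  // volatile read without changing observable behavior
  (void)v;
}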
+ if (LI->isVolatile()) { + if (!QueryInst) + // Original QueryInst *may* be volatile + return MemDepResult::getClobber(LI); + if (isVolatile(QueryInst)) + // Ordering required if QueryInst is itself volatile + return MemDepResult::getClobber(LI); + // Otherwise, volatile doesn't imply any special ordering + } + // Atomic loads have complications involved. // A Monotonic (or higher) load is OK if the query inst is itself not atomic. // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any // release store will know to return getClobber. // FIXME: This is overly conservative. - if (!LI->isUnordered()) { + if (LI->isAtomic() && LI->getOrdering() > Unordered) { if (!QueryInst) return MemDepResult::getClobber(LI); if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) { @@ -470,13 +492,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, HasSeenAcquire = true; } - // FIXME: this is overly conservative. - // While volatile access cannot be eliminated, they do not have to clobber - // non-aliasing locations, as normal accesses can for example be reordered - // with volatile accesses. - if (LI->isVolatile()) - return MemDepResult::getClobber(LI); - AliasAnalysis::Location LoadLoc = AA->getLocation(LI); // If we found a pointer, check if it could be the same as our pointer. @@ -859,21 +874,65 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { /// own block. /// void MemoryDependenceAnalysis:: -getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, - BasicBlock *FromBB, +getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { + + auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) { + if (auto *I = dyn_cast<LoadInst>(Inst)) + return AA->getLocation(I); + else if (auto *I = dyn_cast<StoreInst>(Inst)) + return AA->getLocation(I); + else if (auto *I = dyn_cast<VAArgInst>(Inst)) + return AA->getLocation(I); + else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst)) + return AA->getLocation(I); + else if (auto *I = dyn_cast<AtomicRMWInst>(Inst)) + return AA->getLocation(I); + else + llvm_unreachable("unsupported memory instruction"); + }; + + const AliasAnalysis::Location Loc = getLocation(AA, QueryInst); + bool isLoad = isa<LoadInst>(QueryInst); + BasicBlock *FromBB = QueryInst->getParent(); + assert(FromBB); + assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); + + // This routine does not expect to deal with volatile instructions. + // Doing so would require piping through the QueryInst all the way through. + // TODO: volatiles can't be elided, but they can be reordered with other + // non-volatile accesses. + + // We currently give up on any instruction which is ordered, but we do handle + // atomic instructions which are unordered. + // TODO: Handle ordered instructions + auto isOrdered = [](Instruction *Inst) { + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + return !LI->isUnordered(); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + return !SI->isUnordered(); + } + return false; + }; + if (isVolatile(QueryInst) || isOrdered(QueryInst)) { + Result.push_back(NonLocalDepResult(FromBB, + MemDepResult::getUnknown(), + const_cast<Value *>(Loc.Ptr))); + return; + } + - PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AT); + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC); // This is the set of blocks we've inspected, and the pointer we consider in // each block. 
Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. DenseMap<BasicBlock*, Value*> Visited; - if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB, + if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; Result.clear(); @@ -887,7 +946,8 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, /// lookup (which may use dirty cache info if available). If we do a lookup, /// add the result to the cache. MemDepResult MemoryDependenceAnalysis:: -GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, +GetNonLocalInfoForBlock(Instruction *QueryInst, + const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { @@ -928,7 +988,8 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, } // Scan the block for the dependency. - MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); + MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, + QueryInst); // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. @@ -1001,7 +1062,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, /// not compute dependence information for some reason. This should be treated /// as a clobber dependence on the first instruction in the predecessor block. bool MemoryDependenceAnalysis:: -getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, +getNonLocalPointerDepFromBB(Instruction *QueryInst, + const PHITransAddr &Pointer, const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepResult> &Result, @@ -1040,7 +1102,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, } else if (CacheInfo->Size > Loc.Size) { // This query's Size is less than the cached one. Conservatively restart // the query using the greater size. - return getNonLocalPointerDepFromBB(Pointer, + return getNonLocalPointerDepFromBB(QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad, StartBB, Result, Visited, SkipFirstBlock); @@ -1060,7 +1122,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, CacheInfo->NonLocalDeps.clear(); } if (Loc.AATags) - return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(), + return getNonLocalPointerDepFromBB(QueryInst, + Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result, Visited, SkipFirstBlock); } @@ -1145,7 +1208,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // cache value will only see properly sorted cache arrays. if (Cache && NumSortedEntries != Cache->size()) { SortNonLocalDepInfoCache(*Cache, NumSortedEntries); - NumSortedEntries = Cache->size(); } // Since we bail out, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate @@ -1164,7 +1226,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. DEBUG(AssertSorted(*Cache, NumSortedEntries)); - MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, + MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, + Loc, isLoad, BB, Cache, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. 
@@ -1298,7 +1361,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // result conflicted with the Visited list; we have to conservatively // assume it is unknown, but this also does not block PRE of the load. if (!CanTranslate || - getNonLocalPointerDepFromBB(PredPointer, + getNonLocalPointerDepFromBB(QueryInst, PredPointer, Loc.getWithNewPtr(PredPtrVal), isLoad, Pred, Result, Visited)) { @@ -1361,7 +1424,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, if (I->getBB() != BB) continue; - assert(I->getResult().isNonLocal() && + assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) && "Should only be here with transparent block"); I->setResult(MemDepResult::getUnknown()); Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index b3d060a..a534418 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AT)) { + if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AC)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -283,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AT)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -369,7 +369,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, SmallVectorImpl<Instruction*> &NewInsts) { // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. 
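// The Visited map threaded through the getNonLocalPointerDepFromBB calls above
// enforces a one-pointer-per-block invariant: if PHI translation reaches a
// block a second time with a different pointer, the whole query is abandoned.
// A toy model of that check (plain C++ with integer block ids and string
// pointers, not the in-tree DenseMap<BasicBlock*, Value*>):
#include <string>
#include <unordered_map>

static bool recordVisit(std::unordered_map<int, std::string> &Visited,
                        int Block, const std::string &Ptr) {
  auto Inserted = Visited.insert({Block, Ptr});
  // New block, or same block with the same translated pointer: fine.
  return Inserted.second || Inserted.first->second == Ptr;
}

int main() {
  std::unordered_map<int, std::string> Visited;
  bool Ok = recordVisit(Visited, 1, "p");
  Ok = Ok && recordVisit(Visited, 1, "p");  // revisit, same pointer
  Ok = Ok && !recordVisit(Visited, 1, "q"); // conflicting pointer: bail
  return Ok ? 0 : 1;
}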
- PHITransAddr Tmp(InVal, DL, AT); + PHITransAddr Tmp(InVal, DL, AC); if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) return Tmp.getAddr(); diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 08ebf0d..8cd8534 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -10,10 +10,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index de34b72..6fa7b2e 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -15,9 +15,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionPass.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Support/Timer.h" - #include "llvm/Support/Debug.h" +#include "llvm/Support/Timer.h" using namespace llvm; #define DEBUG_TYPE "regionpassmgr" diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 68549ef..9e4eb11 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -63,11 +63,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -87,7 +88,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -116,10 +116,10 @@ VerifySCEV("verify-scev", INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) char ScalarEvolution::ID = 0; @@ -675,62 +675,6 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, } } -static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue(); - APInt B = C2->getValue()->getValue(); - uint32_t ABW = A.getBitWidth(); - uint32_t BBW = B.getBitWidth(); - - if (ABW > BBW) - B = B.sext(ABW); - else if (ABW < BBW) - A = A.sext(BBW); - - return APIntOps::srem(A, B); -} - -static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue(); - APInt B = C2->getValue()->getValue(); - uint32_t ABW = A.getBitWidth(); - uint32_t BBW = B.getBitWidth(); - - if (ABW > BBW) - B = B.sext(ABW); - else if (ABW < BBW) - A = 
A.sext(BBW); - - return APIntOps::sdiv(A, B); -} - -static const APInt urem(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue(); - APInt B = C2->getValue()->getValue(); - uint32_t ABW = A.getBitWidth(); - uint32_t BBW = B.getBitWidth(); - - if (ABW > BBW) - B = B.zext(ABW); - else if (ABW < BBW) - A = A.zext(BBW); - - return APIntOps::urem(A, B); -} - -static const APInt udiv(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue(); - APInt B = C2->getValue()->getValue(); - uint32_t ABW = A.getBitWidth(); - uint32_t BBW = B.getBitWidth(); - - if (ABW > BBW) - B = B.zext(ABW); - else if (ABW < BBW) - A = A.zext(BBW); - - return APIntOps::udiv(A, B); -} - namespace { struct FindSCEVSize { int Size; @@ -757,8 +701,7 @@ static inline int sizeOfSCEV(const SCEV *S) { namespace { -template <typename Derived> -struct SCEVDivision : public SCEVVisitor<Derived, void> { +struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> { public: // Computes the Quotient and Remainder of the division of Numerator by // Denominator. @@ -767,7 +710,7 @@ public: const SCEV **Remainder) { assert(Numerator && Denominator && "Uninitialized SCEV"); - Derived D(SE, Numerator, Denominator); + SCEVDivision D(SE, Numerator, Denominator); // Check for the trivial case here to avoid having to check for it in the // rest of the code. @@ -819,6 +762,27 @@ public: void visitUnknown(const SCEVUnknown *Numerator) {} void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} + void visitConstant(const SCEVConstant *Numerator) { + if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { + APInt NumeratorVal = Numerator->getValue()->getValue(); + APInt DenominatorVal = D->getValue()->getValue(); + uint32_t NumeratorBW = NumeratorVal.getBitWidth(); + uint32_t DenominatorBW = DenominatorVal.getBitWidth(); + + if (NumeratorBW > DenominatorBW) + DenominatorVal = DenominatorVal.sext(NumeratorBW); + else if (NumeratorBW < DenominatorBW) + NumeratorVal = NumeratorVal.sext(DenominatorBW); + + APInt QuotientVal(NumeratorVal.getBitWidth(), 0); + APInt RemainderVal(NumeratorVal.getBitWidth(), 0); + APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal); + Quotient = SE.getConstant(QuotientVal); + Remainder = SE.getConstant(RemainderVal); + return; + } + } + void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { const SCEV *StartQ, *StartR, *StepQ, *StepR; assert(Numerator->isAffine() && "Numerator should be affine"); @@ -956,37 +920,6 @@ private: ScalarEvolution &SE; const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; - - friend struct SCEVSDivision; - friend struct SCEVUDivision; -}; - -struct SCEVSDivision : public SCEVDivision<SCEVSDivision> { - SCEVSDivision(ScalarEvolution &S, const SCEV *Numerator, - const SCEV *Denominator) - : SCEVDivision(S, Numerator, Denominator) {} - - void visitConstant(const SCEVConstant *Numerator) { - if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { - Quotient = SE.getConstant(sdiv(Numerator, D)); - Remainder = SE.getConstant(srem(Numerator, D)); - return; - } - } -}; - -struct SCEVUDivision : public SCEVDivision<SCEVUDivision> { - SCEVUDivision(ScalarEvolution &S, const SCEV *Numerator, - const SCEV *Denominator) - : SCEVDivision(S, Numerator, Denominator) {} - - void visitConstant(const SCEVConstant *Numerator) { - if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { - Quotient = SE.getConstant(udiv(Numerator, D)); - Remainder = SE.getConstant(urem(Numerator, 
D)); - return; - } - } }; } @@ -1215,6 +1148,183 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return S; } +// Get the limit of a recurrence such that incrementing by Step cannot cause +// signed overflow as long as the value of the recurrence within the +// loop does not exceed this limit before incrementing. +static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); + if (SE->isKnownPositive(Step)) { + *Pred = ICmpInst::ICMP_SLT; + return SE->getConstant(APInt::getSignedMinValue(BitWidth) - + SE->getSignedRange(Step).getSignedMax()); + } + if (SE->isKnownNegative(Step)) { + *Pred = ICmpInst::ICMP_SGT; + return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - + SE->getSignedRange(Step).getSignedMin()); + } + return nullptr; +} + +// Get the limit of a recurrence such that incrementing by Step cannot cause +// unsigned overflow as long as the value of the recurrence within the loop does +// not exceed this limit before incrementing. +static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); + *Pred = ICmpInst::ICMP_ULT; + + return SE->getConstant(APInt::getMinValue(BitWidth) - + SE->getUnsignedRange(Step).getUnsignedMax()); +} + +namespace { + +struct ExtendOpTraitsBase { + typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *); +}; + +// Used to make code generic over signed and unsigned overflow. +template <typename ExtendOp> struct ExtendOpTraits { + // Members present: + // + // static const SCEV::NoWrapFlags WrapType; + // + // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr; + // + // static const SCEV *getOverflowLimitForStep(const SCEV *Step, + // ICmpInst::Predicate *Pred, + // ScalarEvolution *SE); +}; + +template <> +struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase { + static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW; + + static const GetExtendExprTy GetExtendExpr; + + static const SCEV *getOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + return getSignedOverflowLimitForStep(Step, Pred, SE); + } +}; + +const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< + SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; + +template <> +struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase { + static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW; + + static const GetExtendExprTy GetExtendExpr; + + static const SCEV *getOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + return getUnsignedOverflowLimitForStep(Step, Pred, SE); + } +}; + +const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< + SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; +} + +// The recurrence AR has been shown to have no signed/unsigned wrap or something +// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as +// easily prove NSW/NUW for its preincrement or postincrement sibling. 
This +// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step + +// Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the +// expression "Step + sext/zext(PreIncAR)" is congruent with +// "sext/zext(PostIncAR)" +template <typename ExtendOpTy> +static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, + ScalarEvolution *SE) { + auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; + auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; + + const Loop *L = AR->getLoop(); + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // Check for a simple looking step prior to loop entry. + const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); + if (!SA) + return nullptr; + + // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV + // subtraction is expensive. For this purpose, perform a quick and dirty + // difference, by checking for Step in the operand list. + SmallVector<const SCEV *, 4> DiffOps; + for (const SCEV *Op : SA->operands()) + if (Op != Step) + DiffOps.push_back(Op); + + if (DiffOps.size() == SA->getNumOperands()) + return nullptr; + + // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` + + // `Step`: + + // 1. NSW/NUW flags on the step increment. + const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); + const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( + SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); + + // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies + // "S+X does not sign/unsign-overflow". + // + + const SCEV *BECount = SE->getBackedgeTakenCount(L); + if (PreAR && PreAR->getNoWrapFlags(WrapType) && + !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount)) + return PreStart; + + // 2. Direct overflow check on the step operation's expression. + unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); + Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); + const SCEV *OperandExtendedStart = + SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy), + (SE->*GetExtendExpr)(Step, WideTy)); + if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) { + if (PreAR && AR->getNoWrapFlags(WrapType)) { + // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW + // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then + // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. + const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType); + } + return PreStart; + } + + // 3. Loop precondition. + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = + ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE); + + if (OverflowLimit && + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + return PreStart; + } + return nullptr; +} + +// Get the normalized zero or sign extended expression for this AddRec's Start. 
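// The congruence that getExtendAddRecStart (defined next) relies on can be
// checked exhaustively at a small bit width: whenever PreStart + Step does not
// sign-overflow, sext(PreStart + Step) equals sext(Step) + sext(PreStart) in
// the wider type. A brute-force sketch (plain C++, i8 widened to i16, not the
// SCEV code itself):
#include <cassert>
#include <cstdint>

int main() {
  for (int Pre = -128; Pre <= 127; ++Pre)
    for (int Step = -128; Step <= 127; ++Step) {
      int Sum = Pre + Step;
      if (Sum < INT8_MIN || Sum > INT8_MAX)
        continue; // sign-overflow: the congruence is not claimed here
      int16_t ExtendAfterAdd = static_cast<int8_t>(Sum); // sext(Pre + Step)
      int16_t AddAfterExtend =
          static_cast<int16_t>(Step) + static_cast<int16_t>(Pre);
      assert(ExtendAfterAdd == AddAfterExtend);
    }
  return 0;
}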
+template <typename ExtendOpTy> +static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, + ScalarEvolution *SE) { + auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; + + const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE); + if (!PreStart) + return (SE->*GetExtendExpr)(AR->getStart(), Ty); + + return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty), + (SE->*GetExtendExpr)(PreStart, Ty)); +} + const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1268,9 +1378,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->getNoWrapFlags(SCEV::FlagNUW)) - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1307,9 +1417,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. @@ -1322,9 +1432,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1342,9 +1452,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - @@ -1357,9 +1467,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. 
- return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } } @@ -1374,104 +1484,6 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, return S; } -// Get the limit of a recurrence such that incrementing by Step cannot cause -// signed overflow as long as the value of the recurrence within the loop does -// not exceed this limit before incrementing. -static const SCEV *getOverflowLimitForStep(const SCEV *Step, - ICmpInst::Predicate *Pred, - ScalarEvolution *SE) { - unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); - if (SE->isKnownPositive(Step)) { - *Pred = ICmpInst::ICMP_SLT; - return SE->getConstant(APInt::getSignedMinValue(BitWidth) - - SE->getSignedRange(Step).getSignedMax()); - } - if (SE->isKnownNegative(Step)) { - *Pred = ICmpInst::ICMP_SGT; - return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - - SE->getSignedRange(Step).getSignedMin()); - } - return nullptr; -} - -// The recurrence AR has been shown to have no signed wrap. Typically, if we can -// prove NSW for AR, then we can just as easily prove NSW for its preincrement -// or postincrement sibling. This allows normalizing a sign extended AddRec as -// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a -// result, the expression "Step + sext(PreIncAR)" is congruent with -// "sext(PostIncAR)" -static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, - Type *Ty, - ScalarEvolution *SE) { - const Loop *L = AR->getLoop(); - const SCEV *Start = AR->getStart(); - const SCEV *Step = AR->getStepRecurrence(*SE); - - // Check for a simple looking step prior to loop entry. - const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); - if (!SA) - return nullptr; - - // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV - // subtraction is expensive. For this purpose, perform a quick and dirty - // difference, by checking for Step in the operand list. - SmallVector<const SCEV *, 4> DiffOps; - for (const SCEV *Op : SA->operands()) - if (Op != Step) - DiffOps.push_back(Op); - - if (DiffOps.size() == SA->getNumOperands()) - return nullptr; - - // This is a postinc AR. Check for overflow on the preinc recurrence using the - // same three conditions that getSignExtendedExpr checks. - - // 1. NSW flags on the step increment. - const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); - const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( - SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); - - if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW)) - return PreStart; - - // 2. Direct overflow check on the step operation's expression. - unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); - Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); - const SCEV *OperandExtendedStart = - SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy), - SE->getSignExtendExpr(Step, WideTy)); - if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) { - // Cache knowledge of PreAR NSW. - if (PreAR) - const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW); - // FIXME: this optimization needs a unit test - DEBUG(dbgs() << "SCEV: untested prestart overflow check\n"); - return PreStart; - } - - // 3. Loop precondition. 
- ICmpInst::Predicate Pred; - const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE); - - if (OverflowLimit && - SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { - return PreStart; - } - return nullptr; -} - -// Get the normalized sign-extended expression for this AddRec's Start. -static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, - Type *Ty, - ScalarEvolution *SE) { - const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE); - if (!PreStart) - return SE->getSignExtendExpr(AR->getStart(), Ty); - - return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty), - SE->getSignExtendExpr(PreStart, Ty)); -} - const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1550,9 +1562,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->getNoWrapFlags(SCEV::FlagNSW)) - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), - L, SCEV::FlagNSW); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1589,9 +1601,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. @@ -1600,12 +1612,20 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); if (SAdd == OperandExtendedAdd) { - // Cache knowledge of AR NSW, which is propagated to this AddRec. - const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + // If AR wraps around then + // + // abs(Step) * MaxBECount > unsigned-max(AR->getType()) + // => SAdd != OperandExtendedAdd + // + // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> + // (SAdd == OperandExtendedAdd => AR is NW) + + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); + // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1614,7 +1634,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // with the start value and the backedge is guarded by a comparison // with the post-inc value, the addrec is safe. 
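// Concretely, the overflow limit used below is SINT_MIN minus the step's
// maximum, evaluated in the same wrapping bit width. An i8 sketch of the
// positive-step case (plain C++, not the APInt-based helper):
#include <cassert>
#include <cstdint>

int main() {
  const int StepMax = 3; // assume the step is known to lie in [1, 3]
  // INT8_MIN - StepMax computed modulo 2^8, as APInt would: -128 - 3 == 125.
  const int8_t Limit =
      static_cast<int8_t>(static_cast<uint8_t>(INT8_MIN) - StepMax);
  assert(Limit == 125);
  // Any recurrence value v with v <s Limit can take one more step without
  // signed overflow: v + 3 <= 124 + 3 == 127 == INT8_MAX.
  for (int V = INT8_MIN; V < Limit; ++V)
    assert(V + StepMax <= INT8_MAX);
  return 0;
}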
ICmpInst::Predicate Pred; - const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this); + const SCEV *OverflowLimit = + getSignedOverflowLimitForStep(Step, &Pred, this); if (OverflowLimit && (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && @@ -1622,9 +1643,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, OverflowLimit)))) { // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } // If Start and Step are constants, check if we can apply this @@ -1804,6 +1825,36 @@ namespace { }; } +// We're trying to construct a SCEV of type `Type' with `Ops' as operands and +// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of +// can't-overflow flags for the operation if possible. +static SCEV::NoWrapFlags +StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, + const SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags OldFlags) { + using namespace std::placeholders; + + bool CanAnalyze = + Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; + (void)CanAnalyze; + assert(CanAnalyze && "don't call from other places!"); + + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = + ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask); + + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + auto IsKnownNonNegative = + std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1); + + if (SignOrUnsignWrap == SCEV::FlagNSW && + std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative)) + return ScalarEvolution::setFlags(OldFlags, + (SCEV::NoWrapFlags)SignOrUnsignMask); + + return OldFlags; +} + /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, @@ -1819,20 +1870,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, "SCEVAddExpr operand types don't match!"); #endif - // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. - // And vice-versa. - int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; - SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); - if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { - bool All = true; - for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), - E = Ops.end(); I != E; ++I) - if (!isKnownNonNegative(*I)) { - All = false; - break; - } - if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); - } + Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, LI); @@ -2207,6 +2245,24 @@ static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { return r; } +/// Determine if any of the operands in this SCEV are a constant or if +/// any of the add or multiply expressions in this SCEV contain a constant. 
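// containsConstantSomewhere, defined next, is a simple worklist walk over the
// operands of nested add/mul expressions. A self-contained model of the same
// traversal (plain C++ over a toy expression tree, not the SCEV classes):
#include <vector>

struct Expr {
  bool IsConstant = false;
  bool IsAddOrMul = false;
  std::vector<const Expr *> Ops; // only meaningful when IsAddOrMul
};

static bool containsConstant(const Expr *Root) {
  std::vector<const Expr *> Worklist{Root};
  while (!Worklist.empty()) {
    const Expr *E = Worklist.back();
    Worklist.pop_back();
    if (E->IsConstant)
      return true;
    if (E->IsAddOrMul) // only add/mul operands are searched, as above
      Worklist.insert(Worklist.end(), E->Ops.begin(), E->Ops.end());
  }
  return false;
}

int main() {
  Expr C{true, false, {}}, X{false, false, {}};
  Expr Inner{false, true, {&X, &C}};     // (x + 2)
  Expr Outer{false, true, {&X, &Inner}}; // x * (x + 2)
  return containsConstant(&Outer) ? 0 : 1;
}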
+static bool containsConstantSomewhere(const SCEV *StartExpr) { + SmallVector<const SCEV *, 4> Ops; + Ops.push_back(StartExpr); + while (!Ops.empty()) { + const SCEV *CurrentExpr = Ops.pop_back_val(); + if (isa<SCEVConstant>(*CurrentExpr)) + return true; + + if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) { + const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr); + Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end()); + } + } + return false; +} + /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, @@ -2222,20 +2278,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, "SCEVMulExpr operand types don't match!"); #endif - // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. - // And vice-versa. - int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; - SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); - if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { - bool All = true; - for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), - E = Ops.end(); I != E; ++I) - if (!isKnownNonNegative(*I)) { - All = false; - break; - } - if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); - } + Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, LI); @@ -2246,11 +2289,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // C1*(C2+V) -> C1*C2 + C1*V if (Ops.size() == 2) - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) - if (Add->getNumOperands() == 2 && - isa<SCEVConstant>(Add->getOperand(0))) - return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), - getMulExpr(LHSC, Add->getOperand(1))); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) + // If any of Add's ops are Adds or Muls with a constant, + // apply this transformation as well. + if (Add->getNumOperands() == 2) + if (containsConstantSomewhere(Add)) + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), + getMulExpr(LHSC, Add->getOperand(1))); ++Idx; while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { @@ -2699,20 +2744,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, // meaningful BE count at this point (and if we don't, we'd be stuck // with a SCEVCouldNotCompute as the cached BE count). - // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. - // And vice-versa. - int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; - SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); - if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { - bool All = true; - for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(), - E = Operands.end(); I != E; ++I) - if (!isKnownNonNegative(*I)) { - All = false; - break; - } - if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); - } + Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); // Canonicalize nested AddRecs in by nesting them in order of loop depth. 
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { @@ -3209,8 +3241,9 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, if (LHS == RHS) return getConstant(LHS->getType(), 0); - // X - Y --> X + -Y - return getAddExpr(LHS, getNegativeSCEV(RHS), Flags); + // X - Y --> X + -Y. + // X -(nsw || nuw) Y --> X + -Y. + return getAddExpr(LHS, getNegativeSCEV(RHS)); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -3516,12 +3549,10 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) Flags = setFlags(Flags, SCEV::FlagNUW); } - } else if (const SubOperator *OBO = - dyn_cast<SubOperator>(BEValueV)) { - if (OBO->hasNoUnsignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (OBO->hasNoSignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNSW); + + // We cannot transfer nuw and nsw flags from subtraction + // operations -- sub nuw X, Y is not the same as add nuw X, -Y + // for instance. } const SCEV *StartVal = getSCEV(StartValueV); @@ -3577,7 +3608,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AT)) + if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3709,7 +3740,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); return Zeros.countTrailingOnes(); } @@ -3729,8 +3760,10 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { assert(NumRanges >= 1); for (unsigned i = 0; i < NumRanges; ++i) { - ConstantInt *Lower = cast<ConstantInt>(MD->getOperand(2*i + 0)); - ConstantInt *Upper = cast<ConstantInt>(MD->getOperand(2*i + 1)); + ConstantInt *Lower = + mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0)); + ConstantInt *Upper = + mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1)); ConstantRange Range(Lower->getValue(), Upper->getValue()); TotalRange = TotalRange.unionWith(Range); } @@ -3878,7 +3911,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -4035,7 +4068,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. 
if (!U->getValue()->getType()->isIntegerTy() && !DL) return setSignedRange(U, ConservativeResult); - unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AT, nullptr, DT); + unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT); if (NS <= 1) return setSignedRange(U, ConservativeResult); return setSignedRange(U, ConservativeResult.intersectWith( @@ -4142,8 +4175,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, - 0, AT, nullptr, DT); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC, + nullptr, DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4334,9 +4367,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case ICmpInst::ICMP_SGE: // a >s b ? a+x : b+x -> smax(a, b)+x // a >s b ? b+x : a+x -> smin(a, b)+x - if (LHS->getType() == U->getType()) { - const SCEV *LS = getSCEV(LHS); - const SCEV *RS = getSCEV(RHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType())) { + const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType()); + const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, LS); @@ -4357,9 +4391,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case ICmpInst::ICMP_UGE: // a >u b ? a+x : b+x -> umax(a, b)+x // a >u b ? b+x : a+x -> umin(a, b)+x - if (LHS->getType() == U->getType()) { - const SCEV *LS = getSCEV(LHS); - const SCEV *RS = getSCEV(RHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType())) { + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); + const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, LS); @@ -4374,11 +4409,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case ICmpInst::ICMP_NE: // n != 0 ? n+x : 1+x -> umax(n, 1)+x - if (LHS->getType() == U->getType() && - isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(LHS->getType(), 1); - const SCEV *LS = getSCEV(LHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType()) && + isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(U->getType(), 1); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, LS); @@ -4389,11 +4424,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case ICmpInst::ICMP_EQ: // n == 0 ? 
1+x : n+x -> umax(n, 1)+x - if (LHS->getType() == U->getType() && - isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(LHS->getType(), 1); - const SCEV *LS = getSCEV(LHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType()) && + isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(U->getType(), 1); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, One); @@ -6138,15 +6173,18 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { return ExitLimit(Distance, MaxBECount); } - // If the step exactly divides the distance then unsigned divide computes the - // backedge count. - const SCEV *Q, *R; - ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); - SCEVUDivision::divide(SE, Distance, Step, &Q, &R); - if (R->isZero()) { - const SCEV *Exact = - getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact); + // As a special case, handle the instance where Step is a positive power of + // two. In this case, determining whether Step divides Distance evenly can be + // done by counting and comparing the number of trailing zeros of Step and + // Distance. + if (!CountDown) { + const APInt &StepV = StepC->getValue()->getValue(); + // StepV.isPowerOf2() returns true if StepV is a positive power of two. It + // also returns true if StepV is maximally negative (e.g., INT_MIN), but that + // case is not handled as this code is guarded by !CountDown. + if (StepV.isPowerOf2() && + GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) + return getUDivExactExpr(Distance, Step); } // If the condition controls loop exit (the loop exits only if the expression @@ -6671,7 +6709,10 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return true; // Check conditions due to any @llvm.assume intrinsics. - for (auto &CI : AT->assumptions(F)) { + for (auto &AssumeVH : AC->assumptions()) { + if (!AssumeVH) + continue; + auto *CI = cast<CallInst>(AssumeVH); if (!DT->dominates(CI, Latch->getTerminator())) continue; @@ -6716,7 +6757,10 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, } // Check conditions due to any @llvm.assume intrinsics. - for (auto &CI : AT->assumptions(F)) { + for (auto &AssumeVH : AC->assumptions()) { + if (!AssumeVH) + continue; + auto *CI = cast<CallInst>(AssumeVH); if (!DT->dominates(CI, L->getHeader())) continue; @@ -6927,6 +6971,85 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, getNotSCEV(FoundLHS)); } + +/// If Expr computes ~A, return A; otherwise return nullptr +static const SCEV *MatchNotExpr(const SCEV *Expr) { + const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); + if (!Add || Add->getNumOperands() != 2) return nullptr; + + const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0)); + if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue())) + return nullptr; + + const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); + if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr; + + const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0)); + if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue())) + return nullptr; + + return AddRHS->getOperand(1); +} + + +/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values? 
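// MatchNotExpr works because SCEV writes ~A as (-1) + (-1)*A: in two's
// complement, ~A == -A - 1. That identity, plus the duality min(a, b) ==
// ~max(~a, ~b) used by IsMinConsistingOf below, checks out exhaustively at a
// small width. A brute-force sketch (plain C++ over int8_t, not SCEV):
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int A = -128; A <= 127; ++A) {
    assert(static_cast<int8_t>(~A) == static_cast<int8_t>(-A - 1));
    for (int B = -128; B <= 127; ++B) {
      int8_t NotA = ~static_cast<int8_t>(A), NotB = ~static_cast<int8_t>(B);
      int8_t MinViaMax = ~std::max(NotA, NotB); // min(A, B) via the duality
      assert(MinViaMax == std::min<int8_t>(A, B));
    }
  }
  return 0;
}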
+template<typename MaxExprType> +static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, + const SCEV *Candidate) { + const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr); + if (!MaxExpr) return false; + + auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate); + return It != MaxExpr->op_end(); +} + + +/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values? +template<typename MaxExprType> +static bool IsMinConsistingOf(ScalarEvolution &SE, + const SCEV *MaybeMinExpr, + const SCEV *Candidate) { + const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr); + if (!MaybeMaxExpr) + return false; + + return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate)); +} + + +/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max +/// expression? +static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + switch (Pred) { + default: + return false; + + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_SLE: + return + // min(A, ...) <= A + IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) || + // A <= max(A, ...) + IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS); + + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_ULE: + return + // min(A, ...) <= A + IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) || + // A <= max(A, ...) + IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS); + } + + llvm_unreachable("covered switch fell through?!"); +} + /// isImpliedCondOperandsHelper - Test whether the condition described by /// Pred, LHS, and RHS is true whenever the condition described by Pred, /// FoundLHS, and FoundRHS is true. @@ -6935,6 +7058,12 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { + auto IsKnownPredicateFull = + [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { + return isKnownPredicateWithRanges(Pred, LHS, RHS) || + IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS); + }; + switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: @@ -6944,26 +7073,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, break; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && - isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && - isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && - isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && - isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, 
LHS, FoundLHS) && + IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS)) return true; break; } @@ -6971,8 +7100,8 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, return false; } -// Verify if an linear IV with positive stride can overflow when in a -// less-than comparison, knowing the invariant term of the comparison, the +// Verify if a linear IV with positive stride can overflow when in a +// less-than comparison, knowing the invariant term of the comparison, the // stride and the knowledge of NSW/NUW flags on the recurrence. bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap) { @@ -7000,7 +7129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, return (MaxValue - MaxStrideMinusOne).ult(MaxRHS); } -// Verify if an linear IV with negative stride can overflow when in a +// Verify if a linear IV with negative stride can overflow when in a // greater-than comparison, knowing the invariant term of the comparison, // the stride and the knowledge of NSW/NUW flags on the recurrence. bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, @@ -7031,7 +7160,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, // Compute the backedge taken count knowing the interval difference, the // stride and presence of the equality in the comparison. -const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, +const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, bool Equality) { const SCEV *One = getConstant(Step->getType(), 1); Delta = Equality ? getAddExpr(Delta, Step) @@ -7071,7 +7200,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // Avoid proven overflow cases: this will ensure that the backedge taken count // will not generate any unsigned overflow. Relaxed no-overflow conditions - // exploit NoWrapFlags, allowing to optimize in presence of undefined + // exploit NoWrapFlags, allowing us to optimize in the presence of undefined // behavior, as in C. if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) return getCouldNotCompute(); @@ -7151,7 +7280,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, // Avoid proven overflow cases: this will ensure that the backedge taken count // will not generate any unsigned overflow. Relaxed no-overflow conditions - // exploit NoWrapFlags, allowing to optimize in presence of undefined + // exploit NoWrapFlags, allowing us to optimize in the presence of undefined // behavior, as in C. if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) return getCouldNotCompute(); @@ -7199,7 +7328,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, if (isa<SCEVConstant>(BECount)) MaxBECount = BECount; else - MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), + MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), getConstant(MinStride), false); if (isa<SCEVCouldNotCompute>(MaxBECount)) @@ -7457,7 +7586,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, for (const SCEV *&Term : Terms) { // Normalize the terms before the next call to findArrayDimensionsRec. const SCEV *Q, *R; - SCEVSDivision::divide(SE, Term, Step, &Q, &R); + SCEVDivision::divide(SE, Term, Step, &Q, &R); // Bail out when GCD does not evenly divide one of the terms. 
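// Exact-divisibility checks like the one below (R must be zero) have a cheap
// special case when the divisor is a power of two, which is what HowFarToZero
// above exploits: 2^k divides Distance iff Distance has at least k trailing
// zero bits. A small sketch (plain C++, not the APInt code):
#include <cassert>
#include <cstdint>

static int countTrailingZeros(uint32_t V) {
  int N = 0;
  for (; V && !(V & 1); V >>= 1)
    ++N;
  return N; // 0 for odd values; callers here never pass 0
}

int main() {
  const uint32_t Step = 8; // 2^3
  for (uint32_t Distance = 1; Distance < 4096; ++Distance) {
    bool DividesExactly = Distance % Step == 0;
    bool EnoughZeros = countTrailingZeros(Distance) >= countTrailingZeros(Step);
    assert(DividesExactly == EnoughZeros);
  }
  return 0;
}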
if (!R->isZero()) @@ -7594,7 +7723,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, // Divide all terms by the element size. for (const SCEV *&Term : Terms) { const SCEV *Q, *R; - SCEVSDivision::divide(SE, Term, ElementSize, &Q, &R); + SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); Term = Q; } @@ -7641,7 +7770,7 @@ void SCEVAddRecExpr::computeAccessFunctions( int Last = Sizes.size() - 1; for (int i = Last; i >= 0; i--) { const SCEV *Q, *R; - SCEVSDivision::divide(SE, Res, Sizes[i], &Q, &R); + SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R); DEBUG({ dbgs() << "Res: " << *Res << "\n"; @@ -7825,11 +7954,11 @@ ScalarEvolution::ScalarEvolution() bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; - AT = &getAnalysis<AssumptionTracker>(); - LI = &getAnalysis<LoopInfo>(); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); DL = DLP ? &DLP->getDataLayout() : nullptr; - TLI = &getAnalysis<TargetLibraryInfo>(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); return false; } @@ -7866,10 +7995,10 @@ void ScalarEvolution::releaseMemory() { void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<AssumptionTracker>(); - AU.addRequiredTransitive<LoopInfo>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequiredTransitive<LoopInfoWrapperPass>(); AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { @@ -7960,17 +8089,17 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { ScalarEvolution::LoopDisposition ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { - SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S]; - for (unsigned u = 0; u < Values.size(); u++) { - if (Values[u].first == L) - return Values[u].second; + auto &Values = LoopDispositions[S]; + for (auto &V : Values) { + if (V.getPointer() == L) + return V.getInt(); } - Values.push_back(std::make_pair(L, LoopVariant)); + Values.emplace_back(L, LoopVariant); LoopDisposition D = computeLoopDisposition(S, L); - SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S]; - for (unsigned u = Values2.size(); u > 0; u--) { - if (Values2[u - 1].first == L) { - Values2[u - 1].second = D; + auto &Values2 = LoopDispositions[S]; + for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + if (V.getPointer() == L) { + V.setInt(D); break; } } @@ -8066,17 +8195,17 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { ScalarEvolution::BlockDisposition ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { - SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S]; - for (unsigned u = 0; u < Values.size(); u++) { - if (Values[u].first == BB) - return Values[u].second; + auto &Values = BlockDispositions[S]; + for (auto &V : Values) { + if (V.getPointer() == BB) + return V.getInt(); } - Values.push_back(std::make_pair(BB, DoesNotDominateBlock)); + Values.emplace_back(BB, DoesNotDominateBlock); BlockDisposition D = computeBlockDisposition(S, BB); - SmallVector<std::pair<const BasicBlock *, 
BlockDisposition>, 2> &Values2 = BlockDispositions[S]; - for (unsigned u = Values2.size(); u > 0; u--) { - if (Values2[u - 1].first == BB) { - Values2[u - 1].second = D; + auto &Values2 = BlockDispositions[S]; + for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + if (V.getPointer() == BB) { + V.setInt(D); break; } } diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index bee3685..2625cf3 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1063,6 +1063,34 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE, return false; } +static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa<IntegerType>(AR->getType())) + return false; + + unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy), + SE.getSignExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + +static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa<IntegerType>(AR->getType())) + return false; + + unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy), + SE.getZeroExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. @@ -1188,6 +1216,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Expand the step somewhere that dominates the loop header. Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if + // we actually do emit an addition. It does not apply if we emit a + // subtraction. + bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized); + bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized); + // Create the PHI. BasicBlock *Header = L->getHeader(); Builder.SetInsertPoint(Header, Header->begin()); @@ -1213,10 +1247,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + if (isa<OverflowingBinaryOperator>(IncV)) { - if (Normalized->getNoWrapFlags(SCEV::FlagNUW)) + if (IncrementIsNUW) cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap(); - if (Normalized->getNoWrapFlags(SCEV::FlagNSW)) + if (IncrementIsNSW) cast<BinaryOperator>(IncV)->setHasNoSignedWrap(); } PN->addIncoming(IncV, Pred); @@ -1711,7 +1746,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. 
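// The widened-type comparison behind IsIncrementNSW/IsIncrementNUW above can
// be validated by brute force at i8: a + b avoids signed wrap exactly when
// sext(a + b) equals sext(a) + sext(b) in the doubled width. A sketch (plain
// C++, not the SCEV-based helpers):
#include <cassert>
#include <cstdint>

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B) {
      int8_t Narrow = static_cast<int8_t>(A + B); // wraps modulo 2^8
      int16_t ExtendAfterOp = Narrow;             // sext(a + b)
      int16_t OpAfterExtend = static_cast<int16_t>(A) + static_cast<int16_t>(B);
      bool NoSignedWrap = (A + B >= INT8_MIN) && (A + B <= INT8_MAX);
      assert(NoSignedWrap == (ExtendAfterOp == OpAfterExtend));
    }
  return 0;
}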
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AT)) { + if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) { Phi->replaceAllUsesWith(V); DeadInsts.push_back(Phi); ++NumElim; diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp index f6c300a..c6ea3af 100644 --- a/lib/Analysis/ScopedNoAliasAA.cpp +++ b/lib/Analysis/ScopedNoAliasAA.cpp @@ -33,8 +33,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp new file mode 100644 index 0000000..91041fc --- /dev/null +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -0,0 +1,810 @@ +//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetLibraryInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/ADT/Triple.h" +using namespace llvm; + +const char* TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = + { + "_IO_getc", + "_IO_putc", + "_ZdaPv", + "_ZdaPvRKSt9nothrow_t", + "_ZdaPvj", + "_ZdaPvm", + "_ZdlPv", + "_ZdlPvRKSt9nothrow_t", + "_ZdlPvj", + "_ZdlPvm", + "_Znaj", + "_ZnajRKSt9nothrow_t", + "_Znam", + "_ZnamRKSt9nothrow_t", + "_Znwj", + "_ZnwjRKSt9nothrow_t", + "_Znwm", + "_ZnwmRKSt9nothrow_t", + "__cospi", + "__cospif", + "__cxa_atexit", + "__cxa_guard_abort", + "__cxa_guard_acquire", + "__cxa_guard_release", + "__isoc99_scanf", + "__isoc99_sscanf", + "__memcpy_chk", + "__memmove_chk", + "__memset_chk", + "__sincospi_stret", + "__sincospif_stret", + "__sinpi", + "__sinpif", + "__sqrt_finite", + "__sqrtf_finite", + "__sqrtl_finite", + "__stpcpy_chk", + "__stpncpy_chk", + "__strcpy_chk", + "__strdup", + "__strncpy_chk", + "__strndup", + "__strtok_r", + "abs", + "access", + "acos", + "acosf", + "acosh", + "acoshf", + "acoshl", + "acosl", + "asin", + "asinf", + "asinh", + "asinhf", + "asinhl", + "asinl", + "atan", + "atan2", + "atan2f", + "atan2l", + "atanf", + "atanh", + "atanhf", + "atanhl", + "atanl", + "atof", + "atoi", + "atol", + "atoll", + "bcmp", + "bcopy", + "bzero", + "calloc", + "cbrt", + "cbrtf", + "cbrtl", + "ceil", + "ceilf", + "ceill", + "chmod", + "chown", + "clearerr", + "closedir", + "copysign", + "copysignf", + "copysignl", + "cos", + "cosf", + "cosh", + "coshf", + "coshl", + "cosl", + "ctermid", + "exp", + "exp10", + "exp10f", + "exp10l", + "exp2", + "exp2f", + "exp2l", + "expf", + "expl", + "expm1", + "expm1f", + "expm1l", + "fabs", + "fabsf", + "fabsl", + "fclose", + "fdopen", + "feof", + "ferror", + "fflush", + "ffs", + "ffsl", + "ffsll", + "fgetc", + "fgetpos", + "fgets", + "fileno", + "fiprintf", + "flockfile", + "floor", + "floorf", + "floorl", + "fmax", + "fmaxf", + "fmaxl", + "fmin", + "fminf", + "fminl", + "fmod", + "fmodf", + "fmodl", + "fopen", + "fopen64", + "fprintf", + "fputc", + "fputs", + "fread", + "free", + "frexp", + "frexpf", + "frexpl", + "fscanf", + "fseek", + "fseeko", + 
"fseeko64", + "fsetpos", + "fstat", + "fstat64", + "fstatvfs", + "fstatvfs64", + "ftell", + "ftello", + "ftello64", + "ftrylockfile", + "funlockfile", + "fwrite", + "getc", + "getc_unlocked", + "getchar", + "getenv", + "getitimer", + "getlogin_r", + "getpwnam", + "gets", + "gettimeofday", + "htonl", + "htons", + "iprintf", + "isascii", + "isdigit", + "labs", + "lchown", + "ldexp", + "ldexpf", + "ldexpl", + "llabs", + "log", + "log10", + "log10f", + "log10l", + "log1p", + "log1pf", + "log1pl", + "log2", + "log2f", + "log2l", + "logb", + "logbf", + "logbl", + "logf", + "logl", + "lstat", + "lstat64", + "malloc", + "memalign", + "memccpy", + "memchr", + "memcmp", + "memcpy", + "memmove", + "memrchr", + "memset", + "memset_pattern16", + "mkdir", + "mktime", + "modf", + "modff", + "modfl", + "nearbyint", + "nearbyintf", + "nearbyintl", + "ntohl", + "ntohs", + "open", + "open64", + "opendir", + "pclose", + "perror", + "popen", + "posix_memalign", + "pow", + "powf", + "powl", + "pread", + "printf", + "putc", + "putchar", + "puts", + "pwrite", + "qsort", + "read", + "readlink", + "realloc", + "reallocf", + "realpath", + "remove", + "rename", + "rewind", + "rint", + "rintf", + "rintl", + "rmdir", + "round", + "roundf", + "roundl", + "scanf", + "setbuf", + "setitimer", + "setvbuf", + "sin", + "sinf", + "sinh", + "sinhf", + "sinhl", + "sinl", + "siprintf", + "snprintf", + "sprintf", + "sqrt", + "sqrtf", + "sqrtl", + "sscanf", + "stat", + "stat64", + "statvfs", + "statvfs64", + "stpcpy", + "stpncpy", + "strcasecmp", + "strcat", + "strchr", + "strcmp", + "strcoll", + "strcpy", + "strcspn", + "strdup", + "strlen", + "strncasecmp", + "strncat", + "strncmp", + "strncpy", + "strndup", + "strnlen", + "strpbrk", + "strrchr", + "strspn", + "strstr", + "strtod", + "strtof", + "strtok", + "strtok_r", + "strtol", + "strtold", + "strtoll", + "strtoul", + "strtoull", + "strxfrm", + "system", + "tan", + "tanf", + "tanh", + "tanhf", + "tanhl", + "tanl", + "times", + "tmpfile", + "tmpfile64", + "toascii", + "trunc", + "truncf", + "truncl", + "uname", + "ungetc", + "unlink", + "unsetenv", + "utime", + "utimes", + "valloc", + "vfprintf", + "vfscanf", + "vprintf", + "vscanf", + "vsnprintf", + "vsprintf", + "vsscanf", + "write" + }; + +static bool hasSinCosPiStret(const Triple &T) { + // Only Darwin variants have _stret versions of combined trig functions. + if (!T.isOSDarwin()) + return false; + + // The ABI is rather complicated on x86, so don't do anything special there. + if (T.getArch() == Triple::x86) + return false; + + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9)) + return false; + + if (T.isiOS() && T.isOSVersionLT(7, 0)) + return false; + + return true; +} + +/// initialize - Initialize the set of available library functions based on the +/// specified target triple. This should be carefully written so that a missing +/// target triple gets a sane set of defaults. +static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, + const char **StandardNames) { +#ifndef NDEBUG + // Verify that the StandardNames array is in alphabetical order. + for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) { + if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0) + llvm_unreachable("TargetLibraryInfoImpl function names must be sorted"); + } +#endif // !NDEBUG + + // There are no library implementations of mempcy and memset for AMD gpus and + // these can be difficult to lower in the backend. 
+ if (T.getArch() == Triple::r600 || + T.getArch() == Triple::amdgcn) { + TLI.setUnavailable(LibFunc::memcpy); + TLI.setUnavailable(LibFunc::memset); + TLI.setUnavailable(LibFunc::memset_pattern16); + return; + } + + // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later. + if (T.isMacOSX()) { + if (T.isMacOSXVersionLT(10, 5)) + TLI.setUnavailable(LibFunc::memset_pattern16); + } else if (T.isiOS()) { + if (T.isOSVersionLT(3, 0)) + TLI.setUnavailable(LibFunc::memset_pattern16); + } else { + TLI.setUnavailable(LibFunc::memset_pattern16); + } + + if (!hasSinCosPiStret(T)) { + TLI.setUnavailable(LibFunc::sinpi); + TLI.setUnavailable(LibFunc::sinpif); + TLI.setUnavailable(LibFunc::cospi); + TLI.setUnavailable(LibFunc::cospif); + TLI.setUnavailable(LibFunc::sincospi_stret); + TLI.setUnavailable(LibFunc::sincospif_stret); + } + + if (T.isMacOSX() && T.getArch() == Triple::x86 && + !T.isMacOSXVersionLT(10, 7)) { + // x86-32 OSX has a scheme where fwrite and fputs (and some other functions + // we don't care about) have two versions; on recent OSX, the one we want + // has a $UNIX2003 suffix. The two implementations are identical except + // for the return value in some edge cases. However, we don't want to + // generate code that depends on the old symbols. + TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003"); + TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003"); + } + + // iprintf and friends are only available on XCore and TCE. + if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) { + TLI.setUnavailable(LibFunc::iprintf); + TLI.setUnavailable(LibFunc::siprintf); + TLI.setUnavailable(LibFunc::fiprintf); + } + + if (T.isOSWindows() && !T.isOSCygMing()) { + // Win32 does not support long double + TLI.setUnavailable(LibFunc::acosl); + TLI.setUnavailable(LibFunc::asinl); + TLI.setUnavailable(LibFunc::atanl); + TLI.setUnavailable(LibFunc::atan2l); + TLI.setUnavailable(LibFunc::ceill); + TLI.setUnavailable(LibFunc::copysignl); + TLI.setUnavailable(LibFunc::cosl); + TLI.setUnavailable(LibFunc::coshl); + TLI.setUnavailable(LibFunc::expl); + TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf + TLI.setUnavailable(LibFunc::fabsl); + TLI.setUnavailable(LibFunc::floorl); + TLI.setUnavailable(LibFunc::fmaxl); + TLI.setUnavailable(LibFunc::fminl); + TLI.setUnavailable(LibFunc::fmodl); + TLI.setUnavailable(LibFunc::frexpl); + TLI.setUnavailable(LibFunc::ldexpf); + TLI.setUnavailable(LibFunc::ldexpl); + TLI.setUnavailable(LibFunc::logl); + TLI.setUnavailable(LibFunc::modfl); + TLI.setUnavailable(LibFunc::powl); + TLI.setUnavailable(LibFunc::sinl); + TLI.setUnavailable(LibFunc::sinhl); + TLI.setUnavailable(LibFunc::sqrtl); + TLI.setUnavailable(LibFunc::tanl); + TLI.setUnavailable(LibFunc::tanhl); + + // Win32 only has C89 math + TLI.setUnavailable(LibFunc::acosh); + TLI.setUnavailable(LibFunc::acoshf); + TLI.setUnavailable(LibFunc::acoshl); + TLI.setUnavailable(LibFunc::asinh); + TLI.setUnavailable(LibFunc::asinhf); + TLI.setUnavailable(LibFunc::asinhl); + TLI.setUnavailable(LibFunc::atanh); + TLI.setUnavailable(LibFunc::atanhf); + TLI.setUnavailable(LibFunc::atanhl); + TLI.setUnavailable(LibFunc::cbrt); + TLI.setUnavailable(LibFunc::cbrtf); + TLI.setUnavailable(LibFunc::cbrtl); + TLI.setUnavailable(LibFunc::exp2); + TLI.setUnavailable(LibFunc::exp2f); + TLI.setUnavailable(LibFunc::exp2l); + TLI.setUnavailable(LibFunc::expm1); + TLI.setUnavailable(LibFunc::expm1f); + TLI.setUnavailable(LibFunc::expm1l); + TLI.setUnavailable(LibFunc::log2); + 
TLI.setUnavailable(LibFunc::log2f);
+    TLI.setUnavailable(LibFunc::log2l);
+    TLI.setUnavailable(LibFunc::log1p);
+    TLI.setUnavailable(LibFunc::log1pf);
+    TLI.setUnavailable(LibFunc::log1pl);
+    TLI.setUnavailable(LibFunc::logb);
+    TLI.setUnavailable(LibFunc::logbf);
+    TLI.setUnavailable(LibFunc::logbl);
+    TLI.setUnavailable(LibFunc::nearbyint);
+    TLI.setUnavailable(LibFunc::nearbyintf);
+    TLI.setUnavailable(LibFunc::nearbyintl);
+    TLI.setUnavailable(LibFunc::rint);
+    TLI.setUnavailable(LibFunc::rintf);
+    TLI.setUnavailable(LibFunc::rintl);
+    TLI.setUnavailable(LibFunc::round);
+    TLI.setUnavailable(LibFunc::roundf);
+    TLI.setUnavailable(LibFunc::roundl);
+    TLI.setUnavailable(LibFunc::trunc);
+    TLI.setUnavailable(LibFunc::truncf);
+    TLI.setUnavailable(LibFunc::truncl);
+
+    // Win32 provides some C99 math with mangled names
+    TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+
+    if (T.getArch() == Triple::x86) {
+      // Win32 on x86 implements single-precision math functions as macros
+      TLI.setUnavailable(LibFunc::acosf);
+      TLI.setUnavailable(LibFunc::asinf);
+      TLI.setUnavailable(LibFunc::atanf);
+      TLI.setUnavailable(LibFunc::atan2f);
+      TLI.setUnavailable(LibFunc::ceilf);
+      TLI.setUnavailable(LibFunc::copysignf);
+      TLI.setUnavailable(LibFunc::cosf);
+      TLI.setUnavailable(LibFunc::coshf);
+      TLI.setUnavailable(LibFunc::expf);
+      TLI.setUnavailable(LibFunc::floorf);
+      TLI.setUnavailable(LibFunc::fminf);
+      TLI.setUnavailable(LibFunc::fmaxf);
+      TLI.setUnavailable(LibFunc::fmodf);
+      TLI.setUnavailable(LibFunc::logf);
+      TLI.setUnavailable(LibFunc::powf);
+      TLI.setUnavailable(LibFunc::sinf);
+      TLI.setUnavailable(LibFunc::sinhf);
+      TLI.setUnavailable(LibFunc::sqrtf);
+      TLI.setUnavailable(LibFunc::tanf);
+      TLI.setUnavailable(LibFunc::tanhf);
+    }
+
+    // Win32 does *not* provide these functions, but they are
+    // generally available on POSIX-compliant systems:
+    TLI.setUnavailable(LibFunc::access);
+    TLI.setUnavailable(LibFunc::bcmp);
+    TLI.setUnavailable(LibFunc::bcopy);
+    TLI.setUnavailable(LibFunc::bzero);
+    TLI.setUnavailable(LibFunc::chmod);
+    TLI.setUnavailable(LibFunc::chown);
+    TLI.setUnavailable(LibFunc::closedir);
+    TLI.setUnavailable(LibFunc::ctermid);
+    TLI.setUnavailable(LibFunc::fdopen);
+    TLI.setUnavailable(LibFunc::ffs);
+    TLI.setUnavailable(LibFunc::fileno);
+    TLI.setUnavailable(LibFunc::flockfile);
+    TLI.setUnavailable(LibFunc::fseeko);
+    TLI.setUnavailable(LibFunc::fstat);
+    TLI.setUnavailable(LibFunc::fstatvfs);
+    TLI.setUnavailable(LibFunc::ftello);
+    TLI.setUnavailable(LibFunc::ftrylockfile);
+    TLI.setUnavailable(LibFunc::funlockfile);
+    TLI.setUnavailable(LibFunc::getc_unlocked);
+    TLI.setUnavailable(LibFunc::getitimer);
+    TLI.setUnavailable(LibFunc::getlogin_r);
+    TLI.setUnavailable(LibFunc::getpwnam);
+    TLI.setUnavailable(LibFunc::gettimeofday);
+    TLI.setUnavailable(LibFunc::htonl);
+    TLI.setUnavailable(LibFunc::htons);
+    TLI.setUnavailable(LibFunc::lchown);
+    TLI.setUnavailable(LibFunc::lstat);
+    TLI.setUnavailable(LibFunc::memccpy);
+    TLI.setUnavailable(LibFunc::mkdir);
+    TLI.setUnavailable(LibFunc::ntohl);
+    TLI.setUnavailable(LibFunc::ntohs);
+    TLI.setUnavailable(LibFunc::open);
+    TLI.setUnavailable(LibFunc::opendir);
+    TLI.setUnavailable(LibFunc::pclose);
+    TLI.setUnavailable(LibFunc::popen);
+    TLI.setUnavailable(LibFunc::pread);
+    TLI.setUnavailable(LibFunc::pwrite);
+    TLI.setUnavailable(LibFunc::read);
+    TLI.setUnavailable(LibFunc::readlink);
+    TLI.setUnavailable(LibFunc::realpath);
+    TLI.setUnavailable(LibFunc::rmdir);
+    TLI.setUnavailable(LibFunc::setitimer);
TLI.setUnavailable(LibFunc::stat);
+    TLI.setUnavailable(LibFunc::statvfs);
+    TLI.setUnavailable(LibFunc::stpcpy);
+    TLI.setUnavailable(LibFunc::stpncpy);
+    TLI.setUnavailable(LibFunc::strcasecmp);
+    TLI.setUnavailable(LibFunc::strncasecmp);
+    TLI.setUnavailable(LibFunc::times);
+    TLI.setUnavailable(LibFunc::uname);
+    TLI.setUnavailable(LibFunc::unlink);
+    TLI.setUnavailable(LibFunc::unsetenv);
+    TLI.setUnavailable(LibFunc::utime);
+    TLI.setUnavailable(LibFunc::utimes);
+    TLI.setUnavailable(LibFunc::write);
+
+    // Win32 does *not* provide these functions, but they are
+    // specified by C99:
+    TLI.setUnavailable(LibFunc::atoll);
+    TLI.setUnavailable(LibFunc::frexpf);
+    TLI.setUnavailable(LibFunc::llabs);
+  }
+
+  switch (T.getOS()) {
+  case Triple::MacOSX:
+    // exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0,
+    // and their names are __exp10 and __exp10f. exp10l is not available on
+    // OS X or iOS.
+    TLI.setUnavailable(LibFunc::exp10l);
+    if (T.isMacOSXVersionLT(10, 9)) {
+      TLI.setUnavailable(LibFunc::exp10);
+      TLI.setUnavailable(LibFunc::exp10f);
+    } else {
+      TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+      TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+    }
+    break;
+  case Triple::IOS:
+    TLI.setUnavailable(LibFunc::exp10l);
+    if (T.isOSVersionLT(7, 0)) {
+      TLI.setUnavailable(LibFunc::exp10);
+      TLI.setUnavailable(LibFunc::exp10f);
+    } else {
+      TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+      TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+    }
+    break;
+  case Triple::Linux:
+    // exp10, exp10f, and exp10l are available on Linux (GLIBC) but are
+    // extremely buggy prior to glibc version 2.18. Until this version is
+    // widely deployed or we have a reasonable detection strategy, we cannot
+    // use exp10 reliably on Linux.
+    //
+    // Fall through to disable all of them.
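Two availability mechanisms are at work in this switch: setUnavailable masks a function off entirely, while setAvailableWithName keeps it legal under a remapped symbol that clients must fetch through the name query. A hypothetical illustration (not part of the patch; it assumes TargetLibraryInfo::has and TargetLibraryInfo::getName as the query methods), before the default case below disables exp10 everywhere else:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    using namespace llvm;

    // On Darwin >= 10.9 the call stays legal but must be emitted as
    // "__exp10"; on targets that hit the default case, has() returns false.
    static StringRef exp10SymbolFor(const TargetLibraryInfo &TLI) {
      return TLI.has(LibFunc::exp10) ? TLI.getName(LibFunc::exp10)
                                     : StringRef();
    }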
+ default: + TLI.setUnavailable(LibFunc::exp10); + TLI.setUnavailable(LibFunc::exp10f); + TLI.setUnavailable(LibFunc::exp10l); + } + + // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and + // Linux (GLIBC): + // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html + // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c + // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html + switch (T.getOS()) { + case Triple::Darwin: + case Triple::MacOSX: + case Triple::IOS: + case Triple::FreeBSD: + case Triple::Linux: + break; + default: + TLI.setUnavailable(LibFunc::ffsl); + } + + // ffsll is available on at least FreeBSD and Linux (GLIBC): + // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c + // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html + switch (T.getOS()) { + case Triple::FreeBSD: + case Triple::Linux: + break; + default: + TLI.setUnavailable(LibFunc::ffsll); + } + + // The following functions are available on at least Linux: + if (!T.isOSLinux()) { + TLI.setUnavailable(LibFunc::dunder_strdup); + TLI.setUnavailable(LibFunc::dunder_strtok_r); + TLI.setUnavailable(LibFunc::dunder_isoc99_scanf); + TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf); + TLI.setUnavailable(LibFunc::under_IO_getc); + TLI.setUnavailable(LibFunc::under_IO_putc); + TLI.setUnavailable(LibFunc::memalign); + TLI.setUnavailable(LibFunc::fopen64); + TLI.setUnavailable(LibFunc::fseeko64); + TLI.setUnavailable(LibFunc::fstat64); + TLI.setUnavailable(LibFunc::fstatvfs64); + TLI.setUnavailable(LibFunc::ftello64); + TLI.setUnavailable(LibFunc::lstat64); + TLI.setUnavailable(LibFunc::open64); + TLI.setUnavailable(LibFunc::stat64); + TLI.setUnavailable(LibFunc::statvfs64); + TLI.setUnavailable(LibFunc::tmpfile64); + } +} + +TargetLibraryInfoImpl::TargetLibraryInfoImpl() { + // Default to everything being available. + memset(AvailableArray, -1, sizeof(AvailableArray)); + + initialize(*this, Triple(), StandardNames); +} + +TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { + // Default to everything being available. + memset(AvailableArray, -1, sizeof(AvailableArray)); + + initialize(*this, T, StandardNames); +} + +TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) + : CustomNames(TLI.CustomNames) { + memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); +} + +TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) + : CustomNames(std::move(TLI.CustomNames)) { + std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), + AvailableArray); +} + +TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) { + CustomNames = TLI.CustomNames; + memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); + return *this; +} + +TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) { + CustomNames = std::move(TLI.CustomNames); + std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), + AvailableArray); + return *this; +} + +namespace { +struct StringComparator { + /// Compare two strings and return true if LHS is lexicographically less than + /// RHS. Requires that RHS doesn't contain any zero bytes. + bool operator()(const char *LHS, StringRef RHS) const { + // Compare prefixes with strncmp. If prefixes match we know that LHS is + // greater or equal to RHS as RHS can't contain any '\0'. 
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0; + } + + // Provided for compatibility with MSVC's debug mode. + bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; } + bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; } + bool operator()(const char *LHS, const char *RHS) const { + return std::strcmp(LHS, RHS) < 0; + } +}; +} + +bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, + LibFunc::Func &F) const { + const char **Start = &StandardNames[0]; + const char **End = &StandardNames[LibFunc::NumLibFuncs]; + + // Filter out empty names and names containing null bytes, those can't be in + // our table. + if (funcName.empty() || funcName.find('\0') != StringRef::npos) + return false; + + // Check for \01 prefix that is used to mangle __asm declarations and + // strip it if present. + if (funcName.front() == '\01') + funcName = funcName.substr(1); + const char **I = std::lower_bound(Start, End, funcName, StringComparator()); + if (I != End && *I == funcName) { + F = (LibFunc::Func)(I - Start); + return true; + } + return false; +} + +void TargetLibraryInfoImpl::disableAllFunctions() { + memset(AvailableArray, 0, sizeof(AvailableArray)); +} + +TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) { + if (PresetInfoImpl) + return TargetLibraryInfo(*PresetInfoImpl); + + return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple()))); +} + +TargetLibraryInfo TargetLibraryAnalysis::run(Function &F) { + if (PresetInfoImpl) + return TargetLibraryInfo(*PresetInfoImpl); + + return TargetLibraryInfo( + lookupInfoImpl(Triple(F.getParent()->getTargetTriple()))); +} + +TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(Triple T) { + std::unique_ptr<TargetLibraryInfoImpl> &Impl = + Impls[T.normalize()]; + if (!Impl) + Impl.reset(new TargetLibraryInfoImpl(T)); + + return *Impl; +} + + +TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass() + : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) { + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T) + : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) { + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass( + const TargetLibraryInfoImpl &TLIImpl) + : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) { + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +char TargetLibraryAnalysis::PassID; + +// Register the basic pass. 
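Before the pass registration below, note how getLibFunc above resolves names: a single std::lower_bound over the sorted StandardNames table, with StringComparator's strncmp trick standing in for a full comparison (a tie over the first RHS.size() bytes can only mean LHS >= RHS, because RHS contains no NUL). A reduced, self-contained model of the same lookup (hypothetical, not part of the patch):

    #include <algorithm>
    #include <cstring>
    #include <string>

    static const char *Names[] = {"cos", "memcpy", "sqrt"}; // sorted, NUL-free

    // Mirrors StringComparator::operator()(const char *, StringRef).
    static bool lessThan(const char *LHS, const std::string &RHS) {
      return std::strncmp(LHS, RHS.c_str(), RHS.size()) < 0;
    }

    static bool lookup(const std::string &Name, unsigned &Index) {
      const char **End = Names + sizeof(Names) / sizeof(Names[0]);
      const char **I = std::lower_bound(Names, End, Name, lessThan);
      if (I != End && Name == *I) {
        Index = unsigned(I - Names); // table position doubles as enum value
        return true;
      }
      return false;
    }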
+INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo", + "Target Library Information", false, true) +char TargetLibraryInfoWrapperPass::ID = 0; + +void TargetLibraryInfoWrapperPass::anchor() {} diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index c1ffb9d..7ff29b0 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -8,11 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" @@ -20,623 +22,290 @@ using namespace llvm; #define DEBUG_TYPE "tti" -// Setup the analysis group to manage the TargetTransformInfo passes. -INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI) -char TargetTransformInfo::ID = 0; - -TargetTransformInfo::~TargetTransformInfo() { +namespace { +/// \brief No-op implementation of the TTI interface using the utility base +/// classes. +/// +/// This is used when no target specific information is available. +struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> { + explicit NoTTIImpl(const DataLayout *DL) + : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {} +}; } -void TargetTransformInfo::pushTTIStack(Pass *P) { - TopTTI = this; - PrevTTI = &P->getAnalysis<TargetTransformInfo>(); +TargetTransformInfo::TargetTransformInfo(const DataLayout *DL) + : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {} - // Walk up the chain and update the top TTI pointer. 
- for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) - PTTI->TopTTI = this; -} +TargetTransformInfo::~TargetTransformInfo() {} + +TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg) + : TTIImpl(std::move(Arg.TTIImpl)) {} -void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetTransformInfo>(); +TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { + TTIImpl = std::move(RHS.TTIImpl); + return *this; } unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { - return PrevTTI->getOperationCost(Opcode, Ty, OpTy); -} - -unsigned TargetTransformInfo::getGEPCost( - const Value *Ptr, ArrayRef<const Value *> Operands) const { - return PrevTTI->getGEPCost(Ptr, Operands); + return TTIImpl->getOperationCost(Opcode, Ty, OpTy); } unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { - return PrevTTI->getCallCost(FTy, NumArgs); -} - -unsigned TargetTransformInfo::getCallCost(const Function *F, - int NumArgs) const { - return PrevTTI->getCallCost(F, NumArgs); -} - -unsigned TargetTransformInfo::getCallCost( - const Function *F, ArrayRef<const Value *> Arguments) const { - return PrevTTI->getCallCost(F, Arguments); + return TTIImpl->getCallCost(FTy, NumArgs); } -unsigned TargetTransformInfo::getIntrinsicCost( - Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const { - return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys); +unsigned +TargetTransformInfo::getCallCost(const Function *F, + ArrayRef<const Value *> Arguments) const { + return TTIImpl->getCallCost(F, Arguments); } -unsigned TargetTransformInfo::getIntrinsicCost( - Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const { - return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments); +unsigned +TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) const { + return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); } unsigned TargetTransformInfo::getUserCost(const User *U) const { - return PrevTTI->getUserCost(U); + return TTIImpl->getUserCost(U); } bool TargetTransformInfo::hasBranchDivergence() const { - return PrevTTI->hasBranchDivergence(); + return TTIImpl->hasBranchDivergence(); } bool TargetTransformInfo::isLoweredToCall(const Function *F) const { - return PrevTTI->isLoweredToCall(F); + return TTIImpl->isLoweredToCall(F); } -void -TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const { - PrevTTI->getUnrollingPreferences(F, L, UP); +void TargetTransformInfo::getUnrollingPreferences( + Loop *L, UnrollingPreferences &UP) const { + return TTIImpl->getUnrollingPreferences(L, UP); } bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { - return PrevTTI->isLegalAddImmediate(Imm); + return TTIImpl->isLegalAddImmediate(Imm); } bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { - return PrevTTI->isLegalICmpImmediate(Imm); + return TTIImpl->isLegalICmpImmediate(Imm); } bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const { - return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, + return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale); } +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, + int Consecutive) const { + return TTIImpl->isLegalMaskedStore(DataType, Consecutive); +} + +bool 
TargetTransformInfo::isLegalMaskedLoad(Type *DataType, + int Consecutive) const { + return TTIImpl->isLegalMaskedLoad(DataType, Consecutive); +} + int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const { - return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { - return PrevTTI->isTruncateFree(Ty1, Ty2); + return TTIImpl->isTruncateFree(Ty1, Ty2); +} + +bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const { + return TTIImpl->isProfitableToHoist(I); } bool TargetTransformInfo::isTypeLegal(Type *Ty) const { - return PrevTTI->isTypeLegal(Ty); + return TTIImpl->isTypeLegal(Ty); } unsigned TargetTransformInfo::getJumpBufAlignment() const { - return PrevTTI->getJumpBufAlignment(); + return TTIImpl->getJumpBufAlignment(); } unsigned TargetTransformInfo::getJumpBufSize() const { - return PrevTTI->getJumpBufSize(); + return TTIImpl->getJumpBufSize(); } bool TargetTransformInfo::shouldBuildLookupTables() const { - return PrevTTI->shouldBuildLookupTables(); + return TTIImpl->shouldBuildLookupTables(); } TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { - return PrevTTI->getPopcntSupport(IntTyWidthInBit); + return TTIImpl->getPopcntSupport(IntTyWidthInBit); } bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { - return PrevTTI->haveFastSqrt(Ty); + return TTIImpl->haveFastSqrt(Ty); +} + +unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const { + return TTIImpl->getFPOpCost(Ty); } unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { - return PrevTTI->getIntImmCost(Imm, Ty); + return TTIImpl->getIntImmCost(Imm, Ty); } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx, +unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { - return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty); + return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); } unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const { - return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty); + return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { - return PrevTTI->getNumberOfRegisters(Vector); + return TTIImpl->getNumberOfRegisters(Vector); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { - return PrevTTI->getRegisterBitWidth(Vector); + return TTIImpl->getRegisterBitWidth(Vector); } unsigned TargetTransformInfo::getMaxInterleaveFactor() const { - return PrevTTI->getMaxInterleaveFactor(); + return TTIImpl->getMaxInterleaveFactor(); } unsigned TargetTransformInfo::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Op1Info, - OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo) const { - return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); } -unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, +unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type 
*Ty, int Index, Type *SubTp) const { - return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp); + return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); } unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { - return PrevTTI->getCastInstrCost(Opcode, Dst, Src); + return TTIImpl->getCastInstrCost(Opcode, Dst, Src); } unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { - return PrevTTI->getCFInstrCost(Opcode); + return TTIImpl->getCFInstrCost(Opcode); } unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { - return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy); + return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); } unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { - return PrevTTI->getVectorInstrCost(Opcode, Val, Index); + return TTIImpl->getVectorInstrCost(Opcode, Val, Index); } unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); - ; + return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } unsigned -TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, - Type *RetTy, +TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +} + +unsigned +TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys) const { - return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys); + return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { - return PrevTTI->getNumberOfParts(Tp); + return TTIImpl->getNumberOfParts(Tp); } unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, bool IsComplex) const { - return PrevTTI->getAddressComputationCost(Tp, IsComplex); + return TTIImpl->getAddressComputationCost(Tp, IsComplex); } unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const { - return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise); + bool IsPairwiseForm) const { + return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); } -unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) - const { - return PrevTTI->getCostOfKeepingLiveOverCall(Tys); +unsigned +TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { + return TTIImpl->getCostOfKeepingLiveOverCall(Tys); } -namespace { +bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) const { + return TTIImpl->getTgtMemIntrinsic(Inst, Info); +} -struct NoTTI final : ImmutablePass, TargetTransformInfo { - const DataLayout *DL; - - NoTTI() : ImmutablePass(ID), DL(nullptr) { - initializeNoTTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { - // Note that this subclass is special, and must *not* call initializeTTI as - // it does not chain. - TopTTI = this; - PrevTTI = nullptr; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - // Note that this subclass is special, and must *not* call - // TTI::getAnalysisUsage as it breaks the recursion. - } - - /// Pass identification. 
- static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - unsigned getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy) const override { - switch (Opcode) { - default: - // By default, just classify everything as 'basic'. - return TCC_Basic; - - case Instruction::GetElementPtr: - llvm_unreachable("Use getGEPCost for GEP operations!"); - - case Instruction::BitCast: - assert(OpTy && "Cast instructions must provide the operand type"); - if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) - // Identity and pointer-to-pointer casts are free. - return TCC_Free; - - // Otherwise, the default basic cost is used. - return TCC_Basic; - - case Instruction::IntToPtr: { - if (!DL) - return TCC_Basic; - - // An inttoptr cast is free so long as the input is a legal integer type - // which doesn't contain values outside the range of a pointer. - unsigned OpSize = OpTy->getScalarSizeInBits(); - if (DL->isLegalInteger(OpSize) && - OpSize <= DL->getPointerTypeSizeInBits(Ty)) - return TCC_Free; - - // Otherwise it's not a no-op. - return TCC_Basic; - } - case Instruction::PtrToInt: { - if (!DL) - return TCC_Basic; - - // A ptrtoint cast is free so long as the result is large enough to store - // the pointer, and a legal integer type. - unsigned DestSize = Ty->getScalarSizeInBits(); - if (DL->isLegalInteger(DestSize) && - DestSize >= DL->getPointerTypeSizeInBits(OpTy)) - return TCC_Free; - - // Otherwise it's not a no-op. - return TCC_Basic; - } - case Instruction::Trunc: - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty))) - return TCC_Free; - - return TCC_Basic; - } - } - - unsigned getGEPCost(const Value *Ptr, - ArrayRef<const Value *> Operands) const override { - // In the basic model, we just assume that all-constant GEPs will be folded - // into their uses via addressing modes. - for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) - if (!isa<Constant>(Operands[Idx])) - return TCC_Basic; - - return TCC_Free; - } - - unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const override - { - assert(FTy && "FunctionType must be provided to this routine."); - - // The target-independent implementation just measures the size of the - // function by approximating that each argument will take on average one - // instruction to prepare. - - if (NumArgs < 0) - // Set the argument number to the number of explicit arguments in the - // function. - NumArgs = FTy->getNumParams(); - - return TCC_Basic * (NumArgs + 1); - } - - unsigned getCallCost(const Function *F, int NumArgs = -1) const override - { - assert(F && "A concrete function must be provided to this routine."); - - if (NumArgs < 0) - // Set the argument number to the number of explicit arguments in the - // function. - NumArgs = F->arg_size(); - - if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) { - FunctionType *FTy = F->getFunctionType(); - SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); - return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); - } - - if (!TopTTI->isLoweredToCall(F)) - return TCC_Basic; // Give a basic cost if it will be lowered directly. 
- - return TopTTI->getCallCost(F->getFunctionType(), NumArgs); - } - - unsigned getCallCost(const Function *F, - ArrayRef<const Value *> Arguments) const override { - // Simply delegate to generic handling of the call. - // FIXME: We should use instsimplify or something else to catch calls which - // will constant fold with these arguments. - return TopTTI->getCallCost(F, Arguments.size()); - } - - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> ParamTys) const override { - switch (IID) { - default: - // Intrinsics rarely (if ever) have normal argument setup constraints. - // Model them as having a basic instruction cost. - // FIXME: This is wrong for libc intrinsics. - return TCC_Basic; - - case Intrinsic::annotation: - case Intrinsic::assume: - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't actually represent code after lowering. - return TCC_Free; - } - } - - unsigned - getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<const Value *> Arguments) const override { - // Delegate to the generic intrinsic handling code. This mostly provides an - // opportunity for targets to (for example) special case the cost of - // certain intrinsics based on constants used as arguments. - SmallVector<Type *, 8> ParamTys; - ParamTys.reserve(Arguments.size()); - for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) - ParamTys.push_back(Arguments[Idx]->getType()); - return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); - } - - unsigned getUserCost(const User *U) const override { - if (isa<PHINode>(U)) - return TCC_Free; // Model all PHI nodes as free. - - if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { - SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end()); - return TopTTI->getGEPCost(GEP->getPointerOperand(), Indices); - } - - if (ImmutableCallSite CS = U) { - const Function *F = CS.getCalledFunction(); - if (!F) { - // Just use the called value type. - Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); - return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); - } - - SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); - return TopTTI->getCallCost(F, Arguments); - } - - if (const CastInst *CI = dyn_cast<CastInst>(U)) { - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa<CmpInst>(CI->getOperand(0))) - return TCC_Free; - } - - // Otherwise delegate to the fully generic implementations. - return getOperationCost(Operator::getOpcode(U), U->getType(), - U->getNumOperands() == 1 ? - U->getOperand(0)->getType() : nullptr); - } - - bool hasBranchDivergence() const override { return false; } - - bool isLoweredToCall(const Function *F) const override { - // FIXME: These should almost certainly not be handled here, and instead - // handled with the help of TLI or the target itself. This was largely - // ported from existing analysis heuristics here so that such refactorings - // can take place in the future. 
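The deleted NoTTI pass (its isLoweredToCall heuristics continue below) was the anchor of the old analysis-group chain; the new file replaces that chaining with the concept-based type erasure visible at its top: TargetTransformInfo owns a unique_ptr to a Concept, and Model<T> adapts any concrete implementation such as NoTTIImpl. A minimal sketch of that shape, with simplified names (hypothetical, not the actual header):

    #include <memory>
    #include <utility>

    class TTIFacade {
      struct Concept {
        virtual ~Concept() = default;
        virtual unsigned getOperationCost(unsigned Opcode) = 0;
      };
      template <typename T> struct Model final : Concept {
        T Impl;
        explicit Model(T I) : Impl(std::move(I)) {}
        unsigned getOperationCost(unsigned Opcode) override {
          return Impl.getOperationCost(Opcode); // static call into the impl
        }
      };
      std::unique_ptr<Concept> TTIImpl;

    public:
      template <typename T>
      explicit TTIFacade(T Impl) : TTIImpl(new Model<T>(std::move(Impl))) {}

      // Queries become plain member functions: one virtual hop, with no
      // PrevTTI/TopTTI chain of passes to walk.
      unsigned getOperationCost(unsigned Opcode) {
        return TTIImpl->getOperationCost(Opcode);
      }
    };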
- - if (F->isIntrinsic()) - return false; - - if (F->hasLocalLinkage() || !F->hasName()) - return true; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || - Name == "fmin" || Name == "fminf" || Name == "fminl" || - Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || - Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || - Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") - return false; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || - Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name == - "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" || - Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") - return false; - - return true; - } - - void getUnrollingPreferences(const Function *, Loop *, - UnrollingPreferences &) const override {} - - bool isLegalAddImmediate(int64_t Imm) const override { - return false; - } - - bool isLegalICmpImmediate(int64_t Imm) const override { - return false; - } - - bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) const override - { - // Guess that reg+reg addressing is allowed. This heuristic is taken from - // the implementation of LSR. - return !BaseGV && BaseOffset == 0 && Scale <= 1; - } - - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) const override { - // Guess that all legal addressing mode are free. - if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale)) - return 0; - return -1; - } - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override { - return false; - } - - bool isTypeLegal(Type *Ty) const override { - return false; - } - - unsigned getJumpBufAlignment() const override { - return 0; - } - - unsigned getJumpBufSize() const override { - return 0; - } - - bool shouldBuildLookupTables() const override { - return true; - } - - PopcntSupportKind - getPopcntSupport(unsigned IntTyWidthInBit) const override { - return PSK_Software; - } - - bool haveFastSqrt(Type *Ty) const override { - return false; - } - - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override { - return TCC_Basic; - } - - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) const override { - return TCC_Free; - } - - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) const override { - return TCC_Free; - } - - unsigned getNumberOfRegisters(bool Vector) const override { - return 8; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - return 32; - } - - unsigned getMaxInterleaveFactor() const override { - return 1; - } - - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind, OperandValueProperties, - OperandValueProperties) const override { - return 1; - } - - unsigned getShuffleCost(ShuffleKind Kind, Type *Ty, - int Index = 0, Type *SubTp = nullptr) const override { - return 1; - } - - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const override { - return 1; - } - - unsigned getCFInstrCost(unsigned Opcode) const override { - return 1; - } - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type 
*CondTy = nullptr) const override { - return 1; - } - - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index = -1) const override { - return 1; - } - - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override { - return 1; - } - - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type*> Tys) const override { - return 1; - } - - unsigned getNumberOfParts(Type *Tp) const override { - return 0; - } - - unsigned getAddressComputationCost(Type *Tp, bool) const override { - return 0; - } - - unsigned getReductionCost(unsigned, Type *, bool) const override { - return 1; - } - - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override { - return 0; - } +Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( + IntrinsicInst *Inst, Type *ExpectedType) const { + return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); +} -}; +TargetTransformInfo::Concept::~Concept() {} + +TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} -} // end anonymous namespace +TargetIRAnalysis::TargetIRAnalysis( + std::function<Result(Function &)> TTICallback) + : TTICallback(TTICallback) {} + +TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { + return TTICallback(F); +} -INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti", - "No target information", true, true, true) -char NoTTI::ID = 0; +char TargetIRAnalysis::PassID; + +TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { + return Result(F.getParent()->getDataLayout()); +} + +// Register the basic pass. +INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti", + "Target Transform Information", false, true) +char TargetTransformInfoWrapperPass::ID = 0; + +void TargetTransformInfoWrapperPass::anchor() {} + +TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass() + : ImmutablePass(ID) { + initializeTargetTransformInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + +TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( + TargetIRAnalysis TIRA) + : ImmutablePass(ID), TIRA(std::move(TIRA)) { + initializeTargetTransformInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + +TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) { + TTI = TIRA.run(F); + return *TTI; +} -ImmutablePass *llvm::createNoTargetTransformInfoPass() { - return new NoTTI(); +ImmutablePass * +llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) { + return new TargetTransformInfoWrapperPass(std::move(TIRA)); } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index f347eb5..ff89558 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -167,7 +167,7 @@ namespace { bool TypeIsImmutable() const { if (Node->getNumOperands() < 3) return false; - ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); + ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); if (!CI) return false; return CI->getValue()[0]; @@ -194,7 +194,7 @@ namespace { return dyn_cast_or_null<MDNode>(Node->getOperand(1)); } uint64_t getOffset() const { - return cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); + return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); } /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for /// objects which are not modified (by any means) in the context where this @@ -202,7 +202,7 @@ namespace { bool 
TypeIsImmutable() const { if (Node->getNumOperands() < 4) return false; - ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3)); + ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); if (!CI) return false; return CI->getValue()[0]; @@ -233,8 +233,10 @@ namespace { // Fast path for a scalar type node and a struct type node with a single // field. if (Node->getNumOperands() <= 3) { - uint64_t Cur = Node->getNumOperands() == 2 ? 0 : - cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); + uint64_t Cur = Node->getNumOperands() == 2 + ? 0 + : mdconst::extract<ConstantInt>(Node->getOperand(2)) + ->getZExtValue(); Offset -= Cur; MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); if (!P) @@ -246,8 +248,8 @@ namespace { // the current offset is bigger than the given offset. unsigned TheIdx = 0; for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { - uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))-> - getZExtValue(); + uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) + ->getZExtValue(); if (Cur > Offset) { assert(Idx >= 3 && "TBAAStructTypeNode::getParent should have an offset match!"); @@ -258,8 +260,8 @@ namespace { // Move along the last field. if (TheIdx == 0) TheIdx = Node->getNumOperands() - 2; - uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))-> - getZExtValue(); + uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) + ->getZExtValue(); Offset -= Cur; MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); if (!P) @@ -608,7 +610,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { return nullptr; // We need to convert from a type node to a tag node. Type *Int64 = IntegerType::get(A->getContext(), 64); - Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; + Metadata *Ops[3] = {Ret, Ret, + ConstantAsMetadata::get(ConstantInt::get(Int64, 0))}; return MDNode::get(A->getContext(), Ops); } @@ -620,8 +623,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { N.TBAA = getMetadata(LLVMContext::MD_tbaa); if (Merge) - N.Scope = - MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope)); + N.Scope = MDNode::getMostGenericAliasScope( + N.Scope, getMetadata(LLVMContext::MD_alias_scope)); else N.Scope = getMetadata(LLVMContext::MD_alias_scope); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index e9bbf83..0458d28 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/CallSite.h" @@ -65,16 +65,16 @@ namespace { // figuring out if we can use it. 
struct Query { ExclInvsSet ExclInvs; - AssumptionTracker *AT; + AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; - Query(AssumptionTracker *AT = nullptr, const Instruction *CxtI = nullptr, + Query(AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr) - : AT(AT), CxtI(CxtI), DT(DT) {} + : AC(AC), CxtI(CxtI), DT(DT) {} Query(const Query &Q, const Value *NewExcl) - : ExclInvs(Q.ExclInvs), AT(Q.AT), CxtI(Q.CxtI), DT(Q.DT) { + : ExclInvs(Q.ExclInvs), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT) { ExclInvs.insert(NewExcl); } }; @@ -102,10 +102,10 @@ static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout *TD, unsigned Depth, - AssumptionTracker *AT, const Instruction *CxtI, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth, - Query(AT, safeCxtI(V, CxtI), DT)); + Query(AC, safeCxtI(V, CxtI), DT)); } static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, @@ -114,52 +114,50 @@ static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout *TD, unsigned Depth, - AssumptionTracker *AT, const Instruction *CxtI, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth, - Query(AT, safeCxtI(V, CxtI), DT)); + Query(AC, safeCxtI(V, CxtI), DT)); } static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, const Query &Q); bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - AssumptionTracker *AT, - const Instruction *CxtI, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - Query(AT, safeCxtI(V, CxtI), DT)); + Query(AC, safeCxtI(V, CxtI), DT)); } static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, const Query &Q); bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, - AssumptionTracker *AT, const Instruction *CxtI, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::isKnownNonZero(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT)); + return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, unsigned Depth, const Query &Q); -bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, - const DataLayout *TD, unsigned Depth, - AssumptionTracker *AT, const Instruction *CxtI, - const DominatorTree *DT) { +bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { return ::MaskedValueIsZero(V, Mask, TD, Depth, - Query(AT, safeCxtI(V, CxtI), DT)); + Query(AC, safeCxtI(V, CxtI), DT)); } static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, unsigned Depth, const Query &Q); unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, - unsigned Depth, AssumptionTracker *AT, + unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::ComputeNumSignBits(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT)); + return ::ComputeNumSignBits(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, 
@@ -312,8 +310,10 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
   // Use the high end of the ranges to find leading zeros.
   unsigned MinLeadingZeros = BitWidth;
   for (unsigned i = 0; i < NumRanges; ++i) {
-    ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0));
-    ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1));
+    ConstantInt *Lower =
+        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+    ConstantInt *Upper =
+        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
     ConstantRange Range(Lower->getValue(), Upper->getValue());
     if (Range.isWrappedSet())
       MinLeadingZeros = 0; // -1 has no zeros
@@ -480,18 +480,31 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
                                        unsigned Depth, const Query &Q) {
   // Use of assumptions is context-sensitive. If we don't have a context, we
   // cannot use them!
-  if (!Q.AT || !Q.CxtI)
+  if (!Q.AC || !Q.CxtI)
     return;
 
   unsigned BitWidth = KnownZero.getBitWidth();
 
-  Function *F = const_cast<Function*>(Q.CxtI->getParent()->getParent());
-  for (auto &CI : Q.AT->assumptions(F)) {
-    CallInst *I = CI;
+  for (auto &AssumeVH : Q.AC->assumptions()) {
+    if (!AssumeVH)
+      continue;
+    CallInst *I = cast<CallInst>(AssumeVH);
+    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
+           "Got assumption for the wrong function!");
     if (Q.ExclInvs.count(I))
      continue;
-    if (match(I, m_Intrinsic<Intrinsic::assume>(m_Specific(V))) &&
+    // Warning: This loop can end up being somewhat performance sensitive.
+    // We're running this loop once for each value queried, resulting in a
+    // runtime of ~O(#assumes * #values).
+
+    assert(isa<IntrinsicInst>(I) &&
+           dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume &&
+           "must be an assume intrinsic");
+
+    Value *Arg = I->getArgOperand(0);
+
+    if (Arg == V && isValidAssumeForContext(I, Q, DL)) {
       assert(BitWidth == 1 && "assume operand is not i1?");
       KnownZero.clearAllBits();
@@ -499,6 +512,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
       return;
     }
 
+    // The remaining tests are all recursive, so bail out if we hit the limit.
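The Depth == MaxDepth guard below implements that bail-out. Each pattern-matched case after it transfers known bits to V from whatever the assumed comparison relates it to. The first recursive case, assume(v = a) with Pred == ICMP_EQ, reduces to a bitwise merge of a's computed known bits into v's; a toy 8-bit model (hypothetical, not part of the patch):

    #include <cstdint>

    // Bits proven zero/one for a value.
    struct Known8 {
      uint8_t Zero = 0;
      uint8_t One = 0;
    };

    // assume(v == a): every bit proven for `a` is thereby proven for `v`.
    static void mergeFromEquality(Known8 &V, const Known8 &A) {
      V.Zero |= A.Zero;
      V.One |= A.One;
    }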
+ if (Depth == MaxDepth) + continue; + Value *A, *B; auto m_V = m_CombineOr(m_Specific(V), m_CombineOr(m_PtrToInt(m_Specific(V)), @@ -507,16 +524,15 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, CmpInst::Predicate Pred; ConstantInt *C; // assume(v = a) - if (match(I, m_Intrinsic<Intrinsic::assume>( - m_c_ICmp(Pred, m_V, m_Value(A)))) && + if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); KnownZero |= RHSKnownZero; KnownOne |= RHSKnownOne; // assume(v & b = a) - } else if (match(I, m_Intrinsic<Intrinsic::assume>( - m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A)))) && + } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), + m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -528,9 +544,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownZero & MaskKnownOne; KnownOne |= RHSKnownOne & MaskKnownOne; // assume(~(v & b) = a) - } else if (match(I, m_Intrinsic<Intrinsic::assume>( - m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), - m_Value(A)))) && + } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), + m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -542,8 +557,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownOne & MaskKnownOne; KnownOne |= RHSKnownZero & MaskKnownOne; // assume(v | b = a) - } else if (match(I, m_Intrinsic<Intrinsic::assume>( - m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) && + } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), + m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -555,9 +570,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownZero & BKnownZero; KnownOne |= RHSKnownOne & BKnownZero; // assume(~(v | b) = a) - } else if (match(I, m_Intrinsic<Intrinsic::assume>( - m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), - m_Value(A)))) && + } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), + m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -569,8 +583,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownOne & BKnownZero; KnownOne |= RHSKnownZero & BKnownZero; // assume(v ^ b = a) - } else if (match(I, m_Intrinsic<Intrinsic::assume>( - m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) && + } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), + m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -585,9 +599,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= 
@@ -585,9 +599,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
     KnownZero |= RHSKnownOne & BKnownOne;
     KnownOne |= RHSKnownZero & BKnownOne;
   // assume(~(v ^ b) = a)
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
-                                m_Value(A)))) &&
+  } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+                                 m_Value(A))) &&
             Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
     computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -602,9 +615,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
     KnownZero |= RHSKnownZero & BKnownOne;
     KnownOne |= RHSKnownOne & BKnownOne;
   // assume(v << c = a)
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
-                                m_Value(A)))) &&
+  } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+                                 m_Value(A))) &&
             Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
     computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -613,9 +625,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
     KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
     KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
   // assume(~(v << c) = a)
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
-                                m_Value(A)))) &&
+  } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+                                 m_Value(A))) &&
             Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
     computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -624,11 +635,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
     KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
     KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
   // assume(v >> c = a)
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
+  } else if (match(Arg,
+                   m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
                                               m_AShr(m_V, m_ConstantInt(C))),
-                                m_Value(A)))) &&
+                            m_Value(A))) &&
             Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
     computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -637,11 +648,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
     KnownZero |= RHSKnownZero << C->getZExtValue();
     KnownOne |= RHSKnownOne << C->getZExtValue();
   // assume(~(v >> c) = a)
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_c_ICmp(Pred, m_Not(m_CombineOr(
+  } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
                                        m_LShr(m_V, m_ConstantInt(C)),
                                        m_AShr(m_V, m_ConstantInt(C)))),
-                                m_Value(A)))) &&
+                                 m_Value(A))) &&
             Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
     computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
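The shift cases in the hunks above invert the shift on the known bits: from assume((v << c) == a), the bits of a shifted right by c constrain the low bits of v, while the c bits shifted out of v stay unknown. A small sketch of that inversion under the same 64-bit-mask simplification (hypothetical names; assumes 0 < c < 64):

    #include <cstdint>

    struct KnownBits64 {
      uint64_t Zero = 0; // bits proven 0
      uint64_t One = 0;  // bits proven 1
    };

    // assume((v << c) == a): bit i+c of a equals bit i of v, so shifting a's
    // known-bit masks right by c recovers v's low bits. The logical shift
    // fills the top c positions of both masks with zeros, i.e. "unknown".
    KnownBits64 propagateShlEq(const KnownBits64 &A, unsigned C) {
      return {A.Zero >> C, A.One >> C};
    }

    // assume((v >> c) == a), lshr or ashr alike: the masks move the other
    // way, and now the low c bits of v are the ones left unknown.
    KnownBits64 propagateShrEq(const KnownBits64 &A, unsigned C) {
      return {A.Zero << C, A.One << C};
    }

These two helpers correspond to the .lshr(C->getZExtValue()) and << C->getZExtValue() lines in the hunks above.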
@@ -650,8 +660,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
     KnownZero |= RHSKnownOne << C->getZExtValue();
     KnownOne |= RHSKnownZero << C->getZExtValue();
   // assume(v >=_s c) where c is non-negative
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_ICmp(Pred, m_V, m_Value(A)))) &&
+  } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
             Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -662,8 +671,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
       KnownZero |= APInt::getSignBit(BitWidth);
     }
   // assume(v >_s c) where c is at least -1.
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_ICmp(Pred, m_V, m_Value(A)))) &&
+  } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
             Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -674,8 +682,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
       KnownZero |= APInt::getSignBit(BitWidth);
     }
   // assume(v <=_s c) where c is negative
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_ICmp(Pred, m_V, m_Value(A)))) &&
+  } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
             Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -686,8 +693,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
       KnownOne |= APInt::getSignBit(BitWidth);
     }
   // assume(v <_s c) where c is non-positive
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_ICmp(Pred, m_V, m_Value(A)))) &&
+  } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
             Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -698,8 +704,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
       KnownOne |= APInt::getSignBit(BitWidth);
     }
   // assume(v <=_u c)
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_ICmp(Pred, m_V, m_Value(A)))) &&
+  } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
             Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -709,8 +714,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
     KnownZero |=
       APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
   // assume(v <_u c)
-  } else if (match(I, m_Intrinsic<Intrinsic::assume>(
-                       m_ICmp(Pred, m_V, m_Value(A)))) &&
+  } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
             Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q, DL)) {
     APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -790,22 +794,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
     return;
   }

-  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
-  // the bits of its aliasee.
-  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-    if (GA->mayBeOverridden()) {
-      KnownZero.clearAllBits(); KnownOne.clearAllBits();
-    } else {
-      computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1, Q);
-    }
-    return;
-  }

   // The address of an aligned GlobalValue has trailing zeros.
-  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
-    unsigned Align = GV->getAlignment();
+  if (auto *GO = dyn_cast<GlobalObject>(V)) {
+    unsigned Align = GO->getAlignment();
     if (Align == 0 && TD) {
-      if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+      if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
         Type *ObjectType = GVar->getType()->getElementType();
         if (ObjectType->isSized()) {
           // If the object is defined in the current Module, we'll be giving
@@ -839,6 +832,9 @@
     if (Align)
       KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+    else
+      KnownZero.clearAllBits();
+    KnownOne.clearAllBits();

     // Don't give up yet... there might be an assumption that provides more
     // information...
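The computeKnownBits hunk above keeps exactly one fact about a global: an address aligned to a power of two has that many trailing zero bits, recorded via APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)). A hedged sketch of the same computation with a flat 64-bit address (hypothetical helper name):

    #include <cstdint>

    // For an object aligned to Align (a power of two), the low log2(Align)
    // address bits are zero. Because Align is a power of two, Align - 1 is
    // precisely the mask of those trailing bits.
    uint64_t knownZeroBitsFromAlign(uint64_t Align) {
      if (Align == 0)
        return 0;       // alignment unknown: no bits proven zero
      return Align - 1; // e.g. Align == 16 -> 0xF, the low 4 bits are zero
    }

The small else branch added at the end of the hunk is the correctness fix here: when the alignment is unknown, the masks are now explicitly cleared instead of being left holding stale bits.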
@@ -849,8 +845,18 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
   // Start out not knowing anything.
   KnownZero.clearAllBits(); KnownOne.clearAllBits();

+  // Limit search depth.
+  // All recursive calls that increase depth must come after this.
   if (Depth == MaxDepth)
-    return;  // Limit search depth.
+    return;
+
+  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+  // the bits of its aliasee.
+  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+    if (!GA->mayBeOverridden())
+      computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q);
+    return;
+  }

   // Check whether a nearby assume intrinsic can determine some known bits.
   computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
@@ -1507,8 +1513,10 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges,
   const unsigned NumRanges = Ranges->getNumOperands() / 2;
   assert(NumRanges >= 1);
   for (unsigned i = 0; i < NumRanges; ++i) {
-    ConstantInt *Lower = cast<ConstantInt>(Ranges->getOperand(2*i + 0));
-    ConstantInt *Upper = cast<ConstantInt>(Ranges->getOperand(2*i + 1));
+    ConstantInt *Lower =
+        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
+    ConstantInt *Upper =
+        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
     ConstantRange Range(Lower->getValue(), Upper->getValue());
     if (Range.contains(Value))
       return false;
@@ -1764,7 +1772,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
     if (Tmp == 1) return 1;  // Early out.

     // Special case decrementing a value (ADD X, -1):
-    if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+    if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
       if (CRHS->isAllOnesValue()) {
         APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
         computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
@@ -1789,7 +1797,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
     if (Tmp2 == 1) return 1;

     // Handle NEG.
-    if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+    if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
      if (CLHS->isNullValue()) {
        APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
        computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
@@ -1814,13 +1822,16 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,

   case Instruction::PHI: {
     PHINode *PN = cast<PHINode>(U);
+    unsigned NumIncomingValues = PN->getNumIncomingValues();
     // Don't analyze large in-degree PHIs.
-    if (PN->getNumIncomingValues() > 4) break;
+    if (NumIncomingValues > 4) break;
+    // Unreachable blocks may have zero-operand PHI nodes.
+    if (NumIncomingValues == 0) break;

     // Take the minimum of all incoming values.  This can't infinitely loop
     // because of our depth threshold.
     Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
-    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+    for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
       if (Tmp == 1) return Tmp;
       Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i), TD,
@@ -1989,8 +2000,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
     return !CFP->getValueAPF().isNegZero();

+  // FIXME: Magic number! At the least, this should be given a name because it's
+  // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
+  // expose it as a parameter, so it can be used for testing / experimenting.
   if (Depth == 6)
-    return 1;  // Limit search depth.
+    return false;  // Limit search depth.

   const Operator *I = dyn_cast<Operator>(V);
   if (!I) return false;
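For orientation, rangeMetadataExcludesValue above reads !range metadata as Lower/Upper pairs denoting half-open ranges and reports exclusion only when no range contains the value; the hunk itself merely switches the operand access to mdconst::extract. A simplified model of that check with plain integers (hypothetical names; real ConstantRanges may also wrap around the bit width, which this deliberately ignores):

    #include <cstdint>
    #include <utility>
    #include <vector>

    // !range metadata is a list of [Lo, Hi) pairs; a value is excluded iff
    // it lies in none of them. Non-wrapped ranges only in this sketch.
    bool rangesExcludeValue(
        const std::vector<std::pair<uint64_t, uint64_t>> &Ranges, uint64_t V) {
      for (const auto &R : Ranges)
        if (V >= R.first && V < R.second)
          return false; // some range contains V
      return true;
    }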
@@ -2033,6 +2047,62 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
   return false;
 }

+bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+    return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+
+  // FIXME: Magic number! At the least, this should be given a name because it's
+  // used similarly in CannotBeNegativeZero(). A better fix may be to
+  // expose it as a parameter, so it can be used for testing / experimenting.
+  if (Depth == 6)
+    return false;  // Limit search depth.
+
+  const Operator *I = dyn_cast<Operator>(V);
+  if (!I) return false;
+
+  switch (I->getOpcode()) {
+  default: break;
+  case Instruction::FMul:
+    // x*x is always non-negative or a NaN.
+    if (I->getOperand(0) == I->getOperand(1))
+      return true;
+    // Fall through
+  case Instruction::FAdd:
+  case Instruction::FDiv:
+  case Instruction::FRem:
+    return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
+           CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+  case Instruction::FPExt:
+  case Instruction::FPTrunc:
+    // Widening/narrowing never change sign.
+    return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+  case Instruction::Call:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::exp:
+      case Intrinsic::exp2:
+      case Intrinsic::fabs:
+      case Intrinsic::sqrt:
+        return true;
+      case Intrinsic::powi:
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+          // powi(x,n) is non-negative if n is even.
+          if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
+            return true;
+        }
+        return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+      case Intrinsic::fma:
+      case Intrinsic::fmuladd:
+        // x*x+y is non-negative if y is non-negative.
+        return I->getOperand(0) == I->getOperand(1) &&
+               CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
+      }
+    break;
+  }
+  return false;
+}
+
 /// If the specified value can be set by repeating the same byte in memory,
 /// return the i8 value that it is represented with.  This is
 /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
@@ -2057,26 +2127,16 @@ Value *llvm::isBytewiseValue(Value *V) {
     // Don't handle long double formats, which have strange constraints.
   }

-  // We can handle constant integers that are power of two in size and a
-  // multiple of 8 bits.
+  // We can handle constant integers that are multiple of 8 bits.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    unsigned Width = CI->getBitWidth();
-    if (isPowerOf2_32(Width) && Width > 8) {
-      // We can handle this value if the recursive binary decomposition is the
-      // same at all levels.
-      APInt Val = CI->getValue();
-      APInt Val2;
-      while (Val.getBitWidth() != 8) {
-        unsigned NextWidth = Val.getBitWidth()/2;
-        Val2 = Val.lshr(NextWidth);
-        Val2 = Val2.trunc(Val.getBitWidth()/2);
-        Val = Val.trunc(Val.getBitWidth()/2);
-
-        // If the top/bottom halves aren't the same, reject it.
-        if (Val != Val2)
-          return nullptr;
-      }
-      return ConstantInt::get(V->getContext(), Val);
+    if (CI->getBitWidth() % 8 == 0) {
+      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
+
+      // We can check that all bytes of an integer are equal by making use of a
+      // little trick: rotate by 8 and check if it's still the same value.
+      if (CI->getValue() != CI->getValue().rotl(8))
+        return nullptr;
+      return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
     }
   }
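The rotate-by-8 trick that replaces the recursive halving deserves a spelled-out version: an integer made of one repeated byte is exactly an integer that is invariant under an 8-bit rotation. A self-contained sketch for widths up to 64 bits (hypothetical helper; the real code works on APInt at any width that is a multiple of 8):

    #include <cstdint>
    #include <optional>

    // Returns the repeated byte if every byte of the low BitWidth bits of V
    // is the same, otherwise nullopt. BitWidth must be a multiple of 8 in
    // (8, 64]; the 8-bit case is trivially V itself.
    std::optional<uint8_t> bytewiseValue(uint64_t V, unsigned BitWidth) {
      uint64_t Mask = (BitWidth == 64) ? ~0ull : ((1ull << BitWidth) - 1);
      V &= Mask;
      // Rotate left by 8 within BitWidth; equality means all bytes match.
      uint64_t Rot = ((V << 8) | (V >> (BitWidth - 8))) & Mask;
      if (Rot != V)
        return std::nullopt;
      return static_cast<uint8_t>(V & 0xff);
    }

    // e.g. bytewiseValue(0xABABABABull, 32) yields 0xAB,
    //      bytewiseValue(0xABAB00ABull, 32) yields nullopt.

Note that the new code also drops the old power-of-two width restriction: the rotation argument works for any multiple of 8, such as a 24-bit integer.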
@@ -2474,7 +2534,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
     } else {
       // See if InstructionSimplify knows any relevant tricks.
       if (Instruction *I = dyn_cast<Instruction>(V))
-        // TODO: Acquire a DominatorTree and AssumptionTracker and use them.
+        // TODO: Acquire a DominatorTree and AssumptionCache and use them.
         if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
           V = Simplified;
           continue;
@@ -2556,20 +2616,20 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
   case Instruction::SDiv:
   case Instruction::SRem: {
     // x / y is undefined if y == 0 or x == INT_MIN and y == -1
-    const APInt *X, *Y;
-    if (match(Inst->getOperand(1), m_APInt(Y))) {
-      if (*Y != 0) {
-        if (*Y == -1) {
-          // The numerator can't be MinSignedValue if the denominator is -1.
-          if (match(Inst->getOperand(0), m_APInt(X)))
-            return !Y->isMinSignedValue();
-          // The numerator *might* be MinSignedValue.
-          return false;
-        }
-        // The denominator is not 0 or -1, it's safe to proceed.
-        return true;
-      }
-    }
+    const APInt *Numerator, *Denominator;
+    if (!match(Inst->getOperand(1), m_APInt(Denominator)))
+      return false;
+    // We cannot hoist this division if the denominator is 0.
+    if (*Denominator == 0)
+      return false;
+    // It's safe to hoist if the denominator is not 0 or -1.
+    if (*Denominator != -1)
+      return true;
+    // At this point we know that the denominator is -1. It is safe to hoist as
+    // long we know that the numerator is not INT_MIN.
+    if (match(Inst->getOperand(0), m_APInt(Numerator)))
+      return !Numerator->isMinSignedValue();
+    // The numerator *might* be MinSignedValue.
     return false;
   }
   case Instruction::Load: {
@@ -2668,3 +2728,82 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {

   return false;
 }
+
+OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+                                                   const DataLayout *DL,
+                                                   AssumptionCache *AC,
+                                                   const Instruction *CxtI,
+                                                   const DominatorTree *DT) {
+  // Multiplying n * m significant bits yields a result of n + m significant
+  // bits. If the total number of significant bits does not exceed the
+  // result bit width (minus 1), there is no overflow.
+  // This means if we have enough leading zero bits in the operands
+  // we can guarantee that the result does not overflow.
+  // Ref: "Hacker's Delight" by Henry Warren
+  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+  APInt LHSKnownZero(BitWidth, 0);
+  APInt LHSKnownOne(BitWidth, 0);
+  APInt RHSKnownZero(BitWidth, 0);
+  APInt RHSKnownOne(BitWidth, 0);
+  computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+                   DT);
+  computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+                   DT);
+  // Note that underestimating the number of zero bits gives a more
+  // conservative answer.
+  unsigned ZeroBits = LHSKnownZero.countLeadingOnes() +
+                      RHSKnownZero.countLeadingOnes();
+  // First handle the easy case: if we have enough zero bits there's
+  // definitely no overflow.
+  if (ZeroBits >= BitWidth)
+    return OverflowResult::NeverOverflows;
+
+  // Get the largest possible values for each operand.
+  APInt LHSMax = ~LHSKnownZero;
+  APInt RHSMax = ~RHSKnownZero;
+
+  // We know the multiply operation doesn't overflow if the maximum values for
+  // each operand will not overflow after we multiply them together.
+  bool MaxOverflow;
+  LHSMax.umul_ov(RHSMax, MaxOverflow);
+  if (!MaxOverflow)
+    return OverflowResult::NeverOverflows;
+
+  // We know it always overflows if multiplying the smallest possible values for
+  // the operands also results in overflow.
+  bool MinOverflow;
+  LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
+  if (MinOverflow)
+    return OverflowResult::AlwaysOverflows;
+
+  return OverflowResult::MayOverflow;
+}
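The Hacker's Delight argument in the function above can be reproduced with fixed-width arithmetic: if the operands' known leading zero counts sum to at least the bit width, the product's significant bits must fit. A sketch of the same three-way answer (hypothetical names; uses C++20 <bit> and the GCC/Clang __int128 extension for the double-width product):

    #include <bit>
    #include <cstdint>

    enum class Overflow { Never, May, Always };

    // LHSZero/LHSOne etc. are the known-zero / known-one masks that
    // computeKnownBits would produce for each 64-bit operand (a set bit in
    // *Zero means that bit of the operand is proven zero).
    Overflow unsignedMulOverflow(uint64_t LHSZero, uint64_t LHSOne,
                                 uint64_t RHSZero, uint64_t RHSOne) {
      // n-bit * m-bit needs at most n+m bits, so enough known leading
      // zeros rules overflow out immediately.
      if (std::countl_one(LHSZero) + std::countl_one(RHSZero) >= 64)
        return Overflow::Never;

      // Largest possible operands: every unknown bit set to 1.
      unsigned __int128 Hi =
          (unsigned __int128)(~LHSZero) * (unsigned __int128)(~RHSZero);
      if ((uint64_t)(Hi >> 64) == 0)
        return Overflow::Never; // even the maxima stay within 64 bits

      // Smallest possible operands: only the known-one bits.
      unsigned __int128 Lo =
          (unsigned __int128)LHSOne * (unsigned __int128)RHSOne;
      if ((uint64_t)(Lo >> 64) != 0)
        return Overflow::Always; // even the minima spill over

      return Overflow::May;
    }

APInt::umul_ov plays the role of the double-width product here: it multiplies at the original width and reports whether the result wrapped.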
+
+OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+                                                   const DataLayout *DL,
+                                                   AssumptionCache *AC,
+                                                   const Instruction *CxtI,
+                                                   const DominatorTree *DT) {
+  bool LHSKnownNonNegative, LHSKnownNegative;
+  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+                 AC, CxtI, DT);
+  if (LHSKnownNonNegative || LHSKnownNegative) {
+    bool RHSKnownNonNegative, RHSKnownNegative;
+    ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+                   AC, CxtI, DT);
+
+    if (LHSKnownNegative && RHSKnownNegative) {
+      // The sign bit is set in both cases: this MUST overflow.
+      // Create a simple add instruction, and insert it into the struct.
+      return OverflowResult::AlwaysOverflows;
+    }
+
+    if (LHSKnownNonNegative && RHSKnownNonNegative) {
+      // The sign bit is clear in both cases: this CANNOT overflow.
+      // Create a simple add instruction, and insert it into the struct.
+      return OverflowResult::NeverOverflows;
+    }
+  }
+
+  return OverflowResult::MayOverflow;
+}
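computeOverflowForUnsignedAdd reduces to a statement about the operands' top bits: two set top bits force the unsigned sum past the bit width, two clear top bits keep it inside. A tiny sketch of that case analysis (hypothetical names; the stray "Create a simple add instruction" comments above read like leftovers from the InstCombine code this logic appears to be lifted from):

    enum class Overflow { Never, May, Always };

    // For 64-bit unsigned x + y: if both top bits are set, x >= 2^63 and
    // y >= 2^63, so x + y >= 2^64 and the add must wrap. If both are clear,
    // x, y < 2^63, so x + y < 2^64 and it cannot wrap. Knowledge about only
    // one operand's top bit decides nothing on its own.
    Overflow unsignedAddOverflow(bool LHSTopKnown, bool LHSTopSet,
                                 bool RHSTopKnown, bool RHSTopSet) {
      if (LHSTopKnown && RHSTopKnown) {
        if (LHSTopSet && RHSTopSet)
          return Overflow::Always;
        if (!LHSTopSet && !RHSTopSet)
          return Overflow::Never;
      }
      return Overflow::May;
    }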